Add a robots.txt URL to the site root
Adds a robots.txt file to the site root. Defaults are defined by the 'robotstxt' section of the config. New events StartRobotsTxt and EndRobotsTxt let plugins add information. Probably not useful if the site path is not /, but it won't hurt anything, either.
This commit is contained in:
parent
fec8066bf7
commit
dc62246443
@ -708,3 +708,9 @@ EndUserRegister: When a new user has been registered
|
||||
- &$profile: new profile data
|
||||
- &$user: new user account
|
||||
|
||||
StartRobotsTxt: Before outputting the robots.txt page
|
||||
- &$action: RobotstxtAction being shown
|
||||
|
||||
EndRobotsTxt: After the default robots.txt page (good place for customization)
|
||||
- &$action: RobotstxtAction being shown
|
||||
|
||||
|
14
README
14
README
@ -1496,6 +1496,20 @@ interface. It also makes the user's profile the root URL.
|
||||
enabled: Whether to run in "single user mode". Default false.
|
||||
nickname: nickname of the single user.
|
||||
|
||||
robotstxt
|
||||
---------
|
||||
|
||||
We put out a default robots.txt file to guide the processing of
|
||||
Web crawlers. See http://www.robotstxt.org/ for more information
|
||||
on the format of this file.
|
||||
|
||||
crawldelay: if non-empty, this value is provided as the Crawl-Delay:
|
||||
for the robots.txt file. See http://ur1.ca/l5a0
|
||||
for more information. Default is zero, no explicit delay.
|
||||
disallow: Array of (virtual) directories to disallow. Default is 'main',
|
||||
'search', 'message', 'settings', 'admin'. Ignored when site
|
||||
is private, in which case the entire site ('/') is disallowed.
|
||||
|
||||
Plugins
|
||||
=======
|
||||
|
||||
|
100
actions/robotstxt.php
Normal file
100
actions/robotstxt.php
Normal file
@ -0,0 +1,100 @@
|
||||
<?php
|
||||
/**
|
||||
* StatusNet - the distributed open-source microblogging tool
|
||||
* Copyright (C) 2010, StatusNet, Inc.
|
||||
*
|
||||
* robots.txt generator
|
||||
*
|
||||
* PHP version 5
|
||||
*
|
||||
* @category Action
|
||||
* @package StatusNet
|
||||
* @author Evan Prodromou <evan@status.net>
|
||||
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
|
||||
* @link http://status.net/
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Stop immediately unless the STATUSNET constant has been defined
// (presumably by the application's entry point -- confirm), so this
// file does nothing when requested directly.
if (defined('STATUSNET') === false) {
    exit(1);
}
|
||||
|
||||
/**
 * Generates the robots.txt document from site configuration
 *
 * The output is built on each request from the 'site' and 'robotstxt'
 * configuration sections; plugins can replace or extend it through the
 * StartRobotsTxt and EndRobotsTxt events.
 *
 * @category Action
 * @package  StatusNet
 * @author   Evan Prodromou <evan@status.net>
 * @license  http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
 * @link     http://status.net/
 */

class RobotstxtAction extends Action
{
    /**
     * Handles requests
     *
     * Since this is a relatively static document, we
     * don't do a prepare()
     *
     * Emits a text/plain document with a single "User-Agent: *" record.
     * For a private site the whole site ("/") is disallowed; otherwise
     * one Disallow line is written per configured directory, followed
     * by an optional Crawl-delay line.
     *
     * Nothing is printed, and EndRobotsTxt is not fired, when a
     * StartRobotsTxt handler returns a false value.
     *
     * @param array $args GET, POST, and URL params; unused.
     *
     * @return void
     */

    function handle($args)
    {
        if (Event::handle('StartRobotsTxt', array($this))) {

            header('Content-Type: text/plain');

            print "User-Agent: *\n";

            if (common_config('site', 'private')) {

                print "Disallow: /\n";

            } else {

                $disallow = common_config('robotstxt', 'disallow');

                // Normalise the setting before iterating: an empty value
                // means "no directories" and a lone scalar is treated as
                // a one-element list. Feeding a non-array to foreach
                // would raise a warning that could end up inside the
                // robots.txt body.
                if (empty($disallow)) {
                    $disallow = array();
                } else if (!is_array($disallow)) {
                    $disallow = array($disallow);
                }

                foreach ($disallow as $dir) {
                    print "Disallow: /$dir/\n";
                }

                $crawldelay = common_config('robotstxt', 'crawldelay');

                // A zero/empty delay means "no explicit delay": omit the line.
                if (!empty($crawldelay)) {
                    print "Crawl-delay: " . $crawldelay . "\n";
                }
            }

            Event::handle('EndRobotsTxt', array($this));
        }
    }

    /**
     * Return true; this page doesn't touch the DB.
     *
     * handle() only reads configuration values and prints text.
     *
     * @param array $args other arguments
     *
     * @return boolean is read only action?
     */

    function isReadOnly($args)
    {
        return true;
    }
}
|
@ -285,8 +285,9 @@ function main()
|
||||
if (!$user && common_config('site', 'private')
|
||||
&& !isLoginAction($action)
|
||||
&& !preg_match('/rss$/', $action)
|
||||
&& !preg_match('/^Api/', $action)
|
||||
) {
|
||||
&& $action != 'robotstxt'
|
||||
&& !preg_match('/^Api/', $action)) {
|
||||
|
||||
// set returnto
|
||||
$rargs =& common_copy_args($args);
|
||||
unset($rargs['action']);
|
||||
|
@ -270,4 +270,8 @@ $default =
|
||||
'singleuser' =>
|
||||
array('enabled' => false,
|
||||
'nickname' => null),
|
||||
'robotstxt' =>
|
||||
array('crawldelay' => 0,
|
||||
'disallow' => array('main', 'settings', 'admin', 'search', 'message')
|
||||
),
|
||||
);
|
||||
|
@ -73,6 +73,8 @@ class Router
|
||||
|
||||
if (Event::handle('StartInitializeRouter', array(&$m))) {
|
||||
|
||||
$m->connect('robots.txt', array('action' => 'robotstxt'));
|
||||
|
||||
$m->connect('opensearch/people', array('action' => 'opensearch',
|
||||
'type' => 'people'));
|
||||
$m->connect('opensearch/notice', array('action' => 'opensearch',
|
||||
|
Loading…
Reference in New Issue
Block a user