From 4973d6a2885790d6e02d6e1e7ef33549293e4ec6 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 22 Mar 2010 00:25:49 -0400 Subject: [PATCH 01/19] user sitemap --- plugins/Sitemap/SitemapPlugin.php | 111 ++++++++++++++++++++++++++++++ plugins/Sitemap/sitemapaction.php | 90 ++++++++++++++++++++++++ plugins/Sitemap/usersitemap.php | 79 +++++++++++++++++++++ 3 files changed, 280 insertions(+) create mode 100644 plugins/Sitemap/SitemapPlugin.php create mode 100644 plugins/Sitemap/sitemapaction.php create mode 100644 plugins/Sitemap/usersitemap.php diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php new file mode 100644 index 0000000000..42ea1dbe62 --- /dev/null +++ b/plugins/Sitemap/SitemapPlugin.php @@ -0,0 +1,111 @@ +. + * + * @category Sample + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + // This check helps protect against security problems; + // your code file can't be executed directly from the web. + exit(1); +} + +/** + * Sitemap plugin + * + * @category Sample + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +class SitemapPlugin extends Plugin +{ + /** + * Load related modules when needed + * + * Most non-trivial plugins will require extra modules to do their work. Typically + * these include data classes, action classes, widget classes, or external libraries. + * + * This method receives a class name and loads the PHP file related to that class. By + * tradition, action classes typically have files named for the action, all lower-case. + * Data classes are in files with the data class name, initial letter capitalized. 
+ * + * Note that this method will be called for *all* overloaded classes, not just ones + * in this plugin! So, make sure to return true by default to let other plugins, and + * the core code, get a chance. + * + * @param string $cls Name of the class to be loaded + * + * @return boolean hook value; true means continue processing, false means stop. + */ + + function onAutoload($cls) + { + $dir = dirname(__FILE__); + + switch ($cls) + { + case 'SitemapindexAction': + case 'NoticesitemapAction': + case 'UsersitemapAction': + require_once $dir . '/' . strtolower(mb_substr($cls, 0, -6)) . '.php'; + return false; + case 'SitemapAction': + require_once $dir . '/' . strtolower($cls) . '.php'; + default: + return true; + } + } + + /** + * Map URLs to actions + * + * @param Net_URL_Mapper $m path-to-action mapper + * + * @return boolean hook value; true means continue processing, false means stop. + */ + + function onRouterInitialized($m) + { + $m->connect('sitemapindex.xml', + array('action' => 'sitemapindex')); + $m->connect('/sitemaps/notice/:year/:month/:day.xml', + array('action' => 'noticesitemap'), + array('year' => '[0-9]{4}', + 'month' => '[1]?[0-9]', + 'day' => '[123]?[0-9]')); + $m->connect('/sitemaps/user/:index.xml', + array('action' => 'usersitemap'), + array('index' => '[0-9]+', + 'month' => '[1]?[0-9]', + 'day' => '[123]?[0-9]')); + return true; + } +} diff --git a/plugins/Sitemap/sitemapaction.php b/plugins/Sitemap/sitemapaction.php new file mode 100644 index 0000000000..ab80b85eaa --- /dev/null +++ b/plugins/Sitemap/sitemapaction.php @@ -0,0 +1,90 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. 
+ * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * superclass for sitemap actions + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class SitemapAction extends Action +{ + /** + * handle the action + * + * @param array $args unused. + * + * @return void + */ + + function handle($args) + { + header('Content-Type: text/xml; charset=UTF-8'); + $this->startXML(); + + $this->elementStart('sitemap'); + + while (list($url, $lm, $cf, $p) = $this->nextUrl()) { + $this->showUrl($url, $lm, $cf, $p); + } + + $this->elementEnd('sitemap'); + + $this->endXML(); + } + + function showUrl($url, $lastMod=null, $changeFreq=null, $priority=null) + { + $this->elementStart('url'); + $this->element('loc', null, $url); + if (!is_null($lastMod)) { + $this->element('lastmod', null, $lastMod); + } + if (!is_null($changeFreq)) { + $this->element('changefreq', null, $changeFreq); + } + if (!is_null($priority)) { + $this->element('priority', null, $priority); + } + $this->elementEnd('url'); + } + + function nextUrl() + { + return null; + } +} diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php new file mode 100644 index 0000000000..582a13b664 --- /dev/null +++ b/plugins/Sitemap/usersitemap.php @@ -0,0 +1,79 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. 
+ * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * sitemap for users + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class UsersitemapAction extends SitemapAction +{ + const USERS_PER_MAP = 25000; + + var $user = null; + + function prepare($args) + { + parent::prepare($args); + + $i = $this->trimmed('index'); + + $i += 0; + + $offset = ($i-1) * self::USERS_PER_MAP; + $limit = self::USERS_PER_MAP; + + $this->user = new User(); + + $this->user->orderBy('id'); + $this->user->limit($offset, $limit); + + $this->user->find(); + + return true; + } + + function nextUrl() + { + if ($this->user->fetch()) { + return array(common_profile_url($this->user->nickname), null, null, null); + } else { + return null; + } + } +} From 3a9fdb7647d9439da0c12762c4b255d507995713 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 22 Mar 2010 08:09:15 -0400 Subject: [PATCH 02/19] Add a Notice sitemap --- plugins/Sitemap/SitemapPlugin.php | 6 +- plugins/Sitemap/noticesitemap.php | 94 +++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 plugins/Sitemap/noticesitemap.php diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 42ea1dbe62..8889c89306 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -96,11 +96,13 @@ class SitemapPlugin extends Plugin { $m->connect('sitemapindex.xml', array('action' => 'sitemapindex')); - $m->connect('/sitemaps/notice/:year/:month/:day.xml', + $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', 'month' => '[1]?[0-9]', - 'day' => '[123]?[0-9]')); + 
'day' => '[123]?[0-9]', + 'index' => '[0-9]+')); + $m->connect('/sitemaps/user/:index.xml', array('action' => 'usersitemap'), array('index' => '[0-9]+', diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php new file mode 100644 index 0000000000..7eec886363 --- /dev/null +++ b/plugins/Sitemap/noticesitemap.php @@ -0,0 +1,94 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * sitemap for notices + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class NoticesitemapAction extends SitemapAction +{ + const NOTICES_PER_MAP = 25000; + + var $notice = null; + + function prepare($args) + { + parent::prepare($args); + + $y = $this->trimmed('year'); + + $m = $this->trimmed('month'); + $d = $this->trimmed('day'); + + $i = $this->trimmed('index'); + + $y += 0; + $m += 0; + $d += 0; + $i += 0; + + $offset = ($i-1) * self::NOTICES_PER_MAP; + $limit = self::NOTICES_PER_MAP; + + $this->notice = new Notice(); + + $this->notice->whereAdd("created > '$y-$m-$d 00:00:00'"); + $this->notice->whereAdd("created <= '$y-$m-$d 11:59:59'"); + $this->notice->whereAdd('is_local = 1'); + + $this->notice->orderBy('id'); + $this->notice->limit($offset, $limit); + + $this->notice->find(); + + return true; + } + + function nextUrl() + { + if ($this->notice->fetch()) { + return array(common_local_url('shownotice', array('notice' => $this->notice->id)), + common_date_w3dtf($this->notice->created), + null, + null); + } else { + return null; + } + } +} From cf7dd2a6afbc5477dd29352e9a6a6de735540c11 Mon Sep 17 00:00:00 2001 From: Evan Prodromou 
Date: Thu, 25 Mar 2010 23:56:09 -0400 Subject: [PATCH 03/19] better query for notices by date --- plugins/Sitemap/SitemapPlugin.php | 17 +++-------------- plugins/Sitemap/noticesitemap.php | 9 ++++++--- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 8889c89306..bb404cd25e 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -50,17 +50,6 @@ class SitemapPlugin extends Plugin /** * Load related modules when needed * - * Most non-trivial plugins will require extra modules to do their work. Typically - * these include data classes, action classes, widget classes, or external libraries. - * - * This method receives a class name and loads the PHP file related to that class. By - * tradition, action classes typically have files named for the action, all lower-case. - * Data classes are in files with the data class name, initial letter capitalized. - * - * Note that this method will be called for *all* overloaded classes, not just ones - * in this plugin! So, make sure to return true by default to let other plugins, and - * the core code, get a chance. - * * @param string $cls Name of the class to be loaded * * @return boolean hook value; true means continue processing, false means stop. 
@@ -99,9 +88,9 @@ class SitemapPlugin extends Plugin $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', - 'month' => '[1]?[0-9]', - 'day' => '[123]?[0-9]', - 'index' => '[0-9]+')); + 'month' => '[01][0-9]', + 'day' => '[0123][0-9]', + 'index' => '[1-9][0-9]*')); $m->connect('/sitemaps/user/:index.xml', array('action' => 'usersitemap'), diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 7eec886363..0024084863 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -68,9 +68,12 @@ class NoticesitemapAction extends SitemapAction $this->notice = new Notice(); - $this->notice->whereAdd("created > '$y-$m-$d 00:00:00'"); - $this->notice->whereAdd("created <= '$y-$m-$d 11:59:59'"); - $this->notice->whereAdd('is_local = 1'); + $dt = sprintf('%04d-%02d-%02d', $y, $m, $d); + + $this->notice->whereAdd("created > '$dt 00:00:00'"); + $this->notice->whereAdd("created <= '$dt 23:59:59'"); + + $this->notice->whereAdd('is_local != 0'); $this->notice->orderBy('id'); $this->notice->limit($offset, $limit); From e7e50926416f5617bcb94928a2d27a9de8b2f231 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:03:37 -0400 Subject: [PATCH 04/19] correct element name and namespace for sitemapactions --- plugins/Sitemap/sitemapaction.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Sitemap/sitemapaction.php b/plugins/Sitemap/sitemapaction.php index ab80b85eaa..bab04ed9d2 100644 --- a/plugins/Sitemap/sitemapaction.php +++ b/plugins/Sitemap/sitemapaction.php @@ -56,13 +56,13 @@ class SitemapAction extends Action header('Content-Type: text/xml; charset=UTF-8'); $this->startXML(); - $this->elementStart('sitemap'); + $this->elementStart('urlset', array('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9')); while (list($url, $lm, $cf, $p) = $this->nextUrl()) { $this->showUrl($url, $lm, $cf, $p); } - 
$this->elementEnd('sitemap'); + $this->elementEnd('urlset'); $this->endXML(); } From 8e2766957bf0f6f023385bfa6783d703b3d9a28e Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:03:57 -0400 Subject: [PATCH 05/19] move USERS_PER_MAP to plugin --- plugins/Sitemap/SitemapPlugin.php | 3 +++ plugins/Sitemap/usersitemap.php | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index bb404cd25e..40263aaeef 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -47,6 +47,8 @@ if (!defined('STATUSNET')) { class SitemapPlugin extends Plugin { + const USERS_PER_MAP = 25000; + /** * Load related modules when needed * @@ -85,6 +87,7 @@ class SitemapPlugin extends Plugin { $m->connect('sitemapindex.xml', array('action' => 'sitemapindex')); + $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php index 582a13b664..b7cc939a9f 100644 --- a/plugins/Sitemap/usersitemap.php +++ b/plugins/Sitemap/usersitemap.php @@ -43,8 +43,6 @@ if (!defined('STATUSNET')) { class UsersitemapAction extends SitemapAction { - const USERS_PER_MAP = 25000; - var $user = null; function prepare($args) @@ -55,8 +53,8 @@ class UsersitemapAction extends SitemapAction $i += 0; - $offset = ($i-1) * self::USERS_PER_MAP; - $limit = self::USERS_PER_MAP; + $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; + $limit = SitemapPlugin::USERS_PER_MAP; $this->user = new User(); From 8957d2bdea569594593c55b7d84f05e2998c0633 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:21:19 -0400 Subject: [PATCH 06/19] change URLs for user sitemap --- plugins/Sitemap/SitemapPlugin.php | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php 
index 40263aaeef..5b2af48795 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -95,11 +95,12 @@ class SitemapPlugin extends Plugin 'day' => '[0123][0-9]', 'index' => '[1-9][0-9]*')); - $m->connect('/sitemaps/user/:index.xml', + $m->connect('/sitemaps/user/:year/:month/:day/:index.xml', array('action' => 'usersitemap'), - array('index' => '[0-9]+', - 'month' => '[1]?[0-9]', - 'day' => '[123]?[0-9]')); + array('year' => '[0-9]{4}', + 'month' => '[01][0-9]', + 'day' => '[0123][0-9]', + 'index' => '[1-9][0-9]*')); return true; } } From 816138a6f11ecf1ec44c261d660f8b2aafe49b21 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:24:58 -0400 Subject: [PATCH 07/19] Start of an action for sitemap index --- plugins/Sitemap/sitemapindex.php | 75 ++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 plugins/Sitemap/sitemapindex.php diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php new file mode 100644 index 0000000000..09aebe0d8f --- /dev/null +++ b/plugins/Sitemap/sitemapindex.php @@ -0,0 +1,75 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * Show the sitemap index + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class SitemapAction extends Action +{ + /** + * handle the action + * + * @param array $args unused. 
+ * + * @return void + */ + + function handle($args) + { + header('Content-Type: text/xml; charset=UTF-8'); + $this->startXML(); + + $this->elementStart('sitemapindex', array('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9')); + + $this->showUserSitemaps(); + $this->showNoticeSitemaps(); + + $this->elementEnd('sitemapindex'); + + $this->endXML(); + } + + function showUserSitemaps() + { + $user = new User(); + $cnt = $user->count(); + + } +} From 1c40e7c139af98e4fe9c73093da4183ad8d9e234 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 10:11:18 -0400 Subject: [PATCH 08/19] better calculation for end date in notice sitemaps --- plugins/Sitemap/noticesitemap.php | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 0024084863..12a22dbb22 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -68,14 +68,21 @@ class NoticesitemapAction extends SitemapAction $this->notice = new Notice(); - $dt = sprintf('%04d-%02d-%02d', $y, $m, $d); + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); - $this->notice->whereAdd("created > '$dt 00:00:00'"); - $this->notice->whereAdd("created <= '$dt 23:59:59'"); + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. 
+ + $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); + + $this->notice->whereAdd("created >= '$begindt'"); + $this->notice->whereAdd("created < '$enddt'"); $this->notice->whereAdd('is_local != 0'); - $this->notice->orderBy('id'); + $this->notice->orderBy('created'); + $this->notice->limit($offset, $limit); $this->notice->find(); From a4f0dfd3a134ddfa0e16b0a7ae3d205680eda4cf Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 10:11:45 -0400 Subject: [PATCH 09/19] bundle users by reg date --- plugins/Sitemap/usersitemap.php | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php index b7cc939a9f..42cadaca7d 100644 --- a/plugins/Sitemap/usersitemap.php +++ b/plugins/Sitemap/usersitemap.php @@ -49,8 +49,16 @@ class UsersitemapAction extends SitemapAction { parent::prepare($args); + $y = $this->trimmed('year'); + + $m = $this->trimmed('month'); + $d = $this->trimmed('day'); + $i = $this->trimmed('index'); + $y += 0; + $m += 0; + $d += 0; $i += 0; $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; @@ -58,7 +66,19 @@ class UsersitemapAction extends SitemapAction $this->user = new User(); - $this->user->orderBy('id'); + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); + + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. 
+ + $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); + + $this->user->whereAdd("created >= '$begindt'"); + $this->user->whereAdd("created < '$enddt'"); + + $this->user->orderBy('created'); + $this->user->limit($offset, $limit); $this->user->find(); From 9970645aa271ad85d19c77d362678b964070d5ed Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 10:23:32 -0400 Subject: [PATCH 10/19] Move NOTICES_PER_MAP to SitemapPlugin --- plugins/Sitemap/SitemapPlugin.php | 3 ++- plugins/Sitemap/noticesitemap.php | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 5b2af48795..fa9c9a76d0 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -47,7 +47,8 @@ if (!defined('STATUSNET')) { class SitemapPlugin extends Plugin { - const USERS_PER_MAP = 25000; + const USERS_PER_MAP = 25000; + const NOTICES_PER_MAP = 25000; /** * Load related modules when needed diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 12a22dbb22..c8db24efee 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -43,8 +43,6 @@ if (!defined('STATUSNET')) { class NoticesitemapAction extends SitemapAction { - const NOTICES_PER_MAP = 25000; - var $notice = null; function prepare($args) @@ -63,8 +61,8 @@ class NoticesitemapAction extends SitemapAction $d += 0; $i += 0; - $offset = ($i-1) * self::NOTICES_PER_MAP; - $limit = self::NOTICES_PER_MAP; + $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; + $limit = SitemapPlugin::NOTICES_PER_MAP; $this->notice = new Notice(); From a18115bec934fb4c27040f02bb3aec8b28083297 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:04:56 -0400 Subject: [PATCH 11/19] show sitemapindex with user and notice sitemaps --- plugins/Sitemap/sitemapindex.php | 95 +++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 3 deletions(-) 
diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php index 09aebe0d8f..7942bc3bd0 100644 --- a/plugins/Sitemap/sitemapindex.php +++ b/plugins/Sitemap/sitemapindex.php @@ -41,7 +41,7 @@ if (!defined('STATUSNET')) { * @link http://status.net/ */ -class SitemapAction extends Action +class SitemapindexAction extends Action { /** * handle the action @@ -68,8 +68,97 @@ class SitemapAction extends Action function showUserSitemaps() { - $user = new User(); - $cnt = $user->count(); + $userCounts = $this->getUserCounts(); + foreach ($userCounts as $dt => $cnt) { + $cnt = $cnt+0; + assert($cnt != 0); + $n = (int)$cnt / (int)SitemapPlugin::USERS_PER_MAP; + if (($cnt % SitemapPlugin::USERS_PER_MAP) != 0) { + $n++; + } + for ($i = 1; $i <= $n; $i++) { + $this->showSitemap('user', $dt, $i); + } + } + } + + function showNoticeSitemaps() + { + $noticeCounts = $this->getNoticeCounts(); + + foreach ($noticeCounts as $dt => $cnt) { + assert($cnt != 0); + $n = $cnt / SitemapPlugin::NOTICES_PER_MAP; + if ($cnt % SitemapPlugin::NOTICES_PER_MAP) { + $n++; + } + for ($i = 1; $i <= $n; $i++) { + $this->showSitemap('notice', $dt, $i); + } + } + } + + function getUserCounts() + { + // XXX: cachemeplease + + $user = new User(); + + $user->selectAdd(); + $user->selectAdd('date(created) as regdate, count(*) as regcount'); + $user->groupBy('regdate'); + + $user->find(); + + $userCounts = array(); + + while ($user->fetch()) { + $userCounts[$user->regdate] = $user->regcount; + } + + return $userCounts; + } + + function getNoticeCounts() + { + // XXX: cachemeplease + + $notice = new Notice(); + + $notice->selectAdd(); + $notice->selectAdd('date(created) as postdate, count(*) as postcount'); + $notice->groupBy('postdate'); + + $notice->find(); + + $noticeCounts = array(); + + while ($notice->fetch()) { + $noticeCounts[$notice->postdate] = $notice->postcount; + } + + return $noticeCounts; + } + + function showSitemap($prefix, $dt, $i) + { + list($y, $m, $d) = explode('-', 
$dt); + + $this->elementStart('sitemap'); + $this->element('loc', null, common_local_url($prefix.'sitemap', + array('year' => $y, + 'month' => $m, + 'day' => $d, + 'index' => $i))); + + $begdate = strtotime("$y-$m-$d 00:00:00"); + $enddate = $begdate + (24 * 60 * 60); + + if ($enddate < time()) { + $this->element('lastmod', null, date(DATE_W3C, $enddate)); + } + + $this->elementEnd('sitemap'); } } From b919f837971c583118e6139e1bea84fd1a51d3a0 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:05:19 -0400 Subject: [PATCH 12/19] max users, notices per sitemap = 50K --- plugins/Sitemap/SitemapPlugin.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index fa9c9a76d0..29c32a6242 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -47,8 +47,8 @@ if (!defined('STATUSNET')) { class SitemapPlugin extends Plugin { - const USERS_PER_MAP = 25000; - const NOTICES_PER_MAP = 25000; + const USERS_PER_MAP = 50000; + const NOTICES_PER_MAP = 50000; /** * Load related modules when needed From 9d69906d6b74000bd455b4c14eccf00f8a5d2549 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:28:41 -0400 Subject: [PATCH 13/19] use an array for notice sitemap --- plugins/Sitemap/noticesitemap.php | 58 +++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index c8db24efee..6cf2b3d01f 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -43,7 +43,8 @@ if (!defined('STATUSNET')) { class NoticesitemapAction extends SitemapAction { - var $notice = null; + var $notices = null; + var $j = 0; function prepare($args) { @@ -61,10 +62,32 @@ class NoticesitemapAction extends SitemapAction $d += 0; $i += 0; + $this->notices = $this->getNotices($y, $m, $d, $i); + $this->j = 0; + + return true; 
+ } + + function nextUrl() + { + if ($this->j < count($this->notices)) { + $n = $this->notices[$this->j]; + $this->j++; + return array(common_local_url('shownotice', array('notice' => $n[0])), + common_date_w3dtf($n[1]), + null, + null); + } else { + return null; + } + } + + function getNotices($y, $m, $d, $i) + { $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; $limit = SitemapPlugin::NOTICES_PER_MAP; - $this->notice = new Notice(); + $notice = new Notice(); $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); @@ -74,29 +97,26 @@ class NoticesitemapAction extends SitemapAction $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); - $this->notice->whereAdd("created >= '$begindt'"); - $this->notice->whereAdd("created < '$enddt'"); + $notice->selectAdd(); + $notice->selectAdd('id, created'); - $this->notice->whereAdd('is_local != 0'); + $notice->whereAdd("created >= '$begindt'"); + $notice->whereAdd("created < '$enddt'"); - $this->notice->orderBy('created'); + $notice->whereAdd('is_local != 0'); - $this->notice->limit($offset, $limit); + $notice->orderBy('created'); - $this->notice->find(); + $notice->limit($offset, $limit); - return true; - } + $notice->find(); - function nextUrl() - { - if ($this->notice->fetch()) { - return array(common_local_url('shownotice', array('notice' => $this->notice->id)), - common_date_w3dtf($this->notice->created), - null, - null); - } else { - return null; + $n = array(); + + while ($notice->fetch()) { + $n[] = array($notice->id, $notice->created); } + + return $n; } } From 1030bf35db46044797e521f45d5be38e184ed2db Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:52:19 -0400 Subject: [PATCH 14/19] cache results of notice sitemap query --- plugins/Sitemap/noticesitemap.php | 53 ++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 6cf2b3d01f..bc8a7bfd65 100644 --- 
a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -84,37 +84,52 @@ class NoticesitemapAction extends SitemapAction function getNotices($y, $m, $d, $i) { - $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; - $limit = SitemapPlugin::NOTICES_PER_MAP; + $n = Notice::cacheGet("sitemap:notice:$y:$m:$d:$i"); - $notice = new Notice(); + if ($n === false) { - $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); + $notice = new Notice(); - // XXX: estimates 1d == 24h, which screws up days - // with leap seconds (1d == 24h + 1s). Thankfully they're - // few and far between. + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); - $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. - $notice->selectAdd(); - $notice->selectAdd('id, created'); + $theend = strtotime($begindt) + (24 * 60 * 60); + $enddt = common_sql_date($theend); - $notice->whereAdd("created >= '$begindt'"); - $notice->whereAdd("created < '$enddt'"); + $notice->selectAdd(); + $notice->selectAdd('id, created'); - $notice->whereAdd('is_local != 0'); + $notice->whereAdd("created >= '$begindt'"); + $notice->whereAdd("created < '$enddt'"); - $notice->orderBy('created'); + $notice->whereAdd('is_local != 0'); - $notice->limit($offset, $limit); + $notice->orderBy('created'); - $notice->find(); + $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; + $limit = SitemapPlugin::NOTICES_PER_MAP; - $n = array(); + $notice->limit($offset, $limit); - while ($notice->fetch()) { - $n[] = array($notice->id, $notice->created); + $notice->find(); + + $n = array(); + + while ($notice->fetch()) { + $n[] = array($notice->id, $notice->created); + } + + $c = Cache::instance(); + + if (!empty($c)) { + $c->set(Cache::key("sitemap:notice:$y:$m:$d:$i"), + $n, + Cache::COMPRESSED, + ((time() > $theend) ? 
(time() + 90 * 24 * 60 * 60) : (time() + 5 * 60))); + } } return $n; From c6d9001db552a0980daae82170e00a8373904065 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 12:00:15 -0400 Subject: [PATCH 15/19] cache user data for user sitemap --- plugins/Sitemap/usersitemap.php | 81 +++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php index 42cadaca7d..3e5ac46525 100644 --- a/plugins/Sitemap/usersitemap.php +++ b/plugins/Sitemap/usersitemap.php @@ -43,7 +43,8 @@ if (!defined('STATUSNET')) { class UsersitemapAction extends SitemapAction { - var $user = null; + var $users = null; + var $j = 0; function prepare($args) { @@ -61,37 +62,67 @@ class UsersitemapAction extends SitemapAction $d += 0; $i += 0; - $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; - $limit = SitemapPlugin::USERS_PER_MAP; - - $this->user = new User(); - - $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); - - // XXX: estimates 1d == 24h, which screws up days - // with leap seconds (1d == 24h + 1s). Thankfully they're - // few and far between. 
- - $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); - - $this->user->whereAdd("created >= '$begindt'"); - $this->user->whereAdd("created < '$enddt'"); - - $this->user->orderBy('created'); - - $this->user->limit($offset, $limit); - - $this->user->find(); - + $this->users = $this->getUsers($y, $m, $d, $i); + $this->j = 0; return true; } function nextUrl() { - if ($this->user->fetch()) { - return array(common_profile_url($this->user->nickname), null, null, null); + if ($this->j < count($this->users)) { + $nickname = $this->users[$this->j]; + $this->j++; + return array(common_profile_url($nickname), null, null, null); } else { return null; } } + + function getUsers($y, $m, $d, $i) + { + $u = User::cacheGet("sitemap:user:$y:$m:$d:$i"); + + if ($u === false) { + + $user = new User(); + + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); + + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. + + $theend = strtotime($begindt) + (24 * 60 * 60); + $enddt = common_sql_date($theend); + + $user->selectAdd(); + $user->selectAdd('nickname'); + $user->whereAdd("created >= '$begindt'"); + $user->whereAdd("created < '$enddt'"); + + $user->orderBy('created'); + + $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; + $limit = SitemapPlugin::USERS_PER_MAP; + + $user->limit($offset, $limit); + + $user->find(); + + while ($user->fetch()) { + $u[] = $user->nickname; + } + + $c = Cache::instance(); + + if (!empty($c)) { + $c->set(Cache::key("sitemap:user:$y:$m:$d:$i"), + $u, + Cache::COMPRESSED, + ((time() > $theend) ? 
(time() + 90 * 24 * 60 * 60) : (time() + 5 * 60))); + } + } + + return $u; + } } From da8b231d2ee4c3b30489d0e010aa2dd29f8e80ec Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 12:06:08 -0400 Subject: [PATCH 16/19] make sure notice and user sitemap are 'in' top level directory --- plugins/Sitemap/SitemapPlugin.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 29c32a6242..ed876d94f8 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -89,14 +89,14 @@ class SitemapPlugin extends Plugin $m->connect('sitemapindex.xml', array('action' => 'sitemapindex')); - $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', + $m->connect('/notice-sitemap-:year-:month-:day-:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', 'month' => '[01][0-9]', 'day' => '[0123][0-9]', 'index' => '[1-9][0-9]*')); - $m->connect('/sitemaps/user/:year/:month/:day/:index.xml', + $m->connect('/user-sitemap-:year-:month-:day-:index.xml', array('action' => 'usersitemap'), array('year' => '[0-9]{4}', 'month' => '[01][0-9]', From 09e5046dd388baf2629aceea66b5101ee4c4fb86 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 12:13:48 -0400 Subject: [PATCH 17/19] cache notice and user counts in sitemap index --- plugins/Sitemap/sitemapindex.php | 46 +++++++++++++++++++------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php index 7942bc3bd0..2055dd7f06 100644 --- a/plugins/Sitemap/sitemapindex.php +++ b/plugins/Sitemap/sitemapindex.php @@ -101,20 +101,25 @@ class SitemapindexAction extends Action function getUserCounts() { - // XXX: cachemeplease + $userCounts = User::cacheGet('sitemap:user:counts'); - $user = new User(); + if ($userCounts === false) { - $user->selectAdd(); - $user->selectAdd('date(created) as regdate, 
count(*) as regcount'); - $user->groupBy('regdate'); + $user = new User(); - $user->find(); + $user->selectAdd(); + $user->selectAdd('date(created) as regdate, count(*) as regcount'); + $user->groupBy('regdate'); - $userCounts = array(); + $user->find(); - while ($user->fetch()) { - $userCounts[$user->regdate] = $user->regcount; + $userCounts = array(); + + while ($user->fetch()) { + $userCounts[$user->regdate] = $user->regcount; + } + + User::cacheSet('sitemap:user:counts', $userCounts); } return $userCounts; @@ -122,20 +127,25 @@ class SitemapindexAction extends Action function getNoticeCounts() { - // XXX: cachemeplease + $noticeCounts = Notice::cacheGet('sitemap:notice:counts'); - $notice = new Notice(); + if ($noticeCounts === false) { - $notice->selectAdd(); - $notice->selectAdd('date(created) as postdate, count(*) as postcount'); - $notice->groupBy('postdate'); + $notice = new Notice(); - $notice->find(); + $notice->selectAdd(); + $notice->selectAdd('date(created) as postdate, count(*) as postcount'); + $notice->groupBy('postdate'); - $noticeCounts = array(); + $notice->find(); - while ($notice->fetch()) { - $noticeCounts[$notice->postdate] = $notice->postcount; + $noticeCounts = array(); + + while ($notice->fetch()) { + $noticeCounts[$notice->postdate] = $notice->postcount; + } + + Notice::cacheSet('sitemap:notice:counts', $noticeCounts); } return $noticeCounts; From f3f652e451bceb64d919f3e8a2fcbeeb6c9dd187 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 14:32:01 -0400 Subject: [PATCH 18/19] add sitemap statement to robots.txt --- plugins/Sitemap/SitemapPlugin.php | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index ed876d94f8..6fc7021049 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -76,6 +76,23 @@ class SitemapPlugin extends Plugin } } + /** + * Add sitemap-related information at the end of robots.txt 
+ * + * @param Action $action Action being run + * + * @return boolean hook value. + */ + + function onEndRobotsTxt($action) + { + $url = common_local_url('sitemapindex'); + + print "\nSitemap: $url\n"; + + return true; + } + /** * Map URLs to actions * From 5a9ff7c575fb2b23d8b7b3eaf896852bb3501b80 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 14:34:22 -0400 Subject: [PATCH 19/19] note that sitemap actions are readonly --- plugins/Sitemap/sitemapaction.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/plugins/Sitemap/sitemapaction.php b/plugins/Sitemap/sitemapaction.php index bab04ed9d2..45edfccc51 100644 --- a/plugins/Sitemap/sitemapaction.php +++ b/plugins/Sitemap/sitemapaction.php @@ -87,4 +87,9 @@ class SitemapAction extends Action { return null; } + + function isReadOnly() + { + return true; + } }