From beaecb18d5b92b913473dfffd545dc436f50cf66 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 22 Apr 2010 08:49:33 -0700 Subject: [PATCH 01/46] Add statusnet: prefix for API to JSON and XML output I added the statusnet: prefix to the xml output. This prefix should be declared on the root element of all relevant XML output. I also added two StatusNet-specific fields: * statusnet:html - rendered HTML. Clients shouldn't have to guess at the correct HTML rendering for notices, especially since some of the links depend on context. * statusnet:profile_url - profile URL for a user. You can't count on a user being a local user in a distributed microblogging world. So, this shows the explicit profile_url. --- actions/apidirectmessage.php | 3 ++- actions/apiusershow.php | 2 +- lib/apiaction.php | 42 +++++++++++++++++++++++++++--------- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/actions/apidirectmessage.php b/actions/apidirectmessage.php index 53da9e0c68..7a0f46274c 100644 --- a/actions/apidirectmessage.php +++ b/actions/apidirectmessage.php @@ -232,7 +232,8 @@ class ApiDirectMessageAction extends ApiAuthAction function showXmlDirectMessages() { $this->initDocument('xml'); - $this->elementStart('direct-messages', array('type' => 'array')); + $this->elementStart('direct-messages', array('type' => 'array', + 'xmlns:statusnet' => 'http://status.net/schema/api/1/')); foreach ($this->messages as $m) { $dm_array = $this->directMessageArray($m); diff --git a/actions/apiusershow.php b/actions/apiusershow.php index 6c8fad49ba..28993102c0 100644 --- a/actions/apiusershow.php +++ b/actions/apiusershow.php @@ -113,7 +113,7 @@ class ApiUserShowAction extends ApiPrivateAuthAction if ($this->format == 'xml') { $this->initDocument('xml'); - $this->showTwitterXmlUser($twitter_user); + $this->showTwitterXmlUser($twitter_user, 'user', true); $this->endDocument('xml'); } elseif ($this->format == 'json') { $this->initDocument('json'); diff --git a/lib/apiaction.php 
b/lib/apiaction.php index 59dc47c23b..6ee0a94d94 100644 --- a/lib/apiaction.php +++ b/lib/apiaction.php @@ -223,6 +223,10 @@ class ApiAction extends Action } } + // StatusNet-specific + + $twitter_user['statusnet:profile_url'] = $profile->profileurl; + return $twitter_user; } @@ -308,6 +312,10 @@ class ApiAction extends Action $twitter_status['user'] = $twitter_user; } + // StatusNet-specific + + $twitter_status['statusnet:html'] = $notice->rendered; + return $twitter_status; } @@ -475,9 +483,13 @@ class ApiAction extends Action } } - function showTwitterXmlStatus($twitter_status, $tag='status') + function showTwitterXmlStatus($twitter_status, $tag='status', $namespaces=false) { - $this->elementStart($tag); + $attrs = array(); + if ($namespaces) { + $attrs['xmlns:statusnet'] = 'http://status.net/schema/api/1/'; + } + $this->elementStart($tag, $attrs); foreach($twitter_status as $element => $value) { switch ($element) { case 'user': @@ -511,9 +523,13 @@ class ApiAction extends Action $this->elementEnd('group'); } - function showTwitterXmlUser($twitter_user, $role='user') + function showTwitterXmlUser($twitter_user, $role='user', $namespaces=false) { - $this->elementStart($role); + $attrs = array(); + if ($namespaces) { + $attrs['xmlns:statusnet'] = 'http://status.net/schema/api/1/'; + } + $this->elementStart($role, $attrs); foreach($twitter_user as $element => $value) { if ($element == 'status') { $this->showTwitterXmlStatus($twitter_user['status']); @@ -595,7 +611,7 @@ class ApiAction extends Action { $this->initDocument('xml'); $twitter_status = $this->twitterStatusArray($notice); - $this->showTwitterXmlStatus($twitter_status); + $this->showTwitterXmlStatus($twitter_status, 'status', true); $this->endDocument('xml'); } @@ -611,7 +627,8 @@ class ApiAction extends Action { $this->initDocument('xml'); - $this->elementStart('statuses', array('type' => 'array')); + $this->elementStart('statuses', array('type' => 'array', + 'xmlns:statusnet' => 
'http://status.net/schema/api/1/')); if (is_array($notice)) { foreach ($notice as $n) { @@ -778,9 +795,13 @@ class ApiAction extends Action $this->elementEnd('entry'); } - function showXmlDirectMessage($dm) + function showXmlDirectMessage($dm, $namespaces=false) { - $this->elementStart('direct_message'); + $attrs = array(); + if ($namespaces) { + $attrs['xmlns:statusnet'] = 'http://status.net/schema/api/1/'; + } + $this->elementStart('direct_message', $attrs); foreach($dm as $element => $value) { switch ($element) { case 'sender': @@ -857,7 +878,7 @@ class ApiAction extends Action { $this->initDocument('xml'); $dmsg = $this->directMessageArray($message); - $this->showXmlDirectMessage($dmsg); + $this->showXmlDirectMessage($dmsg, true); $this->endDocument('xml'); } @@ -974,7 +995,8 @@ class ApiAction extends Action { $this->initDocument('xml'); - $this->elementStart('users', array('type' => 'array')); + $this->elementStart('users', array('type' => 'array', + 'xmlns:statusnet' => 'http://status.net/schema/api/1/')); if (is_array($user)) { foreach ($user as $u) { From 4973d6a2885790d6e02d6e1e7ef33549293e4ec6 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 22 Mar 2010 00:25:49 -0400 Subject: [PATCH 02/46] user sitemap --- plugins/Sitemap/SitemapPlugin.php | 111 ++++++++++++++++++++++++++++++ plugins/Sitemap/sitemapaction.php | 90 ++++++++++++++++++++++++ plugins/Sitemap/usersitemap.php | 79 +++++++++++++++++++++ 3 files changed, 280 insertions(+) create mode 100644 plugins/Sitemap/SitemapPlugin.php create mode 100644 plugins/Sitemap/sitemapaction.php create mode 100644 plugins/Sitemap/usersitemap.php diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php new file mode 100644 index 0000000000..42ea1dbe62 --- /dev/null +++ b/plugins/Sitemap/SitemapPlugin.php @@ -0,0 +1,111 @@ +. + * + * @category Sample + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. 
+ * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + // This check helps protect against security problems; + // your code file can't be executed directly from the web. + exit(1); +} + +/** + * Sitemap plugin + * + * @category Sample + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +class SitemapPlugin extends Plugin +{ + /** + * Load related modules when needed + * + * Most non-trivial plugins will require extra modules to do their work. Typically + * these include data classes, action classes, widget classes, or external libraries. + * + * This method receives a class name and loads the PHP file related to that class. By + * tradition, action classes typically have files named for the action, all lower-case. + * Data classes are in files with the data class name, initial letter capitalized. + * + * Note that this method will be called for *all* overloaded classes, not just ones + * in this plugin! So, make sure to return true by default to let other plugins, and + * the core code, get a chance. + * + * @param string $cls Name of the class to be loaded + * + * @return boolean hook value; true means continue processing, false means stop. + */ + + function onAutoload($cls) + { + $dir = dirname(__FILE__); + + switch ($cls) + { + case 'SitemapindexAction': + case 'NoticesitemapAction': + case 'UsersitemapAction': + require_once $dir . '/' . strtolower(mb_substr($cls, 0, -6)) . '.php'; + return false; + case 'SitemapAction': + require_once $dir . '/' . strtolower($cls) . '.php'; + default: + return true; + } + } + + /** + * Map URLs to actions + * + * @param Net_URL_Mapper $m path-to-action mapper + * + * @return boolean hook value; true means continue processing, false means stop. 
+ */ + + function onRouterInitialized($m) + { + $m->connect('sitemapindex.xml', + array('action' => 'sitemapindex')); + $m->connect('/sitemaps/notice/:year/:month/:day.xml', + array('action' => 'noticesitemap'), + array('year' => '[0-9]{4}', + 'month' => '[1]?[0-9]', + 'day' => '[123]?[0-9]')); + $m->connect('/sitemaps/user/:index.xml', + array('action' => 'usersitemap'), + array('index' => '[0-9]+', + 'month' => '[1]?[0-9]', + 'day' => '[123]?[0-9]')); + return true; + } +} diff --git a/plugins/Sitemap/sitemapaction.php b/plugins/Sitemap/sitemapaction.php new file mode 100644 index 0000000000..ab80b85eaa --- /dev/null +++ b/plugins/Sitemap/sitemapaction.php @@ -0,0 +1,90 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * superclass for sitemap actions + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class SitemapAction extends Action +{ + /** + * handle the action + * + * @param array $args unused. 
+ * + * @return void + */ + + function handle($args) + { + header('Content-Type: text/xml; charset=UTF-8'); + $this->startXML(); + + $this->elementStart('sitemap'); + + while (list($url, $lm, $cf, $p) = $this->nextUrl()) { + $this->showUrl($url, $lm, $cf, $p); + } + + $this->elementEnd('sitemap'); + + $this->endXML(); + } + + function showUrl($url, $lastMod=null, $changeFreq=null, $priority=null) + { + $this->elementStart('url'); + $this->element('loc', null, $url); + if (!is_null($lastMod)) { + $this->element('lastmod', null, $lastMod); + } + if (!is_null($changeFreq)) { + $this->element('changefreq', null, $changeFreq); + } + if (!is_null($priority)) { + $this->element('priority', null, $priority); + } + $this->elementEnd('url'); + } + + function nextUrl() + { + return null; + } +} diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php new file mode 100644 index 0000000000..582a13b664 --- /dev/null +++ b/plugins/Sitemap/usersitemap.php @@ -0,0 +1,79 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. 
+ * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * sitemap for users + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class UsersitemapAction extends SitemapAction +{ + const USERS_PER_MAP = 25000; + + var $user = null; + + function prepare($args) + { + parent::prepare($args); + + $i = $this->trimmed('index'); + + $i += 0; + + $offset = ($i-1) * self::USERS_PER_MAP; + $limit = self::USERS_PER_MAP; + + $this->user = new User(); + + $this->user->orderBy('id'); + $this->user->limit($offset, $limit); + + $this->user->find(); + + return true; + } + + function nextUrl() + { + if ($this->user->fetch()) { + return array(common_profile_url($this->user->nickname), null, null, null); + } else { + return null; + } + } +} From 3a9fdb7647d9439da0c12762c4b255d507995713 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 22 Mar 2010 08:09:15 -0400 Subject: [PATCH 03/46] Add a Notice sitemap --- plugins/Sitemap/SitemapPlugin.php | 6 +- plugins/Sitemap/noticesitemap.php | 94 +++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 plugins/Sitemap/noticesitemap.php diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 42ea1dbe62..8889c89306 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -96,11 +96,13 @@ class SitemapPlugin extends Plugin { $m->connect('sitemapindex.xml', array('action' => 'sitemapindex')); - $m->connect('/sitemaps/notice/:year/:month/:day.xml', + $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', 'month' => '[1]?[0-9]', - 'day' => '[123]?[0-9]')); + 
'day' => '[123]?[0-9]', + 'index' => '[0-9]+')); + $m->connect('/sitemaps/user/:index.xml', array('action' => 'usersitemap'), array('index' => '[0-9]+', diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php new file mode 100644 index 0000000000..7eec886363 --- /dev/null +++ b/plugins/Sitemap/noticesitemap.php @@ -0,0 +1,94 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * sitemap for users + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class NoticesitemapAction extends SitemapAction +{ + const NOTICES_PER_MAP = 25000; + + var $notice = null; + + function prepare($args) + { + parent::prepare($args); + + $y = $this->trimmed('year'); + + $m = $this->trimmed('month'); + $d = $this->trimmed('day'); + + $i = $this->trimmed('index'); + + $y += 0; + $m += 0; + $d += 0; + $i += 0; + + $offset = ($i-1) * self::NOTICES_PER_MAP; + $limit = self::NOTICES_PER_MAP; + + $this->notice = new Notice(); + + $this->notice->whereAdd("created > '$y-$m-$d 00:00:00'"); + $this->notice->whereAdd("created <= '$y-$m-$d 11:59:59'"); + $this->notice->whereAdd('is_local = 1'); + + $this->notice->orderBy('id'); + $this->notice->limit($offset, $limit); + + $this->notice->find(); + + return true; + } + + function nextUrl() + { + if ($this->notice->fetch()) { + return array(common_local_url('shownotice', array('notice' => $this->notice->id)), + common_date_w3dtf($this->notice->created), + null, + null); + } else { + return null; + } + } +} From cf7dd2a6afbc5477dd29352e9a6a6de735540c11 Mon Sep 17 00:00:00 2001 From: Evan Prodromou 
Date: Thu, 25 Mar 2010 23:56:09 -0400 Subject: [PATCH 04/46] better query for notices by date --- plugins/Sitemap/SitemapPlugin.php | 17 +++-------------- plugins/Sitemap/noticesitemap.php | 9 ++++++--- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 8889c89306..bb404cd25e 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -50,17 +50,6 @@ class SitemapPlugin extends Plugin /** * Load related modules when needed * - * Most non-trivial plugins will require extra modules to do their work. Typically - * these include data classes, action classes, widget classes, or external libraries. - * - * This method receives a class name and loads the PHP file related to that class. By - * tradition, action classes typically have files named for the action, all lower-case. - * Data classes are in files with the data class name, initial letter capitalized. - * - * Note that this method will be called for *all* overloaded classes, not just ones - * in this plugin! So, make sure to return true by default to let other plugins, and - * the core code, get a chance. - * * @param string $cls Name of the class to be loaded * * @return boolean hook value; true means continue processing, false means stop. 
@@ -99,9 +88,9 @@ class SitemapPlugin extends Plugin $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', - 'month' => '[1]?[0-9]', - 'day' => '[123]?[0-9]', - 'index' => '[0-9]+')); + 'month' => '[01][0-9]', + 'day' => '[0123][0-9]', + 'index' => '[1-9][0-9]*')); $m->connect('/sitemaps/user/:index.xml', array('action' => 'usersitemap'), diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 7eec886363..0024084863 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -68,9 +68,12 @@ class NoticesitemapAction extends SitemapAction $this->notice = new Notice(); - $this->notice->whereAdd("created > '$y-$m-$d 00:00:00'"); - $this->notice->whereAdd("created <= '$y-$m-$d 11:59:59'"); - $this->notice->whereAdd('is_local = 1'); + $dt = sprintf('%04d-%02d-%02d', $y, $m, $d); + + $this->notice->whereAdd("created > '$dt 00:00:00'"); + $this->notice->whereAdd("created <= '$dt 23:59:59'"); + + $this->notice->whereAdd('is_local != 0'); $this->notice->orderBy('id'); $this->notice->limit($offset, $limit); From e7e50926416f5617bcb94928a2d27a9de8b2f231 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:03:37 -0400 Subject: [PATCH 05/46] correct element name and namespace for sitemapactions --- plugins/Sitemap/sitemapaction.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Sitemap/sitemapaction.php b/plugins/Sitemap/sitemapaction.php index ab80b85eaa..bab04ed9d2 100644 --- a/plugins/Sitemap/sitemapaction.php +++ b/plugins/Sitemap/sitemapaction.php @@ -56,13 +56,13 @@ class SitemapAction extends Action header('Content-Type: text/xml; charset=UTF-8'); $this->startXML(); - $this->elementStart('sitemap'); + $this->elementStart('urlset', array('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9')); while (list($url, $lm, $cf, $p) = $this->nextUrl()) { $this->showUrl($url, $lm, $cf, $p); } - 
$this->elementEnd('sitemap'); + $this->elementEnd('urlset'); $this->endXML(); } From 8e2766957bf0f6f023385bfa6783d703b3d9a28e Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:03:57 -0400 Subject: [PATCH 06/46] move USERS_PER_MAP to plugin --- plugins/Sitemap/SitemapPlugin.php | 3 +++ plugins/Sitemap/usersitemap.php | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index bb404cd25e..40263aaeef 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -47,6 +47,8 @@ if (!defined('STATUSNET')) { class SitemapPlugin extends Plugin { + const USERS_PER_MAP = 25000; + /** * Load related modules when needed * @@ -85,6 +87,7 @@ class SitemapPlugin extends Plugin { $m->connect('sitemapindex.xml', array('action' => 'sitemapindex')); + $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php index 582a13b664..b7cc939a9f 100644 --- a/plugins/Sitemap/usersitemap.php +++ b/plugins/Sitemap/usersitemap.php @@ -43,8 +43,6 @@ if (!defined('STATUSNET')) { class UsersitemapAction extends SitemapAction { - const USERS_PER_MAP = 25000; - var $user = null; function prepare($args) @@ -55,8 +53,8 @@ class UsersitemapAction extends SitemapAction $i += 0; - $offset = ($i-1) * self::USERS_PER_MAP; - $limit = self::USERS_PER_MAP; + $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; + $limit = SitemapPlugin::USERS_PER_MAP; $this->user = new User(); From 8957d2bdea569594593c55b7d84f05e2998c0633 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:21:19 -0400 Subject: [PATCH 07/46] change URLs for user sitemap --- plugins/Sitemap/SitemapPlugin.php | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php 
index 40263aaeef..5b2af48795 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -95,11 +95,12 @@ class SitemapPlugin extends Plugin 'day' => '[0123][0-9]', 'index' => '[1-9][0-9]*')); - $m->connect('/sitemaps/user/:index.xml', + $m->connect('/sitemaps/user/:year/:month/:day/:index.xml', array('action' => 'usersitemap'), - array('index' => '[0-9]+', - 'month' => '[1]?[0-9]', - 'day' => '[123]?[0-9]')); + array('year' => '[0-9]{4}', + 'month' => '[01][0-9]', + 'day' => '[0123][0-9]', + 'index' => '[1-9][0-9]*')); return true; } } From 816138a6f11ecf1ec44c261d660f8b2aafe49b21 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:24:58 -0400 Subject: [PATCH 08/46] Start of an action for sitemap index --- plugins/Sitemap/sitemapindex.php | 75 ++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 plugins/Sitemap/sitemapindex.php diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php new file mode 100644 index 0000000000..09aebe0d8f --- /dev/null +++ b/plugins/Sitemap/sitemapindex.php @@ -0,0 +1,75 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * Show the sitemap index + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class SitemapAction extends Action +{ + /** + * handle the action + * + * @param array $args unused. 
+ * + * @return void + */ + + function handle($args) + { + header('Content-Type: text/xml; charset=UTF-8'); + $this->startXML(); + + $this->elementStart('sitemapindex', array('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9')); + + $this->showUserSitemaps(); + $this->showNoticeSitemaps(); + + $this->elementEnd('sitemapindex'); + + $this->endXML(); + } + + function showUserSitemaps() + { + $user = new User(); + $cnt = $user->count(); + + } +} From 1c40e7c139af98e4fe9c73093da4183ad8d9e234 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 10:11:18 -0400 Subject: [PATCH 09/46] better calculation for end date in notice sitemaps --- plugins/Sitemap/noticesitemap.php | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 0024084863..12a22dbb22 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -68,14 +68,21 @@ class NoticesitemapAction extends SitemapAction $this->notice = new Notice(); - $dt = sprintf('%04d-%02d-%02d', $y, $m, $d); + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); - $this->notice->whereAdd("created > '$dt 00:00:00'"); - $this->notice->whereAdd("created <= '$dt 23:59:59'"); + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. 
+ + $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); + + $this->notice->whereAdd("created >= '$begindt'"); + $this->notice->whereAdd("created < '$enddt'"); $this->notice->whereAdd('is_local != 0'); - $this->notice->orderBy('id'); + $this->notice->orderBy('created'); + $this->notice->limit($offset, $limit); $this->notice->find(); From a4f0dfd3a134ddfa0e16b0a7ae3d205680eda4cf Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 10:11:45 -0400 Subject: [PATCH 10/46] bundle users by reg date --- plugins/Sitemap/usersitemap.php | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php index b7cc939a9f..42cadaca7d 100644 --- a/plugins/Sitemap/usersitemap.php +++ b/plugins/Sitemap/usersitemap.php @@ -49,8 +49,16 @@ class UsersitemapAction extends SitemapAction { parent::prepare($args); + $y = $this->trimmed('year'); + + $m = $this->trimmed('month'); + $d = $this->trimmed('day'); + $i = $this->trimmed('index'); + $y += 0; + $m += 0; + $d += 0; $i += 0; $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; @@ -58,7 +66,19 @@ class UsersitemapAction extends SitemapAction $this->user = new User(); - $this->user->orderBy('id'); + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); + + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. 
+ + $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); + + $this->user->whereAdd("created >= '$begindt'"); + $this->user->whereAdd("created < '$enddt'"); + + $this->user->orderBy('created'); + $this->user->limit($offset, $limit); $this->user->find(); From 9970645aa271ad85d19c77d362678b964070d5ed Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 10:23:32 -0400 Subject: [PATCH 11/46] Move NOTICES_PER_MAP to SitemapPlugin --- plugins/Sitemap/SitemapPlugin.php | 3 ++- plugins/Sitemap/noticesitemap.php | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 5b2af48795..fa9c9a76d0 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -47,7 +47,8 @@ if (!defined('STATUSNET')) { class SitemapPlugin extends Plugin { - const USERS_PER_MAP = 25000; + const USERS_PER_MAP = 25000; + const NOTICES_PER_MAP = 25000; /** * Load related modules when needed diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 12a22dbb22..c8db24efee 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -43,8 +43,6 @@ if (!defined('STATUSNET')) { class NoticesitemapAction extends SitemapAction { - const NOTICES_PER_MAP = 25000; - var $notice = null; function prepare($args) @@ -63,8 +61,8 @@ class NoticesitemapAction extends SitemapAction $d += 0; $i += 0; - $offset = ($i-1) * self::NOTICES_PER_MAP; - $limit = self::NOTICES_PER_MAP; + $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; + $limit = SitemapPlugin::NOTICES_PER_MAP; $this->notice = new Notice(); From a18115bec934fb4c27040f02bb3aec8b28083297 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:04:56 -0400 Subject: [PATCH 12/46] show sitemapindex with user and notice sitemaps --- plugins/Sitemap/sitemapindex.php | 95 +++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 3 deletions(-) 
diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php index 09aebe0d8f..7942bc3bd0 100644 --- a/plugins/Sitemap/sitemapindex.php +++ b/plugins/Sitemap/sitemapindex.php @@ -41,7 +41,7 @@ if (!defined('STATUSNET')) { * @link http://status.net/ */ -class SitemapAction extends Action +class SitemapindexAction extends Action { /** * handle the action @@ -68,8 +68,97 @@ class SitemapAction extends Action function showUserSitemaps() { - $user = new User(); - $cnt = $user->count(); + $userCounts = $this->getUserCounts(); + foreach ($userCounts as $dt => $cnt) { + $cnt = $cnt+0; + assert($cnt != 0); + $n = (int)$cnt / (int)SitemapPlugin::USERS_PER_MAP; + if (($cnt % SitemapPlugin::USERS_PER_MAP) != 0) { + $n++; + } + for ($i = 1; $i <= $n; $i++) { + $this->showSitemap('user', $dt, $i); + } + } + } + + function showNoticeSitemaps() + { + $noticeCounts = $this->getNoticeCounts(); + + foreach ($noticeCounts as $dt => $cnt) { + assert($cnt != 0); + $n = $cnt / SitemapPlugin::NOTICES_PER_MAP; + if ($cnt % SitemapPlugin::NOTICES_PER_MAP) { + $n++; + } + for ($i = 1; $i <= $n; $i++) { + $this->showSitemap('notice', $dt, $i); + } + } + } + + function getUserCounts() + { + // XXX: cachemeplease + + $user = new User(); + + $user->selectAdd(); + $user->selectAdd('date(created) as regdate, count(*) as regcount'); + $user->groupBy('regdate'); + + $user->find(); + + $userCounts = array(); + + while ($user->fetch()) { + $userCounts[$user->regdate] = $user->regcount; + } + + return $userCounts; + } + + function getNoticeCounts() + { + // XXX: cachemeplease + + $notice = new Notice(); + + $notice->selectAdd(); + $notice->selectAdd('date(created) as postdate, count(*) as postcount'); + $notice->groupBy('postdate'); + + $notice->find(); + + $noticeCounts = array(); + + while ($notice->fetch()) { + $noticeCounts[$notice->postdate] = $notice->postcount; + } + + return $noticeCounts; + } + + function showSitemap($prefix, $dt, $i) + { + list($y, $m, $d) = explode('-', 
$dt); + + $this->elementStart('sitemap'); + $this->element('loc', null, common_local_url($prefix.'sitemap', + array('year' => $y, + 'month' => $m, + 'day' => $d, + 'index' => $i))); + + $begdate = strtotime("$y-$m-$d 00:00:00"); + $enddate = $begdate + (24 * 60 * 60); + + if ($enddate < time()) { + $this->element('lastmod', null, date(DATE_W3C, $enddate)); + } + + $this->elementEnd('sitemap'); } } From b919f837971c583118e6139e1bea84fd1a51d3a0 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:05:19 -0400 Subject: [PATCH 13/46] max users, notices per sitemap = 50K --- plugins/Sitemap/SitemapPlugin.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index fa9c9a76d0..29c32a6242 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -47,8 +47,8 @@ if (!defined('STATUSNET')) { class SitemapPlugin extends Plugin { - const USERS_PER_MAP = 25000; - const NOTICES_PER_MAP = 25000; + const USERS_PER_MAP = 50000; + const NOTICES_PER_MAP = 50000; /** * Load related modules when needed From 9d69906d6b74000bd455b4c14eccf00f8a5d2549 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:28:41 -0400 Subject: [PATCH 14/46] use an array for notice sitemap --- plugins/Sitemap/noticesitemap.php | 58 +++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index c8db24efee..6cf2b3d01f 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -43,7 +43,8 @@ if (!defined('STATUSNET')) { class NoticesitemapAction extends SitemapAction { - var $notice = null; + var $notices = null; + var $j = 0; function prepare($args) { @@ -61,10 +62,32 @@ class NoticesitemapAction extends SitemapAction $d += 0; $i += 0; + $this->notices = $this->getNotices($y, $m, $d, $i); + $this->j = 0; + + return true; 
+ } + + function nextUrl() + { + if ($this->j < count($this->notices)) { + $n = $this->notices[$this->j]; + $this->j++; + return array(common_local_url('shownotice', array('notice' => $n[0])), + common_date_w3dtf($n[1]), + null, + null); + } else { + return null; + } + } + + function getNotices($y, $m, $d, $i) + { $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; $limit = SitemapPlugin::NOTICES_PER_MAP; - $this->notice = new Notice(); + $notice = new Notice(); $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); @@ -74,29 +97,26 @@ class NoticesitemapAction extends SitemapAction $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); - $this->notice->whereAdd("created >= '$begindt'"); - $this->notice->whereAdd("created < '$enddt'"); + $notice->selectAdd(); + $notice->selectAdd('id, created'); - $this->notice->whereAdd('is_local != 0'); + $notice->whereAdd("created >= '$begindt'"); + $notice->whereAdd("created < '$enddt'"); - $this->notice->orderBy('created'); + $notice->whereAdd('is_local != 0'); - $this->notice->limit($offset, $limit); + $notice->orderBy('created'); - $this->notice->find(); + $notice->limit($offset, $limit); - return true; - } + $notice->find(); - function nextUrl() - { - if ($this->notice->fetch()) { - return array(common_local_url('shownotice', array('notice' => $this->notice->id)), - common_date_w3dtf($this->notice->created), - null, - null); - } else { - return null; + $n = array(); + + while ($notice->fetch()) { + $n[] = array($notice->id, $notice->created); } + + return $n; } } From 1030bf35db46044797e521f45d5be38e184ed2db Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:52:19 -0400 Subject: [PATCH 15/46] cache results of notice sitemap query --- plugins/Sitemap/noticesitemap.php | 53 ++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 6cf2b3d01f..bc8a7bfd65 100644 --- 
a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -84,37 +84,52 @@ class NoticesitemapAction extends SitemapAction function getNotices($y, $m, $d, $i) { - $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; - $limit = SitemapPlugin::NOTICES_PER_MAP; + $n = Notice::cacheGet("sitemap:notice:$y:$m:$d:$i"); - $notice = new Notice(); + if ($n === false) { - $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); + $notice = new Notice(); - // XXX: estimates 1d == 24h, which screws up days - // with leap seconds (1d == 24h + 1s). Thankfully they're - // few and far between. + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); - $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. - $notice->selectAdd(); - $notice->selectAdd('id, created'); + $theend = strtotime($begindt) + (24 * 60 * 60); + $enddt = common_sql_date($theend); - $notice->whereAdd("created >= '$begindt'"); - $notice->whereAdd("created < '$enddt'"); + $notice->selectAdd(); + $notice->selectAdd('id, created'); - $notice->whereAdd('is_local != 0'); + $notice->whereAdd("created >= '$begindt'"); + $notice->whereAdd("created < '$enddt'"); - $notice->orderBy('created'); + $notice->whereAdd('is_local != 0'); - $notice->limit($offset, $limit); + $notice->orderBy('created'); - $notice->find(); + $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; + $limit = SitemapPlugin::NOTICES_PER_MAP; - $n = array(); + $notice->limit($offset, $limit); - while ($notice->fetch()) { - $n[] = array($notice->id, $notice->created); + $notice->find(); + + $n = array(); + + while ($notice->fetch()) { + $n[] = array($notice->id, $notice->created); + } + + $c = Cache::instance(); + + if (!empty($c)) { + $c->set(Cache::key("sitemap:notice:$y:$m:$d:$i"), + $n, + Cache::COMPRESSED, + ((time() > $theend) ? 
(time() + 90 * 24 * 60 * 60) : (time() + 5 * 60))); + } } return $n; From c6d9001db552a0980daae82170e00a8373904065 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 12:00:15 -0400 Subject: [PATCH 16/46] cache user data for user sitemap --- plugins/Sitemap/usersitemap.php | 81 +++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php index 42cadaca7d..3e5ac46525 100644 --- a/plugins/Sitemap/usersitemap.php +++ b/plugins/Sitemap/usersitemap.php @@ -43,7 +43,8 @@ if (!defined('STATUSNET')) { class UsersitemapAction extends SitemapAction { - var $user = null; + var $users = null; + var $j = 0; function prepare($args) { @@ -61,37 +62,67 @@ class UsersitemapAction extends SitemapAction $d += 0; $i += 0; - $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; - $limit = SitemapPlugin::USERS_PER_MAP; - - $this->user = new User(); - - $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); - - // XXX: estimates 1d == 24h, which screws up days - // with leap seconds (1d == 24h + 1s). Thankfully they're - // few and far between. 
- - $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); - - $this->user->whereAdd("created >= '$begindt'"); - $this->user->whereAdd("created < '$enddt'"); - - $this->user->orderBy('created'); - - $this->user->limit($offset, $limit); - - $this->user->find(); - + $this->users = $this->getUsers($y, $m, $d, $i); + $this->j = 0; return true; } function nextUrl() { - if ($this->user->fetch()) { - return array(common_profile_url($this->user->nickname), null, null, null); + if ($this->j < count($this->users)) { + $nickname = $this->users[$this->j]; + $this->j++; + return array(common_profile_url($nickname), null, null, null); } else { return null; } } + + function getUsers($y, $m, $d, $i) + { + $u = User::cacheGet("sitemap:user:$y:$m:$d:$i"); + + if ($u === false) { + + $user = new User(); + + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); + + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. + + $theend = strtotime($begindt) + (24 * 60 * 60); + $enddt = common_sql_date($theend); + + $user->selectAdd(); + $user->selectAdd('nickname'); + $user->whereAdd("created >= '$begindt'"); + $user->whereAdd("created < '$enddt'"); + + $user->orderBy('created'); + + $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; + $limit = SitemapPlugin::USERS_PER_MAP; + + $user->limit($offset, $limit); + + $user->find(); + + while ($user->fetch()) { + $u[] = $user->nickname; + } + + $c = Cache::instance(); + + if (!empty($c)) { + $c->set(Cache::key("sitemap:user:$y:$m:$d:$i"), + $u, + Cache::COMPRESSED, + ((time() > $theend) ? 
(time() + 90 * 24 * 60 * 60) : (time() + 5 * 60))); + } + } + + return $u; + } } From da8b231d2ee4c3b30489d0e010aa2dd29f8e80ec Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 12:06:08 -0400 Subject: [PATCH 17/46] make sure notice and user sitemap are 'in' top level directory --- plugins/Sitemap/SitemapPlugin.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 29c32a6242..ed876d94f8 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -89,14 +89,14 @@ class SitemapPlugin extends Plugin $m->connect('sitemapindex.xml', array('action' => 'sitemapindex')); - $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', + $m->connect('/notice-sitemap-:year-:month-:day-:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', 'month' => '[01][0-9]', 'day' => '[0123][0-9]', 'index' => '[1-9][0-9]*')); - $m->connect('/sitemaps/user/:year/:month/:day/:index.xml', + $m->connect('/user-sitemap-:year-:month-:day-:index.xml', array('action' => 'usersitemap'), array('year' => '[0-9]{4}', 'month' => '[01][0-9]', From 09e5046dd388baf2629aceea66b5101ee4c4fb86 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 12:13:48 -0400 Subject: [PATCH 18/46] cache notice and user counts in sitemap index --- plugins/Sitemap/sitemapindex.php | 46 +++++++++++++++++++------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php index 7942bc3bd0..2055dd7f06 100644 --- a/plugins/Sitemap/sitemapindex.php +++ b/plugins/Sitemap/sitemapindex.php @@ -101,20 +101,25 @@ class SitemapindexAction extends Action function getUserCounts() { - // XXX: cachemeplease + $userCounts = User::cacheGet('sitemap:user:counts'); - $user = new User(); + if ($userCounts === false) { - $user->selectAdd(); - $user->selectAdd('date(created) as regdate, 
count(*) as regcount'); - $user->groupBy('regdate'); + $user = new User(); - $user->find(); + $user->selectAdd(); + $user->selectAdd('date(created) as regdate, count(*) as regcount'); + $user->groupBy('regdate'); - $userCounts = array(); + $user->find(); - while ($user->fetch()) { - $userCounts[$user->regdate] = $user->regcount; + $userCounts = array(); + + while ($user->fetch()) { + $userCounts[$user->regdate] = $user->regcount; + } + + User::cacheSet('sitemap:user:counts', $userCounts); } return $userCounts; @@ -122,20 +127,25 @@ class SitemapindexAction extends Action function getNoticeCounts() { - // XXX: cachemeplease + $noticeCounts = Notice::cacheGet('sitemap:notice:counts'); - $notice = new Notice(); + if ($noticeCounts === false) { - $notice->selectAdd(); - $notice->selectAdd('date(created) as postdate, count(*) as postcount'); - $notice->groupBy('postdate'); + $notice = new Notice(); - $notice->find(); + $notice->selectAdd(); + $notice->selectAdd('date(created) as postdate, count(*) as postcount'); + $notice->groupBy('postdate'); - $noticeCounts = array(); + $notice->find(); - while ($notice->fetch()) { - $noticeCounts[$notice->postdate] = $notice->postcount; + $noticeCounts = array(); + + while ($notice->fetch()) { + $noticeCounts[$notice->postdate] = $notice->postcount; + } + + Notice::cacheSet('sitemap:notice:counts', $noticeCounts); } return $noticeCounts; From f3f652e451bceb64d919f3e8a2fcbeeb6c9dd187 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 14:32:01 -0400 Subject: [PATCH 19/46] add sitemap statement to robots.txt --- plugins/Sitemap/SitemapPlugin.php | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index ed876d94f8..6fc7021049 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -76,6 +76,23 @@ class SitemapPlugin extends Plugin } } + /** + * Add sitemap-related information at the end of robots.txt 
+ * + * @param Action $action Action being run + * + * @return boolean hook value. + */ + + function onEndRobotsTxt($action) + { + $url = common_local_url('sitemapindex'); + + print "\nSitemap: $url\n"; + + return true; + } + /** * Map URLs to actions * From 5a9ff7c575fb2b23d8b7b3eaf896852bb3501b80 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 14:34:22 -0400 Subject: [PATCH 20/46] note that sitemap actions are readonly --- plugins/Sitemap/sitemapaction.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/plugins/Sitemap/sitemapaction.php b/plugins/Sitemap/sitemapaction.php index bab04ed9d2..45edfccc51 100644 --- a/plugins/Sitemap/sitemapaction.php +++ b/plugins/Sitemap/sitemapaction.php @@ -87,4 +87,9 @@ class SitemapAction extends Action { return null; } + + function isReadOnly() + { + return true; + } } From ce0e6cb50d88c593db62edd8375c4414e8a8ebf8 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 22 Mar 2010 00:25:49 -0400 Subject: [PATCH 21/46] user sitemap --- plugins/Sitemap/SitemapPlugin.php | 111 ++++++++++++++++++++++++++++++ plugins/Sitemap/sitemapaction.php | 90 ++++++++++++++++++++++++ plugins/Sitemap/usersitemap.php | 79 +++++++++++++++++++++ 3 files changed, 280 insertions(+) create mode 100644 plugins/Sitemap/SitemapPlugin.php create mode 100644 plugins/Sitemap/sitemapaction.php create mode 100644 plugins/Sitemap/usersitemap.php diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php new file mode 100644 index 0000000000..42ea1dbe62 --- /dev/null +++ b/plugins/Sitemap/SitemapPlugin.php @@ -0,0 +1,111 @@ +. + * + * @category Sample + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + // This check helps protect against security problems; + // your code file can't be executed directly from the web. 
+ exit(1); +} + +/** + * Sitemap plugin + * + * @category Sample + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +class SitemapPlugin extends Plugin +{ + /** + * Load related modules when needed + * + * Most non-trivial plugins will require extra modules to do their work. Typically + * these include data classes, action classes, widget classes, or external libraries. + * + * This method receives a class name and loads the PHP file related to that class. By + * tradition, action classes typically have files named for the action, all lower-case. + * Data classes are in files with the data class name, initial letter capitalized. + * + * Note that this method will be called for *all* overloaded classes, not just ones + * in this plugin! So, make sure to return true by default to let other plugins, and + * the core code, get a chance. + * + * @param string $cls Name of the class to be loaded + * + * @return boolean hook value; true means continue processing, false means stop. + */ + + function onAutoload($cls) + { + $dir = dirname(__FILE__); + + switch ($cls) + { + case 'SitemapindexAction': + case 'NoticesitemapAction': + case 'UsersitemapAction': + require_once $dir . '/' . strtolower(mb_substr($cls, 0, -6)) . '.php'; + return false; + case 'SitemapAction': + require_once $dir . '/' . strtolower($cls) . '.php'; + default: + return true; + } + } + + /** + * Map URLs to actions + * + * @param Net_URL_Mapper $m path-to-action mapper + * + * @return boolean hook value; true means continue processing, false means stop. 
+ */ + + function onRouterInitialized($m) + { + $m->connect('sitemapindex.xml', + array('action' => 'sitemapindex')); + $m->connect('/sitemaps/notice/:year/:month/:day.xml', + array('action' => 'noticesitemap'), + array('year' => '[0-9]{4}', + 'month' => '[1]?[0-9]', + 'day' => '[123]?[0-9]')); + $m->connect('/sitemaps/user/:index.xml', + array('action' => 'usersitemap'), + array('index' => '[0-9]+', + 'month' => '[1]?[0-9]', + 'day' => '[123]?[0-9]')); + return true; + } +} diff --git a/plugins/Sitemap/sitemapaction.php b/plugins/Sitemap/sitemapaction.php new file mode 100644 index 0000000000..ab80b85eaa --- /dev/null +++ b/plugins/Sitemap/sitemapaction.php @@ -0,0 +1,90 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * superclass for sitemap actions + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class SitemapAction extends Action +{ + /** + * handle the action + * + * @param array $args unused. 
+ * + * @return void + */ + + function handle($args) + { + header('Content-Type: text/xml; charset=UTF-8'); + $this->startXML(); + + $this->elementStart('sitemap'); + + while (list($url, $lm, $cf, $p) = $this->nextUrl()) { + $this->showUrl($url, $lm, $cf, $p); + } + + $this->elementEnd('sitemap'); + + $this->endXML(); + } + + function showUrl($url, $lastMod=null, $changeFreq=null, $priority=null) + { + $this->elementStart('url'); + $this->element('loc', null, $url); + if (!is_null($lastMod)) { + $this->element('lastmod', null, $lastMod); + } + if (!is_null($changeFreq)) { + $this->element('changefreq', null, $changeFreq); + } + if (!is_null($priority)) { + $this->element('priority', null, $priority); + } + $this->elementEnd('url'); + } + + function nextUrl() + { + return null; + } +} diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php new file mode 100644 index 0000000000..582a13b664 --- /dev/null +++ b/plugins/Sitemap/usersitemap.php @@ -0,0 +1,79 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. 
+ * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * sitemap for users + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class UsersitemapAction extends SitemapAction +{ + const USERS_PER_MAP = 25000; + + var $user = null; + + function prepare($args) + { + parent::prepare($args); + + $i = $this->trimmed('index'); + + $i += 0; + + $offset = ($i-1) * self::USERS_PER_MAP; + $limit = self::USERS_PER_MAP; + + $this->user = new User(); + + $this->user->orderBy('id'); + $this->user->limit($offset, $limit); + + $this->user->find(); + + return true; + } + + function nextUrl() + { + if ($this->user->fetch()) { + return array(common_profile_url($this->user->nickname), null, null, null); + } else { + return null; + } + } +} From 524cd9df936bd27d5b64fbf08b219b56cb071122 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 22 Mar 2010 08:09:15 -0400 Subject: [PATCH 22/46] Add a Notice sitemap --- plugins/Sitemap/SitemapPlugin.php | 6 +- plugins/Sitemap/noticesitemap.php | 94 +++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 plugins/Sitemap/noticesitemap.php diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 42ea1dbe62..8889c89306 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -96,11 +96,13 @@ class SitemapPlugin extends Plugin { $m->connect('sitemapindex.xml', array('action' => 'sitemapindex')); - $m->connect('/sitemaps/notice/:year/:month/:day.xml', + $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', 'month' => '[1]?[0-9]', - 'day' => '[123]?[0-9]')); + 
'day' => '[123]?[0-9]', + 'index' => '[0-9]+')); + $m->connect('/sitemaps/user/:index.xml', array('action' => 'usersitemap'), array('index' => '[0-9]+', diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php new file mode 100644 index 0000000000..7eec886363 --- /dev/null +++ b/plugins/Sitemap/noticesitemap.php @@ -0,0 +1,94 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * sitemap for users + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class NoticesitemapAction extends SitemapAction +{ + const NOTICES_PER_MAP = 25000; + + var $notice = null; + + function prepare($args) + { + parent::prepare($args); + + $y = $this->trimmed('year'); + + $m = $this->trimmed('month'); + $d = $this->trimmed('day'); + + $i = $this->trimmed('index'); + + $y += 0; + $m += 0; + $d += 0; + $i += 0; + + $offset = ($i-1) * self::NOTICES_PER_MAP; + $limit = self::NOTICES_PER_MAP; + + $this->notice = new Notice(); + + $this->notice->whereAdd("created > '$y-$m-$d 00:00:00'"); + $this->notice->whereAdd("created <= '$y-$m-$d 11:59:59'"); + $this->notice->whereAdd('is_local = 1'); + + $this->notice->orderBy('id'); + $this->notice->limit($offset, $limit); + + $this->notice->find(); + + return true; + } + + function nextUrl() + { + if ($this->notice->fetch()) { + return array(common_local_url('shownotice', array('notice' => $this->notice->id)), + common_date_w3dtf($this->notice->created), + null, + null); + } else { + return null; + } + } +} From 0a04f9d49db4f28ace3f3d94ec0763a926296b44 Mon Sep 17 00:00:00 2001 From: Evan Prodromou 
Date: Thu, 25 Mar 2010 23:56:09 -0400 Subject: [PATCH 23/46] better query for notices by date --- plugins/Sitemap/SitemapPlugin.php | 17 +++-------------- plugins/Sitemap/noticesitemap.php | 9 ++++++--- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 8889c89306..bb404cd25e 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -50,17 +50,6 @@ class SitemapPlugin extends Plugin /** * Load related modules when needed * - * Most non-trivial plugins will require extra modules to do their work. Typically - * these include data classes, action classes, widget classes, or external libraries. - * - * This method receives a class name and loads the PHP file related to that class. By - * tradition, action classes typically have files named for the action, all lower-case. - * Data classes are in files with the data class name, initial letter capitalized. - * - * Note that this method will be called for *all* overloaded classes, not just ones - * in this plugin! So, make sure to return true by default to let other plugins, and - * the core code, get a chance. - * * @param string $cls Name of the class to be loaded * * @return boolean hook value; true means continue processing, false means stop. 
@@ -99,9 +88,9 @@ class SitemapPlugin extends Plugin $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', - 'month' => '[1]?[0-9]', - 'day' => '[123]?[0-9]', - 'index' => '[0-9]+')); + 'month' => '[01][0-9]', + 'day' => '[0123][0-9]', + 'index' => '[1-9][0-9]*')); $m->connect('/sitemaps/user/:index.xml', array('action' => 'usersitemap'), diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 7eec886363..0024084863 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -68,9 +68,12 @@ class NoticesitemapAction extends SitemapAction $this->notice = new Notice(); - $this->notice->whereAdd("created > '$y-$m-$d 00:00:00'"); - $this->notice->whereAdd("created <= '$y-$m-$d 11:59:59'"); - $this->notice->whereAdd('is_local = 1'); + $dt = sprintf('%04d-%02d-%02d', $y, $m, $d); + + $this->notice->whereAdd("created > '$dt 00:00:00'"); + $this->notice->whereAdd("created <= '$dt 23:59:59'"); + + $this->notice->whereAdd('is_local != 0'); $this->notice->orderBy('id'); $this->notice->limit($offset, $limit); From d65a65756b88347b208b10f6abd2573d7703b6d5 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:03:37 -0400 Subject: [PATCH 24/46] correct element name and namespace for sitemapactions --- plugins/Sitemap/sitemapaction.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Sitemap/sitemapaction.php b/plugins/Sitemap/sitemapaction.php index ab80b85eaa..bab04ed9d2 100644 --- a/plugins/Sitemap/sitemapaction.php +++ b/plugins/Sitemap/sitemapaction.php @@ -56,13 +56,13 @@ class SitemapAction extends Action header('Content-Type: text/xml; charset=UTF-8'); $this->startXML(); - $this->elementStart('sitemap'); + $this->elementStart('urlset', array('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9')); while (list($url, $lm, $cf, $p) = $this->nextUrl()) { $this->showUrl($url, $lm, $cf, $p); } - 
$this->elementEnd('sitemap'); + $this->elementEnd('urlset'); $this->endXML(); } From 9e592baa39dfab0c1e09c4356fb8d434379797c3 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:03:57 -0400 Subject: [PATCH 25/46] move USERS_PER_MAP to plugin --- plugins/Sitemap/SitemapPlugin.php | 3 +++ plugins/Sitemap/usersitemap.php | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index bb404cd25e..40263aaeef 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -47,6 +47,8 @@ if (!defined('STATUSNET')) { class SitemapPlugin extends Plugin { + const USERS_PER_MAP = 25000; + /** * Load related modules when needed * @@ -85,6 +87,7 @@ class SitemapPlugin extends Plugin { $m->connect('sitemapindex.xml', array('action' => 'sitemapindex')); + $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php index 582a13b664..b7cc939a9f 100644 --- a/plugins/Sitemap/usersitemap.php +++ b/plugins/Sitemap/usersitemap.php @@ -43,8 +43,6 @@ if (!defined('STATUSNET')) { class UsersitemapAction extends SitemapAction { - const USERS_PER_MAP = 25000; - var $user = null; function prepare($args) @@ -55,8 +53,8 @@ class UsersitemapAction extends SitemapAction $i += 0; - $offset = ($i-1) * self::USERS_PER_MAP; - $limit = self::USERS_PER_MAP; + $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; + $limit = SitemapPlugin::USERS_PER_MAP; $this->user = new User(); From 63c4eef64322da6a360c9ef3d7e1a20de9ca9cdd Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:21:19 -0400 Subject: [PATCH 26/46] change URLs for user sitemap --- plugins/Sitemap/SitemapPlugin.php | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php 
index 40263aaeef..5b2af48795 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -95,11 +95,12 @@ class SitemapPlugin extends Plugin 'day' => '[0123][0-9]', 'index' => '[1-9][0-9]*')); - $m->connect('/sitemaps/user/:index.xml', + $m->connect('/sitemaps/user/:year/:month/:day/:index.xml', array('action' => 'usersitemap'), - array('index' => '[0-9]+', - 'month' => '[1]?[0-9]', - 'day' => '[123]?[0-9]')); + array('year' => '[0-9]{4}', + 'month' => '[01][0-9]', + 'day' => '[0123][0-9]', + 'index' => '[1-9][0-9]*')); return true; } } From 35272f638c0f162f43c951e1ffcef55c8f54787e Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 10 Apr 2010 10:24:58 -0400 Subject: [PATCH 27/46] Start of an action for sitemap index --- plugins/Sitemap/sitemapindex.php | 75 ++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 plugins/Sitemap/sitemapindex.php diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php new file mode 100644 index 0000000000..09aebe0d8f --- /dev/null +++ b/plugins/Sitemap/sitemapindex.php @@ -0,0 +1,75 @@ +. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * Show the sitemap index + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class SitemapAction extends Action +{ + /** + * handle the action + * + * @param array $args unused. 
+ * + * @return void + */ + + function handle($args) + { + header('Content-Type: text/xml; charset=UTF-8'); + $this->startXML(); + + $this->elementStart('sitemapindex', array('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9')); + + $this->showUserSitemaps(); + $this->showNoticeSitemaps(); + + $this->elementEnd('sitemapindex'); + + $this->endXML(); + } + + function showUserSitemaps() + { + $user = new User(); + $cnt = $user->count(); + + } +} From 9fdafaf07e0ef90e378c6a3e81dbcf60e31c2cac Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 10:11:18 -0400 Subject: [PATCH 28/46] better calculation for end date in notice sitemaps --- plugins/Sitemap/noticesitemap.php | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 0024084863..12a22dbb22 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -68,14 +68,21 @@ class NoticesitemapAction extends SitemapAction $this->notice = new Notice(); - $dt = sprintf('%04d-%02d-%02d', $y, $m, $d); + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); - $this->notice->whereAdd("created > '$dt 00:00:00'"); - $this->notice->whereAdd("created <= '$dt 23:59:59'"); + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. 
+ + $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); + + $this->notice->whereAdd("created >= '$begindt'"); + $this->notice->whereAdd("created < '$enddt'"); $this->notice->whereAdd('is_local != 0'); - $this->notice->orderBy('id'); + $this->notice->orderBy('created'); + $this->notice->limit($offset, $limit); $this->notice->find(); From 144cdb559dc345016d087409c57554dd1fb03911 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 10:11:45 -0400 Subject: [PATCH 29/46] bundle users by reg date --- plugins/Sitemap/usersitemap.php | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php index b7cc939a9f..42cadaca7d 100644 --- a/plugins/Sitemap/usersitemap.php +++ b/plugins/Sitemap/usersitemap.php @@ -49,8 +49,16 @@ class UsersitemapAction extends SitemapAction { parent::prepare($args); + $y = $this->trimmed('year'); + + $m = $this->trimmed('month'); + $d = $this->trimmed('day'); + $i = $this->trimmed('index'); + $y += 0; + $m += 0; + $d += 0; $i += 0; $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; @@ -58,7 +66,19 @@ class UsersitemapAction extends SitemapAction $this->user = new User(); - $this->user->orderBy('id'); + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); + + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. 
+ + $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); + + $this->user->whereAdd("created >= '$begindt'"); + $this->user->whereAdd("created < '$enddt'"); + + $this->user->orderBy('created'); + $this->user->limit($offset, $limit); $this->user->find(); From b73c8ff441008a4fa09eef66871f2cfebc0569b5 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 10:23:32 -0400 Subject: [PATCH 30/46] Move NOTICES_PER_MAP to SitemapPlugin --- plugins/Sitemap/SitemapPlugin.php | 3 ++- plugins/Sitemap/noticesitemap.php | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 5b2af48795..fa9c9a76d0 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -47,7 +47,8 @@ if (!defined('STATUSNET')) { class SitemapPlugin extends Plugin { - const USERS_PER_MAP = 25000; + const USERS_PER_MAP = 25000; + const NOTICES_PER_MAP = 25000; /** * Load related modules when needed diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 12a22dbb22..c8db24efee 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -43,8 +43,6 @@ if (!defined('STATUSNET')) { class NoticesitemapAction extends SitemapAction { - const NOTICES_PER_MAP = 25000; - var $notice = null; function prepare($args) @@ -63,8 +61,8 @@ class NoticesitemapAction extends SitemapAction $d += 0; $i += 0; - $offset = ($i-1) * self::NOTICES_PER_MAP; - $limit = self::NOTICES_PER_MAP; + $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; + $limit = SitemapPlugin::NOTICES_PER_MAP; $this->notice = new Notice(); From 4b321f96fc9b45ae3000088b8cfd856f9ffe1529 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:04:56 -0400 Subject: [PATCH 31/46] show sitemapindex with user and notice sitemaps --- plugins/Sitemap/sitemapindex.php | 95 +++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 3 deletions(-) 
diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php index 09aebe0d8f..7942bc3bd0 100644 --- a/plugins/Sitemap/sitemapindex.php +++ b/plugins/Sitemap/sitemapindex.php @@ -41,7 +41,7 @@ if (!defined('STATUSNET')) { * @link http://status.net/ */ -class SitemapAction extends Action +class SitemapindexAction extends Action { /** * handle the action @@ -68,8 +68,97 @@ class SitemapAction extends Action function showUserSitemaps() { - $user = new User(); - $cnt = $user->count(); + $userCounts = $this->getUserCounts(); + foreach ($userCounts as $dt => $cnt) { + $cnt = $cnt+0; + assert($cnt != 0); + $n = (int)$cnt / (int)SitemapPlugin::USERS_PER_MAP; + if (($cnt % SitemapPlugin::USERS_PER_MAP) != 0) { + $n++; + } + for ($i = 1; $i <= $n; $i++) { + $this->showSitemap('user', $dt, $i); + } + } + } + + function showNoticeSitemaps() + { + $noticeCounts = $this->getNoticeCounts(); + + foreach ($noticeCounts as $dt => $cnt) { + assert($cnt != 0); + $n = $cnt / SitemapPlugin::NOTICES_PER_MAP; + if ($cnt % SitemapPlugin::NOTICES_PER_MAP) { + $n++; + } + for ($i = 1; $i <= $n; $i++) { + $this->showSitemap('notice', $dt, $i); + } + } + } + + function getUserCounts() + { + // XXX: cachemeplease + + $user = new User(); + + $user->selectAdd(); + $user->selectAdd('date(created) as regdate, count(*) as regcount'); + $user->groupBy('regdate'); + + $user->find(); + + $userCounts = array(); + + while ($user->fetch()) { + $userCounts[$user->regdate] = $user->regcount; + } + + return $userCounts; + } + + function getNoticeCounts() + { + // XXX: cachemeplease + + $notice = new Notice(); + + $notice->selectAdd(); + $notice->selectAdd('date(created) as postdate, count(*) as postcount'); + $notice->groupBy('postdate'); + + $notice->find(); + + $noticeCounts = array(); + + while ($notice->fetch()) { + $noticeCounts[$notice->postdate] = $notice->postcount; + } + + return $noticeCounts; + } + + function showSitemap($prefix, $dt, $i) + { + list($y, $m, $d) = explode('-', 
$dt); + + $this->elementStart('sitemap'); + $this->element('loc', null, common_local_url($prefix.'sitemap', + array('year' => $y, + 'month' => $m, + 'day' => $d, + 'index' => $i))); + + $begdate = strtotime("$y-$m-$d 00:00:00"); + $enddate = $begdate + (24 * 60 * 60); + + if ($enddate < time()) { + $this->element('lastmod', null, date(DATE_W3C, $enddate)); + } + + $this->elementEnd('sitemap'); } } From 8b9ce731f4707f6939497d139521acee56596dea Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:05:19 -0400 Subject: [PATCH 32/46] max users, notices per sitemap = 50K --- plugins/Sitemap/SitemapPlugin.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index fa9c9a76d0..29c32a6242 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -47,8 +47,8 @@ if (!defined('STATUSNET')) { class SitemapPlugin extends Plugin { - const USERS_PER_MAP = 25000; - const NOTICES_PER_MAP = 25000; + const USERS_PER_MAP = 50000; + const NOTICES_PER_MAP = 50000; /** * Load related modules when needed From 610d8021d8653822dbc52f0790eee41e4db38c17 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:28:41 -0400 Subject: [PATCH 33/46] use an array for notice sitemap --- plugins/Sitemap/noticesitemap.php | 58 +++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index c8db24efee..6cf2b3d01f 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -43,7 +43,8 @@ if (!defined('STATUSNET')) { class NoticesitemapAction extends SitemapAction { - var $notice = null; + var $notices = null; + var $j = 0; function prepare($args) { @@ -61,10 +62,32 @@ class NoticesitemapAction extends SitemapAction $d += 0; $i += 0; + $this->notices = $this->getNotices($y, $m, $d, $i); + $this->j = 0; + + return true; 
+ } + + function nextUrl() + { + if ($this->j < count($this->notices)) { + $n = $this->notices[$this->j]; + $this->j++; + return array(common_local_url('shownotice', array('notice' => $n[0])), + common_date_w3dtf($n[1]), + null, + null); + } else { + return null; + } + } + + function getNotices($y, $m, $d, $i) + { $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; $limit = SitemapPlugin::NOTICES_PER_MAP; - $this->notice = new Notice(); + $notice = new Notice(); $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); @@ -74,29 +97,26 @@ class NoticesitemapAction extends SitemapAction $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); - $this->notice->whereAdd("created >= '$begindt'"); - $this->notice->whereAdd("created < '$enddt'"); + $notice->selectAdd(); + $notice->selectAdd('id, created'); - $this->notice->whereAdd('is_local != 0'); + $notice->whereAdd("created >= '$begindt'"); + $notice->whereAdd("created < '$enddt'"); - $this->notice->orderBy('created'); + $notice->whereAdd('is_local != 0'); - $this->notice->limit($offset, $limit); + $notice->orderBy('created'); - $this->notice->find(); + $notice->limit($offset, $limit); - return true; - } + $notice->find(); - function nextUrl() - { - if ($this->notice->fetch()) { - return array(common_local_url('shownotice', array('notice' => $this->notice->id)), - common_date_w3dtf($this->notice->created), - null, - null); - } else { - return null; + $n = array(); + + while ($notice->fetch()) { + $n[] = array($notice->id, $notice->created); } + + return $n; } } From 946cd15e8bc12bece4a14a07c6e109c3d8904a66 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 11:52:19 -0400 Subject: [PATCH 34/46] cache results of notice sitemap query --- plugins/Sitemap/noticesitemap.php | 53 ++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index 6cf2b3d01f..bc8a7bfd65 100644 --- 
a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -84,37 +84,52 @@ class NoticesitemapAction extends SitemapAction function getNotices($y, $m, $d, $i) { - $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; - $limit = SitemapPlugin::NOTICES_PER_MAP; + $n = Notice::cacheGet("sitemap:notice:$y:$m:$d:$i"); - $notice = new Notice(); + if ($n === false) { - $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); + $notice = new Notice(); - // XXX: estimates 1d == 24h, which screws up days - // with leap seconds (1d == 24h + 1s). Thankfully they're - // few and far between. + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); - $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. - $notice->selectAdd(); - $notice->selectAdd('id, created'); + $theend = strtotime($begindt) + (24 * 60 * 60); + $enddt = common_sql_date($theend); - $notice->whereAdd("created >= '$begindt'"); - $notice->whereAdd("created < '$enddt'"); + $notice->selectAdd(); + $notice->selectAdd('id, created'); - $notice->whereAdd('is_local != 0'); + $notice->whereAdd("created >= '$begindt'"); + $notice->whereAdd("created < '$enddt'"); - $notice->orderBy('created'); + $notice->whereAdd('is_local != 0'); - $notice->limit($offset, $limit); + $notice->orderBy('created'); - $notice->find(); + $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; + $limit = SitemapPlugin::NOTICES_PER_MAP; - $n = array(); + $notice->limit($offset, $limit); - while ($notice->fetch()) { - $n[] = array($notice->id, $notice->created); + $notice->find(); + + $n = array(); + + while ($notice->fetch()) { + $n[] = array($notice->id, $notice->created); + } + + $c = Cache::instance(); + + if (!empty($c)) { + $c->set(Cache::key("sitemap:notice:$y:$m:$d:$i"), + $n, + Cache::COMPRESSED, + ((time() > $theend) ? 
(time() + 90 * 24 * 60 * 60) : (time() + 5 * 60))); + } } return $n; From 3e8172585d7146cacb44ee4543ea619f6a196561 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 12:00:15 -0400 Subject: [PATCH 35/46] cache user data for user sitemap --- plugins/Sitemap/usersitemap.php | 81 +++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php index 42cadaca7d..3e5ac46525 100644 --- a/plugins/Sitemap/usersitemap.php +++ b/plugins/Sitemap/usersitemap.php @@ -43,7 +43,8 @@ if (!defined('STATUSNET')) { class UsersitemapAction extends SitemapAction { - var $user = null; + var $users = null; + var $j = 0; function prepare($args) { @@ -61,37 +62,67 @@ class UsersitemapAction extends SitemapAction $d += 0; $i += 0; - $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; - $limit = SitemapPlugin::USERS_PER_MAP; - - $this->user = new User(); - - $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); - - // XXX: estimates 1d == 24h, which screws up days - // with leap seconds (1d == 24h + 1s). Thankfully they're - // few and far between. 
- - $enddt = common_sql_date(strtotime($begindt) + (24 * 60 * 60)); - - $this->user->whereAdd("created >= '$begindt'"); - $this->user->whereAdd("created < '$enddt'"); - - $this->user->orderBy('created'); - - $this->user->limit($offset, $limit); - - $this->user->find(); - + $this->users = $this->getUsers($y, $m, $d, $i); + $this->j = 0; return true; } function nextUrl() { - if ($this->user->fetch()) { - return array(common_profile_url($this->user->nickname), null, null, null); + if ($this->j < count($this->users)) { + $nickname = $this->users[$this->j]; + $this->j++; + return array(common_profile_url($nickname), null, null, null); } else { return null; } } + + function getUsers($y, $m, $d, $i) + { + $u = User::cacheGet("sitemap:user:$y:$m:$d:$i"); + + if ($u === false) { + + $user = new User(); + + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); + + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. + + $theend = strtotime($begindt) + (24 * 60 * 60); + $enddt = common_sql_date($theend); + + $user->selectAdd(); + $user->selectAdd('nickname'); + $user->whereAdd("created >= '$begindt'"); + $user->whereAdd("created < '$enddt'"); + + $user->orderBy('created'); + + $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; + $limit = SitemapPlugin::USERS_PER_MAP; + + $user->limit($offset, $limit); + + $user->find(); + + while ($user->fetch()) { + $u[] = $user->nickname; + } + + $c = Cache::instance(); + + if (!empty($c)) { + $c->set(Cache::key("sitemap:user:$y:$m:$d:$i"), + $u, + Cache::COMPRESSED, + ((time() > $theend) ? 
(time() + 90 * 24 * 60 * 60) : (time() + 5 * 60))); + } + } + + return $u; + } } From 5ff9c0242b2ad1df22af5630a7ebfdcce8177212 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 12:06:08 -0400 Subject: [PATCH 36/46] make sure notice and user sitemap are 'in' top level directory --- plugins/Sitemap/SitemapPlugin.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 29c32a6242..ed876d94f8 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -89,14 +89,14 @@ class SitemapPlugin extends Plugin $m->connect('sitemapindex.xml', array('action' => 'sitemapindex')); - $m->connect('/sitemaps/notice/:year/:month/:day/:index.xml', + $m->connect('/notice-sitemap-:year-:month-:day-:index.xml', array('action' => 'noticesitemap'), array('year' => '[0-9]{4}', 'month' => '[01][0-9]', 'day' => '[0123][0-9]', 'index' => '[1-9][0-9]*')); - $m->connect('/sitemaps/user/:year/:month/:day/:index.xml', + $m->connect('/user-sitemap-:year-:month-:day-:index.xml', array('action' => 'usersitemap'), array('year' => '[0-9]{4}', 'month' => '[01][0-9]', From e363b724b96d0509e56edabcb7fb199698e158b7 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 12:13:48 -0400 Subject: [PATCH 37/46] cache notice and user counts in sitemap index --- plugins/Sitemap/sitemapindex.php | 46 +++++++++++++++++++------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php index 7942bc3bd0..2055dd7f06 100644 --- a/plugins/Sitemap/sitemapindex.php +++ b/plugins/Sitemap/sitemapindex.php @@ -101,20 +101,25 @@ class SitemapindexAction extends Action function getUserCounts() { - // XXX: cachemeplease + $userCounts = User::cacheGet('sitemap:user:counts'); - $user = new User(); + if ($userCounts === false) { - $user->selectAdd(); - $user->selectAdd('date(created) as regdate, 
count(*) as regcount'); - $user->groupBy('regdate'); + $user = new User(); - $user->find(); + $user->selectAdd(); + $user->selectAdd('date(created) as regdate, count(*) as regcount'); + $user->groupBy('regdate'); - $userCounts = array(); + $user->find(); - while ($user->fetch()) { - $userCounts[$user->regdate] = $user->regcount; + $userCounts = array(); + + while ($user->fetch()) { + $userCounts[$user->regdate] = $user->regcount; + } + + User::cacheSet('sitemap:user:counts', $userCounts); } return $userCounts; @@ -122,20 +127,25 @@ class SitemapindexAction extends Action function getNoticeCounts() { - // XXX: cachemeplease + $noticeCounts = Notice::cacheGet('sitemap:notice:counts'); - $notice = new Notice(); + if ($noticeCounts === false) { - $notice->selectAdd(); - $notice->selectAdd('date(created) as postdate, count(*) as postcount'); - $notice->groupBy('postdate'); + $notice = new Notice(); - $notice->find(); + $notice->selectAdd(); + $notice->selectAdd('date(created) as postdate, count(*) as postcount'); + $notice->groupBy('postdate'); - $noticeCounts = array(); + $notice->find(); - while ($notice->fetch()) { - $noticeCounts[$notice->postdate] = $notice->postcount; + $noticeCounts = array(); + + while ($notice->fetch()) { + $noticeCounts[$notice->postdate] = $notice->postcount; + } + + Notice::cacheSet('sitemap:notice:counts', $noticeCounts); } return $noticeCounts; From 1c858e17eaf0e9509b7e165ea1b0d45f7e923361 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 14:32:01 -0400 Subject: [PATCH 38/46] add sitemap statement to robots.txt --- plugins/Sitemap/SitemapPlugin.php | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index ed876d94f8..6fc7021049 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -76,6 +76,23 @@ class SitemapPlugin extends Plugin } } + /** + * Add sitemap-related information at the end of robots.txt 
+ * + * @param Action $action Action being run + * + * @return boolean hook value. + */ + + function onEndRobotsTxt($action) + { + $url = common_local_url('sitemapindex'); + + print "\nSitemap: $url\n"; + + return true; + } + /** * Map URLs to actions * From 45e6e537cacc23aedabb1c0b0518766de1041768 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 12 Apr 2010 14:34:22 -0400 Subject: [PATCH 39/46] note that sitemap actions are readonly --- plugins/Sitemap/sitemapaction.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/plugins/Sitemap/sitemapaction.php b/plugins/Sitemap/sitemapaction.php index bab04ed9d2..45edfccc51 100644 --- a/plugins/Sitemap/sitemapaction.php +++ b/plugins/Sitemap/sitemapaction.php @@ -87,4 +87,9 @@ class SitemapAction extends Action { return null; } + + function isReadOnly() + { + return true; + } } From 40618b49e1d7bde1b348f3675b12e3f582884515 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 3 May 2010 11:23:01 -0400 Subject: [PATCH 40/46] mark notice pages as being archived --- plugins/Sitemap/noticesitemap.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php index bc8a7bfd65..9f323f72aa 100644 --- a/plugins/Sitemap/noticesitemap.php +++ b/plugins/Sitemap/noticesitemap.php @@ -75,7 +75,7 @@ class NoticesitemapAction extends SitemapAction $this->j++; return array(common_local_url('shownotice', array('notice' => $n[0])), common_date_w3dtf($n[1]), - null, + 'never', null); } else { return null; From 3e349a71cad00f2f99ecccb73dce3805b4fcb41c Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 3 May 2010 11:23:18 -0400 Subject: [PATCH 41/46] mark user pages as being high priority --- plugins/Sitemap/usersitemap.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php index 3e5ac46525..de12007157 100644 --- a/plugins/Sitemap/usersitemap.php +++ 
b/plugins/Sitemap/usersitemap.php @@ -72,7 +72,7 @@ class UsersitemapAction extends SitemapAction if ($this->j < count($this->users)) { $nickname = $this->users[$this->j]; $this->j++; - return array(common_profile_url($nickname), null, null, null); + return array(common_profile_url($nickname), null, null, '1.0'); } else { return null; } From 7d85b79814e7e91a88f23d7c7e752a0bcfc83ff4 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 3 May 2010 14:00:12 -0400 Subject: [PATCH 42/46] Database tables to cache expensive query data We need to bundle counts of notices and users by date. This can be expensive for large sites. So, new tables are added to cache the results of these queries, which don't change after the date is over. --- plugins/Sitemap/SitemapPlugin.php | 36 +++++++ plugins/Sitemap/Sitemap_notice_count.php | 125 +++++++++++++++++++++++ plugins/Sitemap/Sitemap_user_count.php | 121 ++++++++++++++++++++++ 3 files changed, 282 insertions(+) create mode 100644 plugins/Sitemap/Sitemap_notice_count.php create mode 100644 plugins/Sitemap/Sitemap_user_count.php diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 6fc7021049..831694efc6 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -121,4 +121,40 @@ class SitemapPlugin extends Plugin 'index' => '[1-9][0-9]*')); return true; } + + /** + * Database schema setup + * + * We cache some data persistently to avoid overlong queries. + * + * @see Sitemap_user_count + * @see Sitemap_notice_count + * + * @return boolean hook value; true means continue processing, false means stop. 
+ */ + + function onCheckSchema() + { + $schema = Schema::get(); + + // For storing user-submitted flags on profiles + + $schema->ensureTable('sitemap_user_count', + array(new ColumnDef('registration_date', 'date', null, + true, 'PRI'), + new ColumnDef('user_count', 'integer'), + new ColumnDef('created', 'datetime', + null, false), + new ColumnDef('modified', 'timestamp'))); + + $schema->ensureTable('sitemap_notice_count', + array(new ColumnDef('notice_date', 'date', null, + true, 'PRI'), + new ColumnDef('notice_count', 'integer'), + new ColumnDef('created', 'datetime', + null, false), + new ColumnDef('modified', 'timestamp'))); + + return true; + } } diff --git a/plugins/Sitemap/Sitemap_notice_count.php b/plugins/Sitemap/Sitemap_notice_count.php new file mode 100644 index 0000000000..72bb2b9d41 --- /dev/null +++ b/plugins/Sitemap/Sitemap_notice_count.php @@ -0,0 +1,125 @@ + + * @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3 + * @link http://status.net/ + * + * StatusNet - the distributed open-source microblogging tool + * Copyright (C) 2010, StatusNet, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +require_once INSTALLDIR . '/classes/Memcached_DataObject.php'; + +/** + * Data class for counting notices by date + * + * We make a separate sitemap for each notice posted by date. 
+ * To save ourselves some (not inconsiderable) processing effort, + * we cache this data in the sitemap_notice_count table. Each + * row represents a day since the site has been started, with a count + * of notices posted on that day. Since, after the end of the day, + * this number doesn't change, it's a good candidate for persistent caching. + * + * @category Data + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3 + * @link http://status.net/ + * + * @see DB_DataObject + */ + +class Sitemap_notice_count extends Memcached_DataObject +{ + public $__table = 'sitemap_notice_count'; // table name + + public $notice_date; // date primary_key not_null + public $notice_count; // int(4) + public $created; + public $modified; + + /** + * Get an instance by key + * + * This is a utility method to get a single instance with a given key value. + * + * @param string $k Key to use to lookup (usually 'notice_date' for this class) + * @param mixed $v Value to lookup + * + * @return Sitemap_notice_count object found, or null for no hits + * + */ + + function staticGet($k, $v=null) + { + return Memcached_DataObject::staticGet('Sitemap_notice_count', $k, $v); + } + + /** + * return table definition for DB_DataObject + * + * DB_DataObject needs to know something about the table to manipulate + * instances. This method provides all the DB_DataObject needs to know. + * + * @return array array of column definitions + */ + + function table() + { + return array('notice_date' => DB_DATAOBJECT_DATE + DB_DATAOBJECT_NOTNULL, + 'notice_count' => DB_DATAOBJECT_INT, + 'created' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL, + 'modified' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL); + } + + /** + * return key definitions for DB_DataObject + * + * DB_DataObject needs to know about keys that the table has; this function + * defines them.
+ * + * @return array key definitions + */ + + function keys() + { + return array('notice_date' => 'K'); + } + + /** + * return key definitions for Memcached_DataObject + * + * Our caching system uses the same key definitions, but uses a different + * method to get them. + * + * @return array key definitions + */ + + function keyTypes() + { + return $this->keys(); + } +} diff --git a/plugins/Sitemap/Sitemap_user_count.php b/plugins/Sitemap/Sitemap_user_count.php new file mode 100644 index 0000000000..1a7a6577d5 --- /dev/null +++ b/plugins/Sitemap/Sitemap_user_count.php @@ -0,0 +1,121 @@ + + * @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3 + * @link http://status.net/ + * + * StatusNet - the distributed open-source microblogging tool + * Copyright (C) 2010, StatusNet, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +require_once INSTALLDIR . '/classes/Memcached_DataObject.php'; + +/** + * Data class for counting users by date + * + * We make a separate sitemap for each user registered by date. 
+ * To save ourselves some processing effort, we cache this data. + * + * @category Action + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3 + * @link http://status.net/ + * + * @see DB_DataObject + */ + +class Sitemap_user_count extends Memcached_DataObject +{ + public $__table = 'sitemap_user_count'; // table name + + public $registration_date; // date primary_key not_null + public $user_count; // int(4) + public $created; + public $modified; + + /** + * Get an instance by key + * + * This is a utility method to get a single instance with a given key value. + * + * @param string $k Key to use to lookup (usually 'registration_date' for this class) + * @param mixed $v Value to lookup + * + * @return Sitemap_user_count object found, or null for no hits + * + */ + + function staticGet($k, $v=null) + { + return Memcached_DataObject::staticGet('Sitemap_user_count', $k, $v); + } + + /** + * return table definition for DB_DataObject + * + * DB_DataObject needs to know something about the table to manipulate + * instances. This method provides all the DB_DataObject needs to know. + * + * @return array array of column definitions + */ + + function table() + { + return array('registration_date' => DB_DATAOBJECT_DATE + DB_DATAOBJECT_NOTNULL, + 'user_count' => DB_DATAOBJECT_INT, + 'created' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL, + 'modified' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL); + } + + /** + * return key definitions for DB_DataObject + * + * DB_DataObject needs to know about keys that the table has; this function + * defines them. + * + * @return array key definitions + */ + + function keys() + { + return array('registration_date' => 'K'); + } + + /** + * return key definitions for Memcached_DataObject + * + * Our caching system uses the same key definitions, but uses a different + * method to get them.
+ * + * @return array key definitions + */ + + function keyTypes() + { + return $this->keys(); + } +} From 416161c94366292a623aecf8fe79b0d73c337e98 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 21 May 2010 16:47:30 -0400 Subject: [PATCH 43/46] make user counts use the database table --- plugins/Sitemap/SitemapPlugin.php | 4 + plugins/Sitemap/Sitemap_user_count.php | 166 +++++++++++++++++++++++++ plugins/Sitemap/sitemapindex.php | 35 ++---- 3 files changed, 180 insertions(+), 25 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 831694efc6..82c007d66f 100644 --- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -64,6 +64,9 @@ class SitemapPlugin extends Plugin switch ($cls) { + case 'Sitemap_user_count': + require_once $dir . '/' . $cls . '.php'; + return false; case 'SitemapindexAction': case 'NoticesitemapAction': case 'UsersitemapAction': @@ -71,6 +74,7 @@ class SitemapPlugin extends Plugin return false; case 'SitemapAction': require_once $dir . '/' . strtolower($cls) . 
'.php'; + return false; default: return true; } diff --git a/plugins/Sitemap/Sitemap_user_count.php b/plugins/Sitemap/Sitemap_user_count.php index 1a7a6577d5..7743b05326 100644 --- a/plugins/Sitemap/Sitemap_user_count.php +++ b/plugins/Sitemap/Sitemap_user_count.php @@ -105,6 +105,11 @@ class Sitemap_user_count extends Memcached_DataObject return array('registration_date' => 'K'); } + function sequenceKey() + { + return array(false, false, false); + } + /** * return key definitions for Memcached_DataObject * @@ -118,4 +123,165 @@ class Sitemap_user_count extends Memcached_DataObject { return $this->keys(); } + + static function getAll() + { + $userCounts = self::cacheGet('sitemap:user:counts'); + + if ($userCounts === false) { + + $suc = new Sitemap_user_count(); + $suc->orderBy('registration_date DESC'); + + // Fetch the first one to check up-to-date-itude + + $n = $suc->find(true); + + $today = self::today(); + $userCounts = array(); + + if (!$n) { // No counts saved yet + $userCounts = self::initializeCounts(); + } else if ($suc->registration_date < $today) { // There are counts but not up to today + $userCounts = self::fillInCounts($suc->registration_date); + } else if ($suc->registration_date == $today) { // Refresh today's + $userCounts[$today] = self::updateToday(); + } + + // starts with second-to-last date + + while ($suc->fetch()) { + $userCounts[$suc->registration_date] = $suc->user_count; + } + + self::cacheSet('sitemap:user:counts', $userCounts); + } + + return $userCounts; + } + + static function initializeCounts() + { + $firstDate = self::getFirstDate(); // awww + $today = self::today(); + + $counts = array(); + + for ($d = $firstDate; $d <= $today; $d = self::incrementDay($d)) { + common_debug("Date = '$d'"); + $n = self::getCount($d); + self::insertCount($d, $n); + $counts[$d] = $n; + } + + return $counts; + } + + static function fillInCounts($lastDate) + { + $today = self::today(); + + $counts = array(); + + $n = self::getCount($lastDate); + 
self::updateCount($lastDate, $n); + + $counts[$lastDate] = $n; + + for ($d = self::incrementDay($lastDate); $d <= $today; $d = self::incrementDay($d)) { + $n = self::getCount($d); + self::insertCount($d, $n); + } + + return $counts; + } + + static function updateToday() + { + $today = self::today(); + + $n = self::getCount($today); + self::updateCount($today, $n); + + return $n; + } + + static function getCount($d) + { + $user = new User(); + $user->whereAdd('created BETWEEN "'.$d.' 00:00:00" AND "'.self::incrementDay($d).' 00:00:00"'); + $n = $user->count(); + + return $n; + } + + static function insertCount($d, $n) + { + common_debug("Inserting count '$n' for '$d'"); + + $suc = new Sitemap_user_count(); + + $suc->registration_date = DB_DataObject_Cast::date($d); + $suc->user_count = $n; + $suc->created = common_sql_now(); + $suc->modified = $suc->created; + + if (!$suc->insert()) { + common_log(LOG_WARNING, "Could not save user counts for '$d'"); + } + } + + static function updateCount($d, $n) + { + $suc = Sitemap_user_count::staticGet('registration_date', DB_DataObject_Cast::date($d)); + + if (empty($suc)) { + throw new Exception("No such registration date: $d"); + } + + $orig = clone($suc); + + $suc->registration_date = DB_DataObject_Cast::date($d); + $suc->user_count = $n; + $suc->created = common_sql_now(); + $suc->modified = $suc->created; + + if (!$suc->update($orig)) { + common_log(LOG_WARNING, "Could not save user counts for '$d'"); + } + } + + static function incrementDay($d) + { + $dt = self::dateStrToInt($d); + return self::dateIntToStr($dt + 24 * 60 * 60); + } + + static function dateStrToInt($d) + { + return strtotime($d.' 00:00:00'); + } + + static function dateIntToStr($dt) + { + return date('Y-m-d', $dt); + } + + static function getFirstDate() + { + $u = new User(); + $u->selectAdd(); + $u->selectAdd('date(min(created)) as first_date'); + if ($u->find(true)) { + return $u->first_date; + } else { + // Is this right? 
+ return self::dateIntToStr(time()); + } + } + + static function today() + { + return self::dateIntToStr(time()); + } } diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php index 2055dd7f06..a3328340fe 100644 --- a/plugins/Sitemap/sitemapindex.php +++ b/plugins/Sitemap/sitemapindex.php @@ -68,11 +68,15 @@ class SitemapindexAction extends Action function showUserSitemaps() { - $userCounts = $this->getUserCounts(); + $userCounts = Sitemap_user_count::getAll(); foreach ($userCounts as $dt => $cnt) { $cnt = $cnt+0; - assert($cnt != 0); + + if ($cnt == 0) { + continue; + } + $n = (int)$cnt / (int)SitemapPlugin::USERS_PER_MAP; if (($cnt % SitemapPlugin::USERS_PER_MAP) != 0) { $n++; @@ -88,7 +92,9 @@ class SitemapindexAction extends Action $noticeCounts = $this->getNoticeCounts(); foreach ($noticeCounts as $dt => $cnt) { - assert($cnt != 0); + if ($cnt == 0) { + continue; + } $n = $cnt / SitemapPlugin::NOTICES_PER_MAP; if ($cnt % SitemapPlugin::NOTICES_PER_MAP) { $n++; @@ -101,28 +107,7 @@ class SitemapindexAction extends Action function getUserCounts() { - $userCounts = User::cacheGet('sitemap:user:counts'); - - if ($userCounts === false) { - - $user = new User(); - - $user->selectAdd(); - $user->selectAdd('date(created) as regdate, count(*) as regcount'); - $user->groupBy('regdate'); - - $user->find(); - - $userCounts = array(); - - while ($user->fetch()) { - $userCounts[$user->regdate] = $user->regcount; - } - - User::cacheSet('sitemap:user:counts', $userCounts); - } - - return $userCounts; + return Sitemap_user_count::getAll(); } function getNoticeCounts() From 271d7dd8509537aee7e10fd6c8f493e62e6c75b2 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 31 May 2010 07:48:14 -0700 Subject: [PATCH 44/46] load Sitemap_notice_count --- plugins/Sitemap/SitemapPlugin.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php index 82c007d66f..7ef5f1aa9b 100644 
--- a/plugins/Sitemap/SitemapPlugin.php +++ b/plugins/Sitemap/SitemapPlugin.php @@ -65,6 +65,7 @@ class SitemapPlugin extends Plugin switch ($cls) { case 'Sitemap_user_count': + case 'Sitemap_notice_count': require_once $dir . '/' . $cls . '.php'; return false; case 'SitemapindexAction': @@ -141,8 +142,6 @@ class SitemapPlugin extends Plugin { $schema = Schema::get(); - // For storing user-submitted flags on profiles - $schema->ensureTable('sitemap_user_count', array(new ColumnDef('registration_date', 'date', null, true, 'PRI'), From d78dfd627e558804c1ce79c69f586db29fa6c0fa Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 31 May 2010 07:50:10 -0700 Subject: [PATCH 45/46] cache notice counts in utility table --- plugins/Sitemap/Sitemap_notice_count.php | 162 +++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/plugins/Sitemap/Sitemap_notice_count.php b/plugins/Sitemap/Sitemap_notice_count.php index 72bb2b9d41..673417b788 100644 --- a/plugins/Sitemap/Sitemap_notice_count.php +++ b/plugins/Sitemap/Sitemap_notice_count.php @@ -122,4 +122,166 @@ class Sitemap_notice_count extends Memcached_DataObject { return $this->keys(); } + + static function getAll() + { + $noticeCounts = self::cacheGet('sitemap:notice:counts'); + + if ($noticeCounts === false) { + + $snc = new Sitemap_notice_count(); + $snc->orderBy('notice_date DESC'); + + // Fetch the first one to check up-to-date-itude + + $n = $snc->find(true); + + $today = self::today(); + $noticeCounts = array(); + + if (!$n) { // No counts saved yet + $noticeCounts = self::initializeCounts(); + } else if ($snc->notice_date < $today) { // There are counts but not up to today + $noticeCounts = self::fillInCounts($snc->notice_date); + } else if ($snc->notice_date == $today) { // Refresh today's + $noticeCounts[$today] = self::updateToday(); + } + + // starts with second-to-last date + + while ($snc->fetch()) { + $noticeCounts[$snc->notice_date] = $snc->notice_count; + } + + 
self::cacheSet('sitemap:notice:counts', $noticeCounts); + } + + return $noticeCounts; + } + + static function initializeCounts() + { + $firstDate = self::getFirstDate(); // awww + $today = self::today(); + + $counts = array(); + + for ($d = $firstDate; $d <= $today; $d = self::incrementDay($d)) { + $n = self::getCount($d); + self::insertCount($d, $n); + $counts[$d] = $n; + } + + return $counts; + } + + static function fillInCounts($lastDate) + { + $today = self::today(); + + $counts = array(); + + $n = self::getCount($lastDate); + self::updateCount($lastDate, $n); + + $counts[$lastDate] = $n; + + for ($d = self::incrementDay($lastDate); $d <= $today; $d = self::incrementDay($d)) { + $counts[$d] = self::getCount($d); // also return the newly added days, not just $lastDate + self::insertCount($d, $counts[$d]); + } + + return $counts; + } + + static function updateToday() + { + $today = self::today(); + + $n = self::getCount($today); + self::updateCount($today, $n); + + return $n; + } + + static function getCount($d) + { + $notice = new Notice(); + $notice->whereAdd('created >= "'.$d.' 00:00:00" AND created < "'.self::incrementDay($d).' 
00:00:00"'); + $n = $notice->count(); + + return $n; + } + + static function insertCount($d, $n) + { + $snc = new Sitemap_notice_count(); + + $snc->notice_date = DB_DataObject_Cast::date($d); + + $snc->notice_count = $n; + $snc->created = common_sql_now(); + $snc->modified = $snc->created; + + if (!$snc->insert()) { + common_log(LOG_WARNING, "Could not save notice counts for '$d'"); + } + } + + static function updateCount($d, $n) + { + $snc = Sitemap_notice_count::staticGet('notice_date', DB_DataObject_Cast::date($d)); + + if (empty($snc)) { + throw new Exception("No such notice date: $d"); + } + + $orig = clone($snc); + + $snc->notice_date = DB_DataObject_Cast::date($d); + + $snc->notice_count = $n; + $snc->created = common_sql_now(); + $snc->modified = $snc->created; + + if (!$snc->update($orig)) { + common_log(LOG_WARNING, "Could not save notice counts for '$d'"); + } + } + + static function incrementDay($d) + { + $dt = self::dateStrToInt($d); + return self::dateIntToStr(strtotime('+1 day', $dt)); // calendar-day step; a fixed 86400s can stall on a 25-hour DST day + } + + static function dateStrToInt($d) + { + return strtotime($d.' 00:00:00'); + } + + static function dateIntToStr($dt) + { + return date('Y-m-d', $dt); + } + + static function getFirstDate() + { + $n = new Notice(); + + $n->selectAdd(); + $n->selectAdd('date(min(created)) as first_date'); + + if ($n->find(true)) { + return $n->first_date; + } else { + // Is this right? 
+ return self::dateIntToStr(time()); + } + } + + static function today() + { + return self::dateIntToStr(time()); + } } From 1066b264247811ec9371ff2f473f5d7f2a6dd98a Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 31 May 2010 07:50:27 -0700 Subject: [PATCH 46/46] use sitemap_notice_count in sitemap index --- plugins/Sitemap/sitemapindex.php | 37 ++++---------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php index a3328340fe..5150b1aeb8 100644 --- a/plugins/Sitemap/sitemapindex.php +++ b/plugins/Sitemap/sitemapindex.php @@ -58,8 +58,8 @@ class SitemapindexAction extends Action $this->elementStart('sitemapindex', array('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9')); - $this->showUserSitemaps(); $this->showNoticeSitemaps(); + $this->showUserSitemaps(); $this->elementEnd('sitemapindex'); @@ -89,7 +89,9 @@ class SitemapindexAction extends Action function showNoticeSitemaps() { - $noticeCounts = $this->getNoticeCounts(); + $noticeCounts = Sitemap_notice_count::getAll(); + + common_debug(sprintf("Got %d notice counts", count($noticeCounts))); foreach ($noticeCounts as $dt => $cnt) { if ($cnt == 0) { @@ -105,37 +107,6 @@ class SitemapindexAction extends Action } } - function getUserCounts() - { - return Sitemap_user_count::getAll(); - } - - function getNoticeCounts() - { - $noticeCounts = Notice::cacheGet('sitemap:notice:counts'); - - if ($noticeCounts === false) { - - $notice = new Notice(); - - $notice->selectAdd(); - $notice->selectAdd('date(created) as postdate, count(*) as postcount'); - $notice->groupBy('postdate'); - - $notice->find(); - - $noticeCounts = array(); - - while ($notice->fetch()) { - $noticeCounts[$notice->postdate] = $notice->postcount; - } - - Notice::cacheSet('sitemap:notice:counts', $noticeCounts); - } - - return $noticeCounts; - } - function showSitemap($prefix, $dt, $i) { list($y, $m, $d) = explode('-', $dt);