From 55b1f3d84c7390528634db5b95396a181e13003c Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 25 Feb 2011 11:04:57 -0800 Subject: [PATCH] Scalability work on user backup stream generation. UserActivityStream -- used to create a full activity stream including subscriptions, favorites, notices, etc -- normally buffers everything into memory at once. This is infeasible for accounts with long histories of serious usage; it can take tens of seconds just to pull all records from the database, and working with them all in memory is very likely to hit resource limits. This commit adds an alternate mode for this class which defers pulling notices until the actual output. Instead of pre-sorting and buffering all the notices, empty spaces between the other activities are filled in with notices as we're making output. This means more, smaller queries spread out during operations, and less stuff kept in memory. Callers (backupaccount action, and backupuser.php) which can stream their output pass an $outputMode param of UserActivityStream::OUTPUT_RAW, and during getString() it'll send straight to output as well as slurping the notices in this extra funky fashion. Other callers will let it default to the OUTPUT_STRING mode, which keeps the previous behavior. There should be a better way to do this, swapping out the stringer output for raw output more consistently. 
--- actions/backupaccount.php | 4 +- lib/useractivitystream.php | 89 ++++++++++++++++++++++++++++++++++++-- scripts/backupuser.php | 2 +- 3 files changed, 90 insertions(+), 5 deletions(-) diff --git a/actions/backupaccount.php b/actions/backupaccount.php index 928aba69ce..35458ca796 100644 --- a/actions/backupaccount.php +++ b/actions/backupaccount.php @@ -118,11 +118,13 @@ class BackupaccountAction extends Action { $cur = common_current_user(); - $stream = new UserActivityStream($cur); + $stream = new UserActivityStream($cur, true, UserActivityStream::OUTPUT_RAW); header('Content-Disposition: attachment; filename='.$cur->nickname.'.atom'); header('Content-Type: application/atom+xml; charset=utf-8'); + // @fixme atom feed logic is in getString... + // but we just want it to output to the outputter. $this->raw($stream->getString()); } diff --git a/lib/useractivitystream.php b/lib/useractivitystream.php index 53d0107aa9..d1e3e28fb8 100644 --- a/lib/useractivitystream.php +++ b/lib/useractivitystream.php @@ -29,15 +29,48 @@ class UserActivityStream extends AtomUserNoticeFeed { public $activities = array(); - function __construct($user, $indent = true) + const OUTPUT_STRING = 1; + const OUTPUT_RAW = 2; + public $outputMode = self::OUTPUT_STRING; + + /** + * + * @param User $user + * @param boolean $indent + * @param boolean $outputMode: UserActivityStream::OUTPUT_STRING to return a string, + * or UserActivityStream::OUTPUT_RAW to go to raw output. + * Raw output mode will attempt to stream, keeping less + * data in memory but will leave $this->activities incomplete. + */ + function __construct($user, $indent = true, $outputMode = UserActivityStream::OUTPUT_STRING) { parent::__construct($user, null, $indent); + $this->outputMode = $outputMode; + if ($this->outputMode == self::OUTPUT_STRING) { + // String buffering? Grab all the notices now. + $notices = $this->getNotices(); + } elseif ($this->outputMode == self::OUTPUT_RAW) { + // Raw output... 
need to restructure from the stringer init. + $this->xw = new XMLWriter(); + $this->xw->openURI('php://output'); + if(is_null($indent)) { + $indent = common_config('site', 'indent'); + } + $this->xw->setIndent($indent); + + // We'll fetch notices later. + $notices = array(); + } else { + throw new Exception('Invalid outputMode provided to ' . __METHOD__); + } + + // Assume that everything but notices is feasible + // to pull at once and work with in memory... $subscriptions = $this->getSubscriptions(); $subscribers = $this->getSubscribers(); $groups = $this->getGroups(); $faves = $this->getFaves(); - $notices = $this->getNotices(); $objs = array_merge($subscriptions, $subscribers, $groups, $faves, $notices); @@ -45,16 +78,44 @@ class UserActivityStream extends AtomUserNoticeFeed usort($objs, 'UserActivityStream::compareObject'); + // We'll keep these around for later, and interleave them into + // the output stream with the user's notices. foreach ($objs as $obj) { $this->activities[] = $obj->asActivity(); } } + /** + * Interleave the pre-sorted subs/groups/faves with the user's + * notices, all in reverse chron order. + */ function renderEntries() { + $end = time() + 1; foreach ($this->activities as $act) { + $start = $act->time; + + if ($this->outputMode == self::OUTPUT_RAW && $start != $end) { + // In raw mode, we haven't pre-fetched notices. + // Grab the chunks of notices between other activities. + $notices = $this->getNoticesBetween($start, $end); + foreach ($notices as $noticeAct) { + $noticeAct->asActivity()->outputTo($this, false, false); + } + } + // Only show the author sub-element if it's different from default user $act->outputTo($this, false, ($act->actor->id != $this->user->uri)); + + $end = $start; + } + + if ($this->outputMode == self::OUTPUT_RAW) { + // Grab anything after the last pre-sorted activity. 
+ $notices = $this->getNoticesBetween(0, $end); + foreach ($notices as $noticeAct) { + $noticeAct->asActivity()->outputTo($this, false, false); + } } } @@ -121,7 +182,13 @@ class UserActivityStream extends AtomUserNoticeFeed return $faves; } - function getNotices() + /** + * + * @param int $start unix timestamp for earliest + * @param int $end unix timestamp for latest + * @return array of Notice objects + */ + function getNoticesBetween($start=0, $end=0) { $notices = array(); @@ -129,6 +196,17 @@ class UserActivityStream extends AtomUserNoticeFeed $notice->profile_id = $this->user->id; + if ($start) { + $tsstart = common_sql_date($start); + $notice->whereAdd("created >= '$tsstart'"); + } + if ($end) { + $tsend = common_sql_date($end); + $notice->whereAdd("created < '$tsend'"); + } + + $notice->orderBy('created DESC'); + if ($notice->find()) { while ($notice->fetch()) { $notices[] = clone($notice); @@ -138,6 +216,11 @@ class UserActivityStream extends AtomUserNoticeFeed return $notices; } + function getNotices() + { + return $this->getNoticesBetween(); + } + function getGroups() { $groups = array(); diff --git a/scripts/backupuser.php b/scripts/backupuser.php index 49fc1cefdc..ee2951fc8f 100644 --- a/scripts/backupuser.php +++ b/scripts/backupuser.php @@ -36,7 +36,7 @@ require_once INSTALLDIR.'/scripts/commandline.inc'; try { $user = getUser(); - $actstr = new UserActivityStream($user); + $actstr = new UserActivityStream($user, true, UserActivityStream::OUTPUT_RAW); print $actstr->getString(); } catch (Exception $e) { print $e->getMessage()."\n";