Scalability work on user backup stream generation.

UserActivityStream -- used to create a full activity stream including subscriptions, favorites, notices, etc -- normally buffers everything into memory at once. This is infeasible for accounts with long histories of serious usage; it can take tens of seconds just to pull all records from the database, and working with them all in memory is very likely to hit resource limits.
This commit adds an alternate mode for this class which defers pulling notices until the output is actually being generated. Instead of pre-sorting and buffering all the notices, the gaps between the other activities are filled in with notices as we're producing output. This means more, smaller queries spread out over the operation, and less data kept in memory.

Callers which can stream their output (the backupaccount action and backupuser.php) pass an $outputMode param of UserActivityStream::OUTPUT_RAW; during getString() the feed is then sent straight to output, and the notices are fetched incrementally in the interleaved fashion described above.
Other callers will let it default to the OUTPUT_STRING mode, which keeps the previous behavior.

There should be a better way to do this, swapping out the stringer output for raw output more consistently.
This commit is contained in:
Brion Vibber 2011-02-25 11:04:57 -08:00
parent 77c280a48b
commit 55b1f3d84c
3 changed files with 90 additions and 5 deletions

View File

@ -118,11 +118,13 @@ class BackupaccountAction extends Action
{
$cur = common_current_user();
$stream = new UserActivityStream($cur);
$stream = new UserActivityStream($cur, true, UserActivityStream::OUTPUT_RAW);
header('Content-Disposition: attachment; filename='.$cur->nickname.'.atom');
header('Content-Type: application/atom+xml; charset=utf-8');
// @fixme atom feed logic is in getString...
// but we just want it to output to the outputter.
$this->raw($stream->getString());
}

View File

@ -29,15 +29,48 @@ class UserActivityStream extends AtomUserNoticeFeed
{
public $activities = array();
function __construct($user, $indent = true)
const OUTPUT_STRING = 1;
const OUTPUT_RAW = 2;
public $outputMode = self::OUTPUT_STRING;
/**
*
* @param User $user
* @param boolean $indent
* @param int $outputMode: UserActivityStream::OUTPUT_STRING to return a string,
* or UserActivityStream::OUTPUT_RAW to go to raw output.
* Raw output mode will attempt to stream, keeping less
* data in memory but will leave $this->activities incomplete.
*/
function __construct($user, $indent = true, $outputMode = UserActivityStream::OUTPUT_STRING)
{
parent::__construct($user, null, $indent);
$this->outputMode = $outputMode;
if ($this->outputMode == self::OUTPUT_STRING) {
// String buffering? Grab all the notices now.
$notices = $this->getNotices();
} elseif ($this->outputMode == self::OUTPUT_RAW) {
// Raw output... need to restructure from the stringer init.
$this->xw = new XMLWriter();
$this->xw->openURI('php://output');
if(is_null($indent)) {
$indent = common_config('site', 'indent');
}
$this->xw->setIndent($indent);
// We'll fetch notices later.
$notices = array();
} else {
throw new Exception('Invalid outputMode provided to ' . __METHOD__);
}
// Assume that everything but notices is feasible
// to pull at once and work with in memory...
$subscriptions = $this->getSubscriptions();
$subscribers = $this->getSubscribers();
$groups = $this->getGroups();
$faves = $this->getFaves();
$notices = $this->getNotices();
$objs = array_merge($subscriptions, $subscribers, $groups, $faves, $notices);
@ -45,16 +78,44 @@ class UserActivityStream extends AtomUserNoticeFeed
usort($objs, 'UserActivityStream::compareObject');
// We'll keep these around for later, and interleave them into
// the output stream with the user's notices.
foreach ($objs as $obj) {
$this->activities[] = $obj->asActivity();
}
}
/**
 * Write out every buffered activity (subscriptions, groups, faves and,
 * in string mode, notices) in the order held in $this->activities.
 *
 * In raw output mode the user's notices were not pre-fetched, so each
 * time window between two consecutive buffered activities is filled
 * with the notices created in that window, and a final query picks up
 * the notices older than the last buffered activity.
 */
function renderEntries()
{
    // Exclusive upper bound of the next notice window; starts just past "now".
    $upperBound = time() + 1;

    foreach ($this->activities as $activity) {
        $lowerBound = $activity->time;

        // Raw mode only: emit the notices that fall between this activity
        // and the previously emitted one. An empty window is skipped.
        if ($this->outputMode == self::OUTPUT_RAW && $lowerBound != $upperBound) {
            foreach ($this->getNoticesBetween($lowerBound, $upperBound) as $notice) {
                $notice->asActivity()->outputTo($this, false, false);
            }
        }

        // The author sub-element is only shown when the actor differs
        // from the default user of this feed.
        $showAuthor = ($activity->actor->id != $this->user->uri);
        $activity->outputTo($this, false, $showAuthor);

        // The next window ends where this activity begins.
        $upperBound = $lowerBound;
    }

    if ($this->outputMode == self::OUTPUT_RAW) {
        // Everything older than the last buffered activity.
        foreach ($this->getNoticesBetween(0, $upperBound) as $notice) {
            $notice->asActivity()->outputTo($this, false, false);
        }
    }
}
@ -121,7 +182,13 @@ class UserActivityStream extends AtomUserNoticeFeed
return $faves;
}
function getNotices()
/**
*
* @param int $start unix timestamp for earliest
* @param int $end unix timestamp for latest
* @return array of Notice objects
*/
function getNoticesBetween($start=0, $end=0)
{
$notices = array();
@ -129,6 +196,17 @@ class UserActivityStream extends AtomUserNoticeFeed
$notice->profile_id = $this->user->id;
if ($start) {
$tsstart = common_sql_date($start);
$notice->whereAdd("created >= '$tsstart'");
}
if ($end) {
$tsend = common_sql_date($end);
$notice->whereAdd("created < '$tsend'");
}
$notice->orderBy('created DESC');
if ($notice->find()) {
while ($notice->fetch()) {
$notices[] = clone($notice);
@ -138,6 +216,11 @@ class UserActivityStream extends AtomUserNoticeFeed
return $notices;
}
/**
 * Fetch all of the user's notices, with no time bounds applied.
 *
 * @return array of Notice objects
 */
function getNotices()
{
    // Default arguments (0, 0) disable both the start and end filters.
    $allNotices = $this->getNoticesBetween();
    return $allNotices;
}
function getGroups()
{
$groups = array();

View File

@ -36,7 +36,7 @@ require_once INSTALLDIR.'/scripts/commandline.inc';
try {
$user = getUser();
$actstr = new UserActivityStream($user);
$actstr = new UserActivityStream($user, true, UserActivityStream::OUTPUT_RAW);
print $actstr->getString();
} catch (Exception $e) {
print $e->getMessage()."\n";