Ticket 2141: bugs with weighted popularity lists across year boundary.

Consolidated several separate implementations of the same weighting algorithm into common_sql_weight() and fixed some bugs...
For MySQL, now using timestampdiff() instead of subtraction for the comparison, so we get sane results when the year doesn't match, and utc_timestamp() rather than now() so we don't get negative ages for recent items with local server timezone.
Unknown whether the same problems affect PostgreSQL, but note that it lacks the timestampdiff() SQL function.
This commit is contained in:
Brion Vibber 2010-01-04 13:01:17 -08:00
parent d32fb7c7c4
commit af95005bc4
6 changed files with 37 additions and 30 deletions

View File

@ -185,11 +185,7 @@ class FavoritedAction extends Action
function showContent() function showContent()
{ {
if (common_config('db', 'type') == 'pgsql') { $weightexpr = common_sql_weight('fave.modified', common_config('popular', 'dropoff'));
$weightexpr='sum(exp(-extract(epoch from (now() - fave.modified)) / %s))';
} else {
$weightexpr='sum(exp(-(now() - fave.modified) / %s))';
}
$qry = 'SELECT notice.*, '. $qry = 'SELECT notice.*, '.
$weightexpr . ' as weight ' . $weightexpr . ' as weight ' .
@ -207,7 +203,7 @@ class FavoritedAction extends Action
} }
$notice = Memcached_DataObject::cachedQuery('Notice', $notice = Memcached_DataObject::cachedQuery('Notice',
sprintf($qry, common_config('popular', 'dropoff')), $qry,
600); 600);
$nl = new NoticeList($notice, $this); $nl = new NoticeList($notice, $this);

View File

@ -105,12 +105,8 @@ class PublictagcloudAction extends Action
#Add the aggregated columns... #Add the aggregated columns...
$tags->selectAdd('max(notice_id) as last_notice_id'); $tags->selectAdd('max(notice_id) as last_notice_id');
if(common_config('db','type')=='pgsql') { $calc = common_sql_weight('created', common_config('tag', 'dropoff'));
$calc='sum(exp(-extract(epoch from (now()-created))/%s)) as weight'; $tags->selectAdd($calc . ' as weight');
} else {
$calc='sum(exp(-(now() - created)/%s)) as weight';
}
$tags->selectAdd(sprintf($calc, common_config('tag', 'dropoff')));
$tags->groupBy('tag'); $tags->groupBy('tag');
$tags->orderBy('weight DESC'); $tags->orderBy('weight DESC');
@ -136,7 +132,12 @@ class PublictagcloudAction extends Action
$this->elementStart('dd'); $this->elementStart('dd');
$this->elementStart('ul', 'tags xoxo tag-cloud'); $this->elementStart('ul', 'tags xoxo tag-cloud');
foreach ($tw as $tag => $weight) { foreach ($tw as $tag => $weight) {
$this->showTag($tag, $weight, $weight/$sum); if ($sum) {
$weightedSum = $weight/$sum;
} else {
$weightedSum = 0.5;
}
$this->showTag($tag, $weight, $weightedSum);
} }
$this->elementEnd('ul'); $this->elementEnd('ul');
$this->elementEnd('dd'); $this->elementEnd('dd');

View File

@ -58,11 +58,7 @@ class GroupTagCloudSection extends TagCloudSection
function getTags() function getTags()
{ {
if (common_config('db', 'type') == 'pgsql') { $weightexpr = common_sql_weight('notice_tag.created', common_config('tag', 'dropoff'));
$weightexpr='sum(exp(-extract(epoch from (now() - notice_tag.created)) / %s))';
} else {
$weightexpr='sum(exp(-(now() - notice_tag.created) / %s))';
}
$names = $this->group->getAliases(); $names = $this->group->getAliases();
@ -99,7 +95,6 @@ class GroupTagCloudSection extends TagCloudSection
$tag = Memcached_DataObject::cachedQuery('Notice_tag', $tag = Memcached_DataObject::cachedQuery('Notice_tag',
sprintf($qry, sprintf($qry,
common_config('tag', 'dropoff'),
$this->group->id, $this->group->id,
$namestring), $namestring),
3600); 3600);

View File

@ -58,13 +58,9 @@ class PersonalTagCloudSection extends TagCloudSection
function getTags() function getTags()
{ {
if (common_config('db', 'type') == 'pgsql') { $weightexpr = common_sql_weight('notice_tag.created', common_config('tag', 'dropoff'));
$weightexpr='sum(exp(-extract(epoch from (now() - notice_tag.created)) / %s))';
} else { $qry = 'SELECT notice_tag.tag, '.
$weightexpr='sum(exp(-(now() - notice_tag.created) / %s))';
}
$qry = 'SELECT notice_tag.tag, '.
$weightexpr . ' as weight ' . $weightexpr . ' as weight ' .
'FROM notice_tag JOIN notice ' . 'FROM notice_tag JOIN notice ' .
'ON notice_tag.notice_id = notice.id ' . 'ON notice_tag.notice_id = notice.id ' .
@ -83,7 +79,6 @@ class PersonalTagCloudSection extends TagCloudSection
$tag = Memcached_DataObject::cachedQuery('Notice_tag', $tag = Memcached_DataObject::cachedQuery('Notice_tag',
sprintf($qry, sprintf($qry,
common_config('tag', 'dropoff'),
$this->user->id), $this->user->id),
3600); 3600);
return $tag; return $tag;

View File

@ -48,17 +48,17 @@ class PopularNoticeSection extends NoticeSection
{ {
function getNotices() function getNotices()
{ {
// @fixme there should be a common func for this
if (common_config('db', 'type') == 'pgsql') { if (common_config('db', 'type') == 'pgsql') {
$weightexpr='sum(exp(-extract(epoch from (now() - fave.modified)) / %s))';
if (!empty($this->out->tag)) { if (!empty($this->out->tag)) {
$tag = pg_escape_string($this->out->tag); $tag = pg_escape_string($this->out->tag);
} }
} else { } else {
$weightexpr='sum(exp(-(now() - fave.modified) / %s))';
if (!empty($this->out->tag)) { if (!empty($this->out->tag)) {
$tag = mysql_escape_string($this->out->tag); $tag = mysql_escape_string($this->out->tag);
} }
} }
$weightexpr = common_sql_weight('fave.modified', common_config('popular', 'dropoff'));
$qry = "SELECT notice.*, $weightexpr as weight "; $qry = "SELECT notice.*, $weightexpr as weight ";
if(isset($tag)) { if(isset($tag)) {
$qry .= 'FROM notice_tag, notice JOIN fave ON notice.id = fave.notice_id ' . $qry .= 'FROM notice_tag, notice JOIN fave ON notice.id = fave.notice_id ' .
@ -78,7 +78,7 @@ class PopularNoticeSection extends NoticeSection
$qry .= ' LIMIT ' . $limit . ' OFFSET ' . $offset; $qry .= ' LIMIT ' . $limit . ' OFFSET ' . $offset;
$notice = Memcached_DataObject::cachedQuery('Notice', $notice = Memcached_DataObject::cachedQuery('Notice',
sprintf($qry, common_config('popular', 'dropoff')), $qry,
1200); 1200);
return $notice; return $notice;
} }

View File

@ -908,6 +908,26 @@ function common_sql_date($datetime)
return strftime('%Y-%m-%d %H:%M:%S', $datetime); return strftime('%Y-%m-%d %H:%M:%S', $datetime);
} }
/**
* Return an SQL fragment to calculate an age-based weight from a given
* timestamp or datetime column.
*
* @param string $column name of field we're comparing against current time
* @param integer $dropoff divisor for age in seconds before exponentiation
* @return string SQL fragment
*/
function common_sql_weight($column, $dropoff)
{
if (common_config('db', 'type') == 'pgsql') {
// PostgreSQL doesn't support timestampdiff function.
// @fixme will this use the right time zone?
// @fixme does this handle cross-year subtraction correctly?
return "sum(exp(-extract(epoch from (now() - $column)) / $dropoff))";
} else {
return "sum(exp(timestampdiff(second, utc_timestamp(), $column) / $dropoff))";
}
}
function common_redirect($url, $code=307) function common_redirect($url, $code=307)
{ {
static $status = array(301 => "Moved Permanently", static $status = array(301 => "Moved Permanently",