From 18573632b463c8ecf865b8a6f6a1887fa91e5ca7 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 29 Jul 2009 11:45:32 -0400 Subject: [PATCH] New algorithm for reply_to saving I changed the reply-to algorithm so that we only say a notice is in reply to another notice if a) we receive that information from the Web or API or b) it's in a "low bandwidth" (XMPP, SMS) channel, and begins with "@nickname" or "T NICKNAME". The goal is to avoid false-positives and make conversation trees more accurate and useful. --- classes/Notice.php | 160 +++++++++++++++++++++++++++++++++------------ 1 file changed, 117 insertions(+), 43 deletions(-) diff --git a/classes/Notice.php b/classes/Notice.php index c2770edbe8..ebd5e1efd5 100644 --- a/classes/Notice.php +++ b/classes/Notice.php @@ -102,15 +102,14 @@ class Notice extends Memcached_DataObject if (!$count) { return true; } - + //turn each into their canonical tag //this is needed to remove dupes before saving e.g. #hash.tag = #hashtag $hashtags = array(); for($i=0; $iis_local = $is_local; } - $notice->query('BEGIN'); - - $notice->reply_to = $reply_to; if (!empty($created)) { $notice->created = $created; } else { $notice->created = common_sql_now(); } + $notice->content = $final; $notice->rendered = common_render_content($final, $notice); $notice->source = $source; $notice->uri = $uri; - if (!empty($reply_to)) { - $reply_notice = Notice::staticGet('id', $reply_to); - if (!empty($reply_notice)) { - $notice->reply_to = $reply_to; - $notice->conversation = $reply_notice->conversation; - } + $notice->reply_to = self::getReplyTo($reply_to, $profile_id, $source, $final); + + if (!empty($notice->reply_to)) { + $reply = Notice::staticGet('id', $notice->reply_to); + $notice->conversation = $reply->conversation; } if (Event::handle('StartNoticeSave', array(&$notice))) { + // XXX: some of these functions write to the DB + + $notice->query('BEGIN'); + $id = $notice->insert(); if (!$id) { @@ -213,18 +213,33 @@ class Notice extends 
Memcached_DataObject return _('Problem saving notice.'); } - # Update the URI after the notice is in the database - if (!$uri) { - $orig = clone($notice); - $notice->uri = common_notice_uri($notice); + // Update ID-dependent columns: URI, conversation + $orig = clone($notice); + + $changed = false; + + if (empty($uri)) { + $notice->uri = common_notice_uri($notice); + $changed = true; + } + + // If it's not part of a conversation, it's + // the beginning of a new conversation. + + if (empty($notice->conversation)) { + $notice->conversation = $notice->id; + $changed = true; + } + + if ($changed) { if (!$notice->update($orig)) { common_log_db_error($notice, 'UPDATE', __FILE__); return _('Problem saving notice.'); } } - # XXX: do we need to change this for remote users? + // XXX: do we need to change this for remote users? $notice->saveReplies(); $notice->saveTags(); @@ -232,8 +247,13 @@ class Notice extends Memcached_DataObject $notice->addToInboxes(); $notice->saveUrls(); + + // FIXME: why do we have to re-render the content? + // Remove this if it's not necessary. + $orig2 = clone($notice); - $notice->rendered = common_render_content($final, $notice); + + $notice->rendered = common_render_content($final, $notice); if (!$notice->update($orig2)) { common_log_db_error($notice, 'UPDATE', __FILE__); return _('Problem saving notice.'); @@ -290,9 +310,9 @@ class Notice extends Memcached_DataObject $notice->profile_id = $profile_id; $notice->content = $content; if (common_config('db','type') == 'pgsql') - $notice->whereAdd('extract(epoch from now() - created) < ' . common_config('site', 'dupelimit')); + $notice->whereAdd('extract(epoch from now() - created) < ' . common_config('site', 'dupelimit')); else - $notice->whereAdd('now() - created < ' . common_config('site', 'dupelimit')); + $notice->whereAdd('now() - created < ' . 
common_config('site', 'dupelimit')); $cnt = $notice->count(); return ($cnt == 0); @@ -906,14 +926,14 @@ class Notice extends Memcached_DataObject { $user = new User(); - if(common_config('db','quote_identifiers')) - $user_table = '"user"'; - else $user_table = 'user'; + if(common_config('db','quote_identifiers')) + $user_table = '"user"'; + else $user_table = 'user'; $qry = 'SELECT id ' . - 'FROM '. $user_table .' JOIN subscription '. - 'ON '. $user_table .'.id = subscription.subscriber ' . + 'FROM '. $user_table .' JOIN subscription '. + 'ON '. $user_table .'.id = subscription.subscriber ' . 'WHERE subscription.subscribed = %d '; $user->query(sprintf($qry, $this->profile_id)); @@ -1031,16 +1051,6 @@ class Notice extends Memcached_DataObject if (!$recipient) { continue; } - if ($i == 0 && ($recipient->id != $sender->id) && !$this->reply_to) { // Don't save reply to self - $reply_for = $recipient; - $recipient_notice = $reply_for->getCurrentNotice(); - if ($recipient_notice) { - $orig = clone($this); - $this->reply_to = $recipient_notice->id; - $this->conversation = $recipient_notice->conversation; - $this->update($orig); - } - } // Don't save replies from blocked profile to local user $recipient_user = User::staticGet('id', $recipient->id); if ($recipient_user && $recipient_user->hasBlocked($sender)) { @@ -1087,14 +1097,6 @@ class Notice extends Memcached_DataObject } } - // If it's not a reply, make it the root of a new conversation - - if (empty($this->conversation)) { - $orig = clone($this); - $this->conversation = $this->id; - $this->update($orig); - } - foreach (array_keys($replied) as $recipient) { $user = User::staticGet('id', $recipient); if ($user) { @@ -1266,4 +1268,76 @@ class Notice extends Memcached_DataObject return $ids; } + + /** + * Determine which notice, if any, a new notice is in reply to. + * + * For conversation tracking, we try to see where this notice fits + * in the tree. 
Rough algorithm is:
+     *
+     *   if (reply_to is set and valid) {
+     *       return reply_to;
+     *   } else if ((source not API or Web) and (content starts with "T NAME" or "@name ")) {
+     *       return ID of last notice by initial @name in content;
+     *   }
+     *
+     * Note that all @nickname instances will still be used to save "reply" records,
+     * so the notice shows up in the mentioned users' "replies" tab.
+     *
+     * Null is returned whenever no replied-to notice can be determined: the
+     * source is not a low-bandwidth one, the content has no leading
+     * "T NAME " / "@name " marker, the author's profile cannot be loaded,
+     * the named recipient cannot be resolved, or the recipient has no
+     * current notice.
+     *
+     * @param integer $reply_to   ID passed in by Web or API
+     * @param integer $profile_id ID of author
+     * @param string  $source     Source tag, like 'web' or 'gwibber'
+     * @param string  $content    Final notice content
+     *
+     * @return integer ID of replied-to notice, or null for not a reply.
+     */
+
+    static function getReplyTo($reply_to, $profile_id, $source, $content)
+    {
+        // Source tags treated as "low bandwidth": the sender cannot pass
+        // an explicit reply_to, so it may be inferred from the text.
+        static $lb = array('xmpp', 'mail', 'sms', 'omb');
+
+        // If $reply_to is specified, we check that it exists, and then
+        // return it if it does. An ID that matches no notice is not an
+        // error; we fall through to the text-based checks below.
+
+        if (!empty($reply_to)) {
+            $reply_notice = Notice::staticGet('id', $reply_to);
+            if (!empty($reply_notice)) {
+                return $reply_to;
+            }
+        }
+
+        // If it's not a "low bandwidth" source (one where you can't set
+        // a reply_to argument), we return. This is mostly web and API
+        // clients. Strict comparison so only an exact string tag matches.
+
+        if (!in_array($source, $lb, true)) {
+            return null;
+        }
+
+        // Is there an initial @ or T? The "T NAME " form only matches
+        // upper-case letters and digits; the "@name" form only matches
+        // lower-case letters and digits followed by whitespace.
+
+        if (preg_match('/^T ([A-Z0-9]{1,64}) /', $content, $match) ||
+            preg_match('/^@([a-z0-9]{1,64})\s+/', $content, $match)) {
+            $nickname = common_canonical_nickname($match[1]);
+        } else {
+            return null;
+        }
+
+        // Figure out who that is. Without a sender profile there is
+        // nothing to resolve the nickname relative to, so give up
+        // instead of handing an empty value to common_relative_profile().
+
+        $sender = Profile::staticGet('id', $profile_id);
+
+        if (empty($sender)) {
+            return null;
+        }
+
+        $recipient = common_relative_profile($sender, $nickname, common_sql_now());
+
+        if (empty($recipient)) {
+            return null;
+        }
+
+        // Get their last notice
+
+        $last = $recipient->getCurrentNotice();
+
+        if (!empty($last)) {
+            return $last->id;
+        }
+
+        // The recipient has not posted anything yet: not a reply.
+
+        return null;
+    }
 }