From 6b29f6ed702f69a9b1b6a9359c1eaa22b9c5ba9d Mon Sep 17 00:00:00 2001 From: millette Date: Sun, 23 Nov 2008 13:51:36 -0500 Subject: [PATCH] sphinx integration for notices and profiles darcs-hash:20081123185136-099f7-ca600dbdfff09762802e1cc58cf8777c45a24402.gz --- actions/noticesearch.php | 11 ++++--- actions/noticesearchrss.php | 17 ++++------- actions/peoplesearch.php | 14 +++++---- classes/SearchEngines.php | 57 ++++++++++++++++++++++++++++--------- lib/common.php | 1 + 5 files changed, 65 insertions(+), 35 deletions(-) diff --git a/actions/noticesearch.php b/actions/noticesearch.php index f8dad2a20e..96e4d777fa 100644 --- a/actions/noticesearch.php +++ b/actions/noticesearch.php @@ -20,7 +20,6 @@ if (!defined('LACONICA')) { exit(1); } require_once(INSTALLDIR.'/lib/searchaction.php'); -define('NOTICES_PER_PAGE', 20); # XXX common parent for people and content search? @@ -42,13 +41,17 @@ class NoticesearchAction extends SearchAction { $q = strtolower($q); $search_engine = $notice->getSearchEngine('identica_notices'); - $search_engine->query($q); + $search_engine->set_sort_mode('chron'); # Ask for an extra to see if there's more. $search_engine->limit((($page-1)*NOTICES_PER_PAGE), NOTICES_PER_PAGE + 1); - $cnt = $notice->find(); - + if (false === $search_engine->query($q)) { + $cnt = 0; + } + else { + $cnt = $notice->find(); + } if ($cnt > 0) { $terms = preg_split('/[\s,]+/', $q); common_element_start('ul', array('id' => 'notices')); diff --git a/actions/noticesearchrss.php b/actions/noticesearchrss.php index f598d833eb..0f38515a02 100644 --- a/actions/noticesearchrss.php +++ b/actions/noticesearchrss.php @@ -39,19 +39,12 @@ class NoticesearchrssAction extends Rss10Action { # lcase it for comparison $q = strtolower($q); - if(common_config('db','type')=='mysql') { - $notice->whereAdd('MATCH(content) against (\''.addslashes($q).'\')'); - } else { - $notice->whereAdd('to_tsvector(\'english\',content) @@ plainto_tsquery(\''.addslashes($q).'\')'); - } - $notice->orderBy('created DESC, notice.id DESC'); - - # Ask for an extra to see if there's more. - - if ($limit != 0) { - $notice->limit(0, $limit); - } + $search_engine = $notice->getSearchEngine('identica_notices'); + $search_engine->set_sort_mode('chron'); + if (!$limit) $limit = 20; + $search_engine->limit(0, $limit, true); + $search_engine->query($q); $notice->find(); while ($notice->fetch()) { diff --git a/actions/peoplesearch.php b/actions/peoplesearch.php index fa54dc9f7c..f4886fd835 100644 --- a/actions/peoplesearch.php +++ b/actions/peoplesearch.php @@ -20,7 +20,7 @@ if (!defined('LACONICA')) { exit(1); } require_once(INSTALLDIR.'/lib/searchaction.php'); -require_once(INSTALLDIR.'/lib/profilelist.php'); +define('PROFILES_PER_PAGE', 10); class PeoplesearchAction extends SearchAction { @@ -42,13 +42,15 @@ class PeoplesearchAction extends SearchAction { $search_engine = $profile->getSearchEngine('identica_people'); - $search_engine->query($q); - + $search_engine->set_sort_mode('chron'); # Ask for an extra to see if there's more. $search_engine->limit((($page-1)*PROFILES_PER_PAGE), PROFILES_PER_PAGE + 1); - - $cnt = $profile->find(); - + if (false === $search_engine->query($q)) { + $cnt = 0; + } + else { + $cnt = $profile->find(); + } if ($cnt > 0) { $terms = preg_split('/[\s,]+/', $q); $results = new PeopleSearchResults($profile, $terms); diff --git a/classes/SearchEngines.php b/classes/SearchEngines.php index 253e0028be..96fd7da90d 100644 --- a/classes/SearchEngines.php +++ b/classes/SearchEngines.php @@ -20,54 +20,85 @@ if (!defined('LACONICA')) { exit(1); } class SearchEngine { - protected $profile; + protected $target; protected $table; - function __construct($profile, $table) { - $this->profile = $profile; + function __construct($target, $table) { + $this->target = $target; $this->table = $table; } function query($q) { } - function limit($offset, $count) { - return $this->profile->limit($offset, $count); + function limit($offset, $count, $rss = false) { + return $this->target->limit($offset, $count); + } + + function set_sort_mode($mode) { + if ('chron' === $mode) + return $this->target->orderBy('created desc'); } } class SphinxSearch extends SearchEngine { private $sphinx; - function __construct($profile, $table) { - parent::__construct($profile, $table); + function __construct($target, $table) { + parent::__construct($target, $table); $this->sphinx = new SphinxClient; $this->sphinx->setServer(common_config('sphinx', 'server'), common_config('sphinx', 'port')); } - function limit($offset, $count) { - $this->sphinx->setLimits($offset, $count); - $this->profile->limit($offset, $count); + function limit($offset, $count, $rss = false) { + //FIXME without LARGEST_POSSIBLE, the most recent results aren't returned + // this probably has a large impact on performance + $LARGEST_POSSIBLE = 1e6; + + if ($rss) { + $this->sphinx->setLimits($offset, $count, $count, $LARGEST_POSSIBLE); + } + else { + // return at most 50 pages of results + $this->sphinx->setLimits($offset, $count, 50 * ($count - 1), $LARGEST_POSSIBLE); + } + + return $this->target->limit(0, $count); } function query($q) { $result = $this->sphinx->query($q, $this->table); if (!isset($result['matches'])) return false; $id_set = join(', ', array_keys($result['matches'])); - return $this->profile->whereAdd("id in ($id_set)"); + $this->target->whereAdd("id in ($id_set)"); + return true; } + + function set_sort_mode($mode) { + if ('chron' === $mode) { + $this->sphinx->SetSortMode(SPH_SORT_ATTR_DESC, 'created_ts'); + return $this->target->orderBy('created desc'); + } + } } class MySQLSearch extends SearchEngine { function query($q) { - return $this->profile->whereAdd('MATCH(nickname, fullname, location, bio, homepage) ' . + if ('identica_people' === $this->table) + return $this->target->whereAdd('MATCH(nickname, fullname, location, bio, homepage) ' . + 'against (\''.addslashes($q).'\')'); + if ('identica_notices' === $this->table) + return $this->target->whereAdd('MATCH(content) ' . 'against (\''.addslashes($q).'\')'); } } class PGSearch extends SearchEngine { function query($q) { - $this->profile->whereAdd('textsearch @@ plainto_tsquery(\''.addslashes($q).'\')'); + if ('identica_people' === $this->table) + return $this->target->whereAdd('textsearch @@ plainto_tsquery(\''.addslashes($q).'\')'); + if ('identica_notices' === $this->table) + return $this->target->whereAdd('to_tsvector(\'english\', content) @@ plainto_tsquery(\''.addslashes($q).'\')'); } } diff --git a/lib/common.php b/lib/common.php index acea1252cc..9b4751c9ff 100644 --- a/lib/common.php +++ b/lib/common.php @@ -27,6 +27,7 @@ define('AVATAR_MINI_SIZE', 24); define('MAX_AVATAR_SIZE', 256 * 1024); define('NOTICES_PER_PAGE', 20); +define('PROFILES_PER_PAGE', 10); define('FOREIGN_NOTICE_SEND', 1); define('FOREIGN_NOTICE_RECV', 2);