sphinx integration for notices and profiles

darcs-hash:20081123185136-099f7-ca600dbdfff09762802e1cc58cf8777c45a24402.gz
This commit is contained in:
millette 2008-11-23 13:51:36 -05:00
parent bf72cde96f
commit 6b29f6ed70
5 changed files with 65 additions and 35 deletions

View File

@ -20,7 +20,6 @@
if (!defined('LACONICA')) { exit(1); } if (!defined('LACONICA')) { exit(1); }
require_once(INSTALLDIR.'/lib/searchaction.php'); require_once(INSTALLDIR.'/lib/searchaction.php');
define('NOTICES_PER_PAGE', 20);
# XXX common parent for people and content search? # XXX common parent for people and content search?
@ -42,13 +41,17 @@ class NoticesearchAction extends SearchAction {
$q = strtolower($q); $q = strtolower($q);
$search_engine = $notice->getSearchEngine('identica_notices'); $search_engine = $notice->getSearchEngine('identica_notices');
$search_engine->query($q);
$search_engine->set_sort_mode('chron');
# Ask for an extra to see if there's more. # Ask for an extra to see if there's more.
$search_engine->limit((($page-1)*NOTICES_PER_PAGE), NOTICES_PER_PAGE + 1); $search_engine->limit((($page-1)*NOTICES_PER_PAGE), NOTICES_PER_PAGE + 1);
$cnt = $notice->find(); if (false === $search_engine->query($q)) {
$cnt = 0;
}
else {
$cnt = $notice->find();
}
if ($cnt > 0) { if ($cnt > 0) {
$terms = preg_split('/[\s,]+/', $q); $terms = preg_split('/[\s,]+/', $q);
common_element_start('ul', array('id' => 'notices')); common_element_start('ul', array('id' => 'notices'));

View File

@ -39,19 +39,12 @@ class NoticesearchrssAction extends Rss10Action {
# lcase it for comparison # lcase it for comparison
$q = strtolower($q); $q = strtolower($q);
if(common_config('db','type')=='mysql') { $search_engine = $notice->getSearchEngine('identica_notices');
$notice->whereAdd('MATCH(content) against (\''.addslashes($q).'\')'); $search_engine->set_sort_mode('chron');
} else {
$notice->whereAdd('to_tsvector(\'english\',content) @@ plainto_tsquery(\''.addslashes($q).'\')');
}
$notice->orderBy('created DESC, notice.id DESC');
# Ask for an extra to see if there's more.
if ($limit != 0) {
$notice->limit(0, $limit);
}
if (!$limit) $limit = 20;
$search_engine->limit(0, $limit, true);
$search_engine->query($q);
$notice->find(); $notice->find();
while ($notice->fetch()) { while ($notice->fetch()) {

View File

@ -20,7 +20,7 @@
if (!defined('LACONICA')) { exit(1); } if (!defined('LACONICA')) { exit(1); }
require_once(INSTALLDIR.'/lib/searchaction.php'); require_once(INSTALLDIR.'/lib/searchaction.php');
require_once(INSTALLDIR.'/lib/profilelist.php'); define('PROFILES_PER_PAGE', 10);
class PeoplesearchAction extends SearchAction { class PeoplesearchAction extends SearchAction {
@ -42,13 +42,15 @@ class PeoplesearchAction extends SearchAction {
$search_engine = $profile->getSearchEngine('identica_people'); $search_engine = $profile->getSearchEngine('identica_people');
$search_engine->query($q); $search_engine->set_sort_mode('chron');
# Ask for an extra to see if there's more. # Ask for an extra to see if there's more.
$search_engine->limit((($page-1)*PROFILES_PER_PAGE), PROFILES_PER_PAGE + 1); $search_engine->limit((($page-1)*PROFILES_PER_PAGE), PROFILES_PER_PAGE + 1);
if (false === $search_engine->query($q)) {
$cnt = $profile->find(); $cnt = 0;
}
else {
$cnt = $profile->find();
}
if ($cnt > 0) { if ($cnt > 0) {
$terms = preg_split('/[\s,]+/', $q); $terms = preg_split('/[\s,]+/', $q);
$results = new PeopleSearchResults($profile, $terms); $results = new PeopleSearchResults($profile, $terms);

View File

@ -20,54 +20,85 @@
if (!defined('LACONICA')) { exit(1); } if (!defined('LACONICA')) { exit(1); }
class SearchEngine { class SearchEngine {
protected $profile; protected $target;
protected $table; protected $table;
function __construct($profile, $table) { function __construct($target, $table) {
$this->profile = $profile; $this->target = $target;
$this->table = $table; $this->table = $table;
} }
function query($q) { function query($q) {
} }
function limit($offset, $count) { function limit($offset, $count, $rss = false) {
return $this->profile->limit($offset, $count); return $this->target->limit($offset, $count);
}
function set_sort_mode($mode) {
if ('chron' === $mode)
return $this->target->orderBy('created desc');
} }
} }
class SphinxSearch extends SearchEngine { class SphinxSearch extends SearchEngine {
private $sphinx; private $sphinx;
function __construct($profile, $table) { function __construct($target, $table) {
parent::__construct($profile, $table); parent::__construct($target, $table);
$this->sphinx = new SphinxClient; $this->sphinx = new SphinxClient;
$this->sphinx->setServer(common_config('sphinx', 'server'), common_config('sphinx', 'port')); $this->sphinx->setServer(common_config('sphinx', 'server'), common_config('sphinx', 'port'));
} }
function limit($offset, $count) { function limit($offset, $count, $rss = false) {
$this->sphinx->setLimits($offset, $count); //FIXME without LARGEST_POSSIBLE, the most recent results aren't returned
$this->profile->limit($offset, $count); // this probably has a large impact on performance
$LARGEST_POSSIBLE = 1e6;
if ($rss) {
$this->sphinx->setLimits($offset, $count, $count, $LARGEST_POSSIBLE);
}
else {
// return at most 50 pages of results
$this->sphinx->setLimits($offset, $count, 50 * ($count - 1), $LARGEST_POSSIBLE);
}
return $this->target->limit(0, $count);
} }
function query($q) { function query($q) {
$result = $this->sphinx->query($q, $this->table); $result = $this->sphinx->query($q, $this->table);
if (!isset($result['matches'])) return false; if (!isset($result['matches'])) return false;
$id_set = join(', ', array_keys($result['matches'])); $id_set = join(', ', array_keys($result['matches']));
return $this->profile->whereAdd("id in ($id_set)"); $this->target->whereAdd("id in ($id_set)");
return true;
} }
function set_sort_mode($mode) {
if ('chron' === $mode) {
$this->sphinx->SetSortMode(SPH_SORT_ATTR_DESC, 'created_ts');
return $this->target->orderBy('created desc');
}
}
} }
class MySQLSearch extends SearchEngine { class MySQLSearch extends SearchEngine {
function query($q) { function query($q) {
return $this->profile->whereAdd('MATCH(nickname, fullname, location, bio, homepage) ' . if ('identica_people' === $this->table)
return $this->target->whereAdd('MATCH(nickname, fullname, location, bio, homepage) ' .
'against (\''.addslashes($q).'\')');
if ('identica_notices' === $this->table)
return $this->target->whereAdd('MATCH(content) ' .
'against (\''.addslashes($q).'\')'); 'against (\''.addslashes($q).'\')');
} }
} }
class PGSearch extends SearchEngine { class PGSearch extends SearchEngine {
function query($q) { function query($q) {
$this->profile->whereAdd('textsearch @@ plainto_tsquery(\''.addslashes($q).'\')'); if ('identica_people' === $this->table)
return $this->target->whereAdd('textsearch @@ plainto_tsquery(\''.addslashes($q).'\')');
if ('identica_notices' === $this->table)
return $this->target->whereAdd('to_tsvector(\'english\', content) @@ plainto_tsquery(\''.addslashes($q).'\')');
} }
} }

View File

@ -27,6 +27,7 @@ define('AVATAR_MINI_SIZE', 24);
define('MAX_AVATAR_SIZE', 256 * 1024); define('MAX_AVATAR_SIZE', 256 * 1024);
define('NOTICES_PER_PAGE', 20); define('NOTICES_PER_PAGE', 20);
define('PROFILES_PER_PAGE', 10);
define('FOREIGN_NOTICE_SEND', 1); define('FOREIGN_NOTICE_SEND', 1);
define('FOREIGN_NOTICE_RECV', 2); define('FOREIGN_NOTICE_RECV', 2);