Bringing Sphinx search support up to code: broken out to a plugin, now supports multiple sites on a single server.

Upgrade notes:
* Index names have changed from hardcoded 'Identica_people' and 'Identica_notices' to use the database name and actual table names. Must reindex.

New events:
* GetSearchEngine to override default search engine class selection from plugins

New scripts:
* gen_config.php generates a sphinx.conf from database configuration (with theoretical support for status_network table, but it doesn't seem to be cleanly queriable right now without knowing the db setup info for that. Needs generalized support.)
* Replaced old sphinx-indexer.sh and sphinx-cron.sh with index_update.php

Other fixes:
* sphinx.conf.sample better matches our live config, skipping unused stopword list and using a more realistic indexer memory limit

Further notes:
* Probably doesn't work right with PostgreSQL yet; Sphinx can pull from PG but the extraction queries currently look like they use some MySQL-specific functions.
This commit is contained in:
Brion Vibber 2009-11-03 16:57:39 -08:00
parent 1cd6650ae4
commit 53c86c43c4
20 changed files with 539 additions and 173 deletions

31
README
View File

@ -389,20 +389,16 @@ the server first.
Sphinx Sphinx
------ ------
To use a Sphinx server to search users and notices, you also need To use a Sphinx server to search users and notices, you'll need to
to install, compile and enable the sphinx pecl extension for php on the enable the SphinxSearch plugin. Add to your config.php:
client side, which itself depends on the sphinx development files.
"pecl install sphinx" should take care of that. Add "extension=sphinx.so"
to your php.ini and reload apache to enable it.
You can update your MySQL or Postgresql databases to drop their fulltext addPlugin('SphinxSearch');
search indexes, since they're now provided by sphinx. $config['sphinx']['server'] = 'searchhost.local';
On the sphinx server side, a script reads the main database and build You also need to install, compile and enable the sphinx pecl extension for
the keyword index. A cron job reads the database and keeps the sphinx php on the client side, which itself depends on the sphinx development files.
indexes up to date. scripts/sphinx-cron.sh should be called by cron
every 5 minutes, for example. scripts/sphinx.sh is an init.d script See plugins/SphinxSearch/README for more details and server setup.
to start and stop the sphinx search daemon.
SMS SMS
--- ---
@ -1168,17 +1164,6 @@ base: memcached uses key-value pairs to store data. We build long,
StatusNet site using your memcached server. StatusNet site using your memcached server.
port: Port to connect to; defaults to 11211. port: Port to connect to; defaults to 11211.
sphinx
------
You can get a significant boost in performance using Sphinx Search
instead of your database server to search for users and notices.
<http://sphinxsearch.com/>.
enabled: Set to true to enable. Default false.
server: a string with the hostname of the sphinx server.
port: an integer with the port number of the sphinx server.
emailpost emailpost
--------- ---------

View File

@ -104,7 +104,7 @@ class NoticesearchAction extends SearchAction
{ {
$notice = new Notice(); $notice = new Notice();
$search_engine = $notice->getSearchEngine('identica_notices'); $search_engine = $notice->getSearchEngine('notice');
$search_engine->set_sort_mode('chron'); $search_engine->set_sort_mode('chron');
// Ask for an extra to see if there's more. // Ask for an extra to see if there's more.
$search_engine->limit((($page-1)*NOTICES_PER_PAGE), NOTICES_PER_PAGE + 1); $search_engine->limit((($page-1)*NOTICES_PER_PAGE), NOTICES_PER_PAGE + 1);

View File

@ -62,7 +62,7 @@ class NoticesearchrssAction extends Rss10Action
$notice = new Notice(); $notice = new Notice();
$search_engine = $notice->getSearchEngine('identica_notices'); $search_engine = $notice->getSearchEngine('notice');
$search_engine->set_sort_mode('chron'); $search_engine->set_sort_mode('chron');
if (!$limit) $limit = 20; if (!$limit) $limit = 20;

View File

@ -61,7 +61,7 @@ class PeoplesearchAction extends SearchAction
function showResults($q, $page) function showResults($q, $page)
{ {
$profile = new Profile(); $profile = new Profile();
$search_engine = $profile->getSearchEngine('identica_people'); $search_engine = $profile->getSearchEngine('profile');
$search_engine->set_sort_mode('chron'); $search_engine->set_sort_mode('chron');
// Ask for an extra to see if there's more. // Ask for an extra to see if there's more.
$search_engine->limit((($page-1)*PROFILES_PER_PAGE), PROFILES_PER_PAGE + 1); $search_engine->limit((($page-1)*PROFILES_PER_PAGE), PROFILES_PER_PAGE + 1);

View File

@ -161,7 +161,7 @@ class TwitapisearchatomAction extends ApiAction
// lcase it for comparison // lcase it for comparison
$q = strtolower($this->query); $q = strtolower($this->query);
$search_engine = $notice->getSearchEngine('identica_notices'); $search_engine = $notice->getSearchEngine('notice');
$search_engine->set_sort_mode('chron'); $search_engine->set_sort_mode('chron');
$search_engine->limit(($this->page - 1) * $this->rpp, $search_engine->limit(($this->page - 1) * $this->rpp,
$this->rpp + 1, true); $this->rpp + 1, true);

View File

@ -121,7 +121,7 @@ class TwitapisearchjsonAction extends ApiAction
// lcase it for comparison // lcase it for comparison
$q = strtolower($this->query); $q = strtolower($this->query);
$search_engine = $notice->getSearchEngine('identica_notices'); $search_engine = $notice->getSearchEngine('notice');
$search_engine->set_sort_mode('chron'); $search_engine->set_sort_mode('chron');
$search_engine->limit(($this->page - 1) * $this->rpp, $this->rpp + 1, true); $search_engine->limit(($this->page - 1) * $this->rpp, $this->rpp + 1, true);
if (false === $search_engine->query($q)) { if (false === $search_engine->query($q)) {

View File

@ -184,27 +184,20 @@ class Memcached_DataObject extends DB_DataObject
require_once INSTALLDIR.'/lib/search_engines.php'; require_once INSTALLDIR.'/lib/search_engines.php';
static $search_engine; static $search_engine;
if (!isset($search_engine)) { if (!isset($search_engine)) {
$connected = false; if (Event::handle('GetSearchEngine', array($this, $table, &$search_engine))) {
if (common_config('sphinx', 'enabled')) { if ('mysql' === common_config('db', 'type')) {
$search_engine = new SphinxSearch($this, $table); $type = common_config('search', 'type');
$connected = $search_engine->is_connected(); if ($type == 'like') {
} $search_engine = new MySQLLikeSearch($this, $table);
} else if ($type == 'fulltext') {
// unable to connect to sphinx' search daemon $search_engine = new MySQLSearch($this, $table);
if (!$connected) {
if ('mysql' === common_config('db', 'type')) {
$type = common_config('search', 'type');
if ($type == 'like') {
$search_engine = new MySQLLikeSearch($this, $table);
} else if ($type == 'fulltext') {
$search_engine = new MySQLSearch($this, $table);
} else {
throw new ServerException('Unknown search type: ' . $type);
}
} else { } else {
$search_engine = new PGSearch($this, $table); throw new ServerException('Unknown search type: ' . $type);
} }
} else {
$search_engine = new PGSearch($this, $table);
} }
}
} }
return $search_engine; return $search_engine;
} }

View File

@ -57,14 +57,16 @@ class Status_network extends DB_DataObject
$config['db']['ini_'.$dbname] = INSTALLDIR.'/classes/status_network.ini'; $config['db']['ini_'.$dbname] = INSTALLDIR.'/classes/status_network.ini';
$config['db']['table_status_network'] = $dbname; $config['db']['table_status_network'] = $dbname;
self::$cache = new Memcache(); if (class_exists('Memcache')) {
self::$cache = new Memcache();
if (is_array($servers)) { if (is_array($servers)) {
foreach($servers as $server) { foreach($servers as $server) {
self::$cache->addServer($server); self::$cache->addServer($server);
}
} else {
self::$cache->addServer($servers);
} }
} else {
self::$cache->addServer($servers);
} }
self::$base = $dbname; self::$base = $dbname;
@ -76,6 +78,10 @@ class Status_network extends DB_DataObject
static function memGet($k, $v) static function memGet($k, $v)
{ {
if (!self::$cache) {
return self::staticGet($k, $v);
}
$ck = self::cacheKey($k, $v); $ck = self::cacheKey($k, $v);
$sn = self::$cache->get($ck); $sn = self::$cache->get($ck);
@ -92,10 +98,12 @@ class Status_network extends DB_DataObject
function decache() function decache()
{ {
$keys = array('nickname', 'hostname', 'pathname'); if (self::$cache) {
foreach ($keys as $k) { $keys = array('nickname', 'hostname', 'pathname');
$ck = self::cacheKey($k, $this->$k); foreach ($keys as $k) {
self::$cache->delete($ck); $ck = self::cacheKey($k, $this->$k);
self::$cache->delete($ck);
}
} }
} }

View File

@ -125,10 +125,6 @@ $default =
'public' => array()), # JIDs of users who want to receive the public stream 'public' => array()), # JIDs of users who want to receive the public stream
'invite' => 'invite' =>
array('enabled' => true), array('enabled' => true),
'sphinx' =>
array('enabled' => false,
'server' => 'localhost',
'port' => 3312),
'tag' => 'tag' =>
array('dropoff' => 864000.0), array('dropoff' => 864000.0),
'popular' => 'popular' =>

View File

@ -46,70 +46,11 @@ class SearchEngine
} }
} }
class SphinxSearch extends SearchEngine
{
private $sphinx;
private $connected;
function __construct($target, $table)
{
$fp = @fsockopen(common_config('sphinx', 'server'), common_config('sphinx', 'port'));
if (!$fp) {
$this->connected = false;
return;
}
fclose($fp);
parent::__construct($target, $table);
$this->sphinx = new SphinxClient;
$this->sphinx->setServer(common_config('sphinx', 'server'), common_config('sphinx', 'port'));
$this->connected = true;
}
function is_connected()
{
return $this->connected;
}
function limit($offset, $count, $rss = false)
{
//FIXME without LARGEST_POSSIBLE, the most recent results aren't returned
// this probably has a large impact on performance
$LARGEST_POSSIBLE = 1e6;
if ($rss) {
$this->sphinx->setLimits($offset, $count, $count, $LARGEST_POSSIBLE);
}
else {
// return at most 50 pages of results
$this->sphinx->setLimits($offset, $count, 50 * ($count - 1), $LARGEST_POSSIBLE);
}
return $this->target->limit(0, $count);
}
function query($q)
{
$result = $this->sphinx->query($q, $this->table);
if (!isset($result['matches'])) return false;
$id_set = join(', ', array_keys($result['matches']));
$this->target->whereAdd("id in ($id_set)");
return true;
}
function set_sort_mode($mode)
{
if ('chron' === $mode) {
$this->sphinx->SetSortMode(SPH_SORT_ATTR_DESC, 'created_ts');
return $this->target->orderBy('created desc');
}
}
}
class MySQLSearch extends SearchEngine class MySQLSearch extends SearchEngine
{ {
function query($q) function query($q)
{ {
if ('identica_people' === $this->table) { if ('profile' === $this->table) {
$this->target->whereAdd('MATCH(nickname, fullname, location, bio, homepage) ' . $this->target->whereAdd('MATCH(nickname, fullname, location, bio, homepage) ' .
'AGAINST (\''.addslashes($q).'\' IN BOOLEAN MODE)'); 'AGAINST (\''.addslashes($q).'\' IN BOOLEAN MODE)');
if (strtolower($q) != $q) { if (strtolower($q) != $q) {
@ -117,7 +58,7 @@ class MySQLSearch extends SearchEngine
'AGAINST (\''.addslashes(strtolower($q)).'\' IN BOOLEAN MODE)', 'OR'); 'AGAINST (\''.addslashes(strtolower($q)).'\' IN BOOLEAN MODE)', 'OR');
} }
return true; return true;
} else if ('identica_notices' === $this->table) { } else if ('notice' === $this->table) {
// Don't show imported notices // Don't show imported notices
$this->target->whereAdd('notice.is_local != ' . Notice::GATEWAY); $this->target->whereAdd('notice.is_local != ' . Notice::GATEWAY);
@ -143,13 +84,13 @@ class MySQLLikeSearch extends SearchEngine
{ {
function query($q) function query($q)
{ {
if ('identica_people' === $this->table) { if ('profile' === $this->table) {
$qry = sprintf('(nickname LIKE "%%%1$s%%" OR '. $qry = sprintf('(nickname LIKE "%%%1$s%%" OR '.
' fullname LIKE "%%%1$s%%" OR '. ' fullname LIKE "%%%1$s%%" OR '.
' location LIKE "%%%1$s%%" OR '. ' location LIKE "%%%1$s%%" OR '.
' bio LIKE "%%%1$s%%" OR '. ' bio LIKE "%%%1$s%%" OR '.
' homepage LIKE "%%%1$s%%")', addslashes($q)); ' homepage LIKE "%%%1$s%%")', addslashes($q));
} else if ('identica_notices' === $this->table) { } else if ('notice' === $this->table) {
$qry = sprintf('content LIKE "%%%1$s%%"', addslashes($q)); $qry = sprintf('content LIKE "%%%1$s%%"', addslashes($q));
} else { } else {
throw new ServerException('Unknown table: ' . $this->table); throw new ServerException('Unknown table: ' . $this->table);
@ -165,9 +106,9 @@ class PGSearch extends SearchEngine
{ {
function query($q) function query($q)
{ {
if ('identica_people' === $this->table) { if ('profile' === $this->table) {
return $this->target->whereAdd('textsearch @@ plainto_tsquery(\''.addslashes($q).'\')'); return $this->target->whereAdd('textsearch @@ plainto_tsquery(\''.addslashes($q).'\')');
} else if ('identica_notices' === $this->table) { } else if ('notice' === $this->table) {
// XXX: We need to filter out gateway notices (notice.is_local = -2) --Zach // XXX: We need to filter out gateway notices (notice.is_local = -2) --Zach

View File

@ -0,0 +1,45 @@
You can get a significant boost in performance using Sphinx Search
instead of your database server to search for users and notices.
<http://sphinxsearch.com/>.
Configuration
-------------
In StatusNet's configuration, you can adjust the following settings
under 'sphinx':
enabled: Set to false to turn Sphinx search off while the plugin is loaded. Default true.
server: a string with the hostname of the sphinx server.
port: an integer with the port number of the sphinx server.
Requirements
------------
To use a Sphinx server to search users and notices, you also need
to install, compile and enable the sphinx pecl extension for php on the
client side, which itself depends on the sphinx development files.
"pecl install sphinx" should take care of that. Add "extension=sphinx.so"
to your php.ini and reload apache to enable it.
You can update your MySQL or Postgresql databases to drop their fulltext
search indexes, since they're now provided by sphinx.
You will also need a Sphinx server to serve the search queries.
On the sphinx server side, a script reads the main database and builds
the keyword index. A cron job reads the database and keeps the sphinx
indexes up to date. plugins/SphinxSearch/scripts/index_update.php should
be called by cron every 5 minutes, for example. scripts/sphinx.sh is an
init.d script to start and stop the sphinx search daemon.
Server configuration
--------------------
scripts/gen_config.php can generate a sphinx.conf file listing MySQL
data sources for your databases. You may need to tweak paths afterwards.
$ plugins/SphinxSearch/scripts/gen_config.php > sphinx.conf
If you wish, you can build a full config yourself based on sphinx.conf.sample

View File

@ -0,0 +1,100 @@
<?php
/**
* StatusNet, the distributed open-source microblogging tool
*
* PHP version 5
*
* LICENCE: This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @category Plugin
* @package StatusNet
* @author Brion Vibber <brion@status.net>
* @copyright 2009 Control Yourself, Inc.
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
* @link http://laconi.ca/
*/
// Bail out unless the STATUSNET constant is defined
// (presumably set by the application bootstrap -- confirm).
if (!defined('STATUSNET')) {
exit(1);
}
// Set defaults if not already set in the config array...
// This runs at include time and writes straight into the global $config.
global $config;
// Note that 'enabled' defaults to true here: once this file is loaded, Sphinx
// is on unless the site config explicitly sets $config['sphinx']['enabled'].
$sphinxDefaults =
array('enabled' => true,
'server' => 'localhost',
'port' => 3312);
foreach($sphinxDefaults as $key => $val) {
// isset() is false both for a missing key and for a key holding null,
// so either case receives the default value.
if (!isset($config['sphinx'][$key])) {
$config['sphinx'][$key] = $val;
}
}
/**
 * Plugin that supplies a Sphinx-backed search engine.
 *
 * It hooks two events: Autoload, to pull in the SphinxSearch class file on
 * demand, and GetSearchEngine, to offer a SphinxSearch instance in place of
 * the default engine selection.
 *
 * @category Plugin
 * @package  StatusNet
 * @author   Brion Vibber <brion@status.net>
 * @license  http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
 * @link     http://laconi.ca/
 * @link     http://twitter.com/
 */
class SphinxSearchPlugin extends Plugin
{
    /**
     * Load the SphinxSearch class file when that class is requested.
     *
     * @param string $cls name of the class being autoloaded
     *
     * @return boolean hook return: false once the class file has been
     *                 included, true for every other class name
     */
    function onAutoload($cls)
    {
        // Loose comparison on purpose: mirrors switch/case matching.
        if ($cls == 'SphinxSearch') {
            $classFile = INSTALLDIR . '/plugins/SphinxSearch/' .
                         strtolower($cls) . '.php';
            include_once $classFile;
            return false;
        }

        return true;
    }

    /**
     * Create a Sphinx search engine object for the given table type.
     *
     * @param Memcached_DataObject $target         data object the search applies to
     * @param string               $table          table name being searched
     * @param SearchEngine         &$search_engine receives the SphinxSearch object
     *                                             when one could be set up
     *
     * @return boolean hook return: false when $search_engine was filled in,
     *                 true when Sphinx is disabled or not reachable
     *
     * @throws ServerException when Sphinx is enabled but the SphinxClient
     *                         class does not exist
     */
    function onGetSearchEngine(Memcached_DataObject $target, $table, &$search_engine)
    {
        if (!common_config('sphinx', 'enabled')) {
            // Sphinx disabled
            return true;
        }

        if (!class_exists('SphinxClient')) {
            throw new ServerException('Sphinx PHP extension must be installed.');
        }

        $candidate = new SphinxSearch($target, $table);
        if (!$candidate->is_connected()) {
            // Sphinx disconnected
            return true;
        }

        $search_engine = $candidate;
        return false;
    }
}

View File

@ -0,0 +1,126 @@
#!/usr/bin/env php
<?php
/*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2009, StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Root of the StatusNet install: three directories above this script.
define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..'));

// Long options accepted by this script
// (presumably consumed by commandline.inc -- confirm).
$longoptions = array('base=', 'network');

$helptext = <<<END_OF_TRIM_HELP
Generates sphinx.conf file based on StatusNet configuration.
--base Base dir to Sphinx install
(default /usr/local/sphinx)
--network Use status_network global config table
(non-functional at present)
END_OF_TRIM_HELP;

require_once INSTALLDIR . '/scripts/commandline.inc';
require dirname(__FILE__) . '/sphinx-utils.php';

// The searchd section below interpolates {$base}; it has to be assigned in
// this scope, otherwise the log and pid paths come out as "/log/...".
$base = sphinx_base();
$timestamp = date('r');

// Each heredoc ends with a blank line so that the section printed next
// starts on a line of its own.
print <<<END
#
# Sphinx configuration for StatusNet
# Generated {$timestamp}
#

END;

// Emits one source/index pair per table for every site.
sphinx_iterate_sites('sphinx_site_template');

print <<<END

indexer
{
mem_limit = 300M
}

searchd
{
port = 3312
log = {$base}/log/searchd.log
query_log = {$base}/log/query.log
read_timeout = 5
max_children = 30
pid_file = {$base}/log/searchd.pid
max_matches = 1000
seamless_rotate = 1
preopen_indexes = 0
unlink_old = 1
}

END;
/**
 * Build config entries for a single site: one source/index pair for the
 * profile table and one for the notice table.
 *
 * @fixme we only seem to have master DB currently available...
 *
 * @param object $sn site record handed over by sphinx_iterate_sites()
 *
 * @return string concatenation of whatever the two sphinx_template() calls return
 */
function sphinx_site_template($sn)
{
    $profileSection = sphinx_template(
        $sn,
        'profile',
        'SELECT id, UNIX_TIMESTAMP(created) as created_ts, nickname, fullname, location, bio, homepage FROM profile',
        'SELECT * FROM profile where id = $id'
    );

    $noticeSection = sphinx_template(
        $sn,
        'notice',
        'SELECT id, UNIX_TIMESTAMP(created) as created_ts, content FROM notice',
        'SELECT * FROM notice where notice.id = $id AND notice.is_local != -2'
    );

    return $profileSection . $noticeSection;
}
/**
 * Print the Sphinx "source" and "index" sections for one table of one site.
 *
 * Source and index names are derived from the site's database name and the
 * table name, so several sites can share a single sphinx.conf.
 *
 * @param object $sn         site record; reads sitename, dbname, dbhost, dbuser, dbpass
 * @param string $table      table name, used as suffix for the source and index names
 * @param string $query      main extraction query (sql_query)
 * @param string $query_info per-document lookup query (sql_query_info)
 *
 * @return void output is printed, nothing is returned
 */
function sphinx_template($sn, $table, $query, $query_info)
{
    $base = sphinx_base();
    $dbtype = common_config('db', 'type');

    // The block starts and ends with a blank line so that consecutive
    // sections never end up fused onto one line ("}#", "}indexer").
    print <<<END

#
# {$sn->sitename}
#
source {$sn->dbname}_src_{$table}
{
type = {$dbtype}
sql_host = {$sn->dbhost}
sql_user = {$sn->dbuser}
sql_pass = {$sn->dbpass}
sql_db = {$sn->dbname}
sql_query_pre = SET NAMES utf8;
sql_query = {$query}
sql_query_info = {$query_info}
sql_attr_timestamp = created_ts
}

index {$sn->dbname}_{$table}
{
source = {$sn->dbname}_src_{$table}
path = {$base}/data/{$sn->dbname}_{$table}
docinfo = extern
charset_type = utf-8
min_word_len = 3
}

END;
}

View File

@ -0,0 +1,61 @@
#!/usr/bin/env php
<?php
/*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2009, StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Root of the StatusNet install: three directories above this script.
define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..'));

// 'rotate' must be listed here: it is documented in the help text and read
// via have_option('rotate') below, so it has to be an accepted option
// (presumably enforced by commandline.inc -- confirm).
$longoptions = array('base=', 'network', 'rotate');

$helptext = <<<END_OF_TRIM_HELP
Runs Sphinx search indexer.
--rotate Have Sphinx run index update in background and
rotate updated indexes into place as they finish.
--base Base dir to Sphinx install
(default /usr/local/sphinx)
--network Use status_network global config table for site list
(non-functional at present)
END_OF_TRIM_HELP;

require_once INSTALLDIR . '/scripts/commandline.inc';
require dirname(__FILE__) . '/sphinx-utils.php';

// Run the indexer once per site.
sphinx_iterate_sites('sphinx_index_update');
/**
 * Run the Sphinx indexer for the notice and profile indexes of one site.
 *
 * The command line is echoed before it is executed. Paths and index names
 * are shell-escaped so that a base dir or database name containing spaces
 * or shell metacharacters cannot break or alter the command.
 *
 * @param object $sn site record; only dbname is read
 *
 * @return void
 */
function sphinx_index_update($sn)
{
    $base = sphinx_base();

    $args = array();
    if (have_option('rotate')) {
        $args[] = '--rotate';
    }
    // Index names follow the "<dbname>_<table>" scheme.
    foreach (array('notice', 'profile') as $index) {
        $args[] = escapeshellarg("{$sn->dbname}_{$index}");
    }

    $cmd = escapeshellarg("$base/bin/indexer") .
           ' --config ' . escapeshellarg("$base/etc/sphinx.conf") .
           ' ' . implode(' ', $args);

    print "$cmd\n";
    system($cmd);
}

View File

@ -0,0 +1,63 @@
<?php
/*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2009, StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Report whether the 'network' command-line option was given.
 *
 * @return mixed whatever have_option('network') returns
 */
function sphinx_use_network()
{
    $networkRequested = have_option('network');

    return $networkRequested;
}
/**
 * Determine the base directory of the Sphinx install.
 *
 * @return string value of the 'base' option when given,
 *                otherwise "/usr/local/sphinx"
 */
function sphinx_base()
{
    if (!have_option('base')) {
        return "/usr/local/sphinx";
    }

    return get_option_value('base');
}
/**
 * Invoke a callback once for every site that should be handled.
 *
 * With the 'network' option the sites come from the status_network table.
 * Otherwise a single site record is built from the database connection
 * string in the local configuration.
 *
 * On failure a message is written to STDERR and the script exits with
 * status 1. STDERR is used so that messages do not land in redirected
 * output such as a generated sphinx.conf.
 * NOTE(review): the STDERR constant only exists under the CLI SAPI; this
 * file appears to be used from command-line scripts only -- confirm.
 *
 * @param callback $callback called with one argument: an object exposing
 *                           sitename, dbhost, dbuser, dbpass and dbname
 *
 * @return void
 */
function sphinx_iterate_sites($callback)
{
    if (sphinx_use_network()) {
        // @fixme this should use, like, some kind of config
        Status_network::setupDB('localhost', 'statusnet', 'statuspass', 'statusnet');
        $sn = new Status_network();
        if (!$sn->find()) {
            fwrite(STDERR, "Confused... no sites in status_network table or lookup failed.\n");
            exit(1);
        }
        while ($sn->fetch()) {
            call_user_func($callback, $sn);
        }
        return;
    }

    // Single-site mode: split "type://user:pass@host/dbname" into its parts.
    $dsn = common_config('db', 'database');
    if (!preg_match('!^(mysqli?|pgsql)://(.*?):(.*?)@(.*?)/(.*?)$!', $dsn, $matches)) {
        fwrite(STDERR, "Unrecognized database configuration string in config.php\n");
        exit(1);
    }

    // The first two captures (whole match, db type) are not needed here.
    list(/*all*/, /*dbtype*/, $dbuser, $dbpass, $dbhost, $dbname) = $matches;
    $sn = (object)array(
        'sitename' => common_config('site', 'name'),
        'dbhost' => $dbhost,
        'dbuser' => $dbuser,
        'dbpass' => $dbpass,
        'dbname' => $dbname);
    call_user_func($callback, $sn);
}

View File

@ -0,0 +1,96 @@
<?php
/*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2008, 2009, StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Bail out unless the STATUSNET constant is defined
// (presumably set by the application bootstrap -- confirm).
if (!defined('STATUSNET')) {
exit(1);
}
/**
 * Search engine backed by a Sphinx search daemon.
 *
 * Matching ids are fetched from Sphinx and then applied to the target data
 * object as an "id in (...)" condition.
 * NOTE(review): $this->target and $this->table are presumably set by the
 * SearchEngine constructor -- confirm.
 */
class SphinxSearch extends SearchEngine
{
    private $sphinx;
    private $connected;

    /**
     * Probe the configured Sphinx server and, when it answers, set up a client.
     *
     * When the probe fails the object is only marked as disconnected; the
     * parent constructor is not run and no client is created.
     *
     * @param object $target data object the search applies to
     * @param string $table  table name being searched
     */
    function __construct($target, $table)
    {
        $host = common_config('sphinx', 'server');
        $port = common_config('sphinx', 'port');

        // Plain socket probe; warnings are suppressed and the result checked.
        $probe = @fsockopen($host, $port);
        if (!$probe) {
            $this->connected = false;
            return;
        }
        fclose($probe);

        parent::__construct($target, $table);
        $this->sphinx = new SphinxClient;
        $this->sphinx->setServer($host, $port);
        $this->connected = true;
    }

    /**
     * @return boolean whether the constructor's connection probe succeeded
     */
    function is_connected()
    {
        return $this->connected;
    }

    /**
     * Set result limits on the Sphinx client and on the target object.
     *
     * @param int     $offset offset passed to Sphinx
     * @param int     $count  number of results requested
     * @param boolean $rss    when true the match cap equals $count, otherwise
     *                        it is 50 * ($count - 1)
     *
     * @return mixed result of the target's limit(0, $count)
     */
    function limit($offset, $count, $rss = false)
    {
        //FIXME without LARGEST_POSSIBLE, the most recent results aren't returned
        // this probably has a large impact on performance
        $LARGEST_POSSIBLE = 1e6;

        // return at most 50 pages of results outside of RSS mode
        $maxMatches = $rss ? $count : 50 * ($count - 1);
        $this->sphinx->setLimits($offset, $count, $maxMatches, $LARGEST_POSSIBLE);

        return $this->target->limit(0, $count);
    }

    /**
     * Run the query against this site's Sphinx index and restrict the target
     * to the ids that matched.
     *
     * @param string $q query string
     *
     * @return boolean true when the result has a 'matches' entry, false otherwise
     */
    function query($q)
    {
        $result = $this->sphinx->query($q, $this->remote_table());
        if (isset($result['matches'])) {
            $idList = implode(', ', array_keys($result['matches']));
            $this->target->whereAdd("id in ($idList)");
            return true;
        }

        return false;
    }

    /**
     * Apply a sort mode; only 'chron' is handled.
     *
     * @param string $mode sort mode name
     *
     * @return mixed result of the target's orderBy() for 'chron', otherwise null
     */
    function set_sort_mode($mode)
    {
        if ('chron' !== $mode) {
            return;
        }

        $this->sphinx->SetSortMode(SPH_SORT_ATTR_DESC, 'created_ts');
        return $this->target->orderBy('created desc');
    }

    /**
     * @return string Sphinx index name, "<dbname>_<table>"
     */
    function remote_table()
    {
        return $this->dbname() . '_' . $this->table;
    }

    /**
     * Extract the database name from the configured connection string.
     *
     * @return string everything after the first "/" that follows the "@"
     *
     * @throws ServerException when the connection string does not match
     */
    function dbname()
    {
        // @fixme there should be a less dreadful way to do this.
        // DB objects won't give database back until they connect, it's confusing
        $matched = preg_match(
            '!^.*?://.*?:.*?@.*?/(.*?)$!',
            common_config('db', 'database'),
            $matches
        );
        if (!$matched) {
            throw new ServerException("Sphinx search could not identify database name");
        }

        return $matches[1];
    }
}

View File

@ -1,24 +0,0 @@
#!/bin/sh
# StatusNet - a distributed open-source microblogging tool
# Copyright (C) 2008, 2009, StatusNet, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# This program tries to start the daemons for StatusNet.
# Note that the 'maildaemon' needs to run as a mail filter.
/usr/local/bin/indexer --config /usr/local/etc/sphinx.conf --all --rotate

View File

@ -1,24 +0,0 @@
#!/bin/sh
# StatusNet - a distributed open-source microblogging tool
# Copyright (C) 2008, 2009, StatusNet, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# This program tries to start the daemons for StatusNet.
# Note that the 'maildaemon' needs to run as a mail filter.
/usr/local/bin/indexer --config /usr/local/etc/sphinx.conf --all