From 80ad09dd3b216c44b51f71582674b6fceb3cf684 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Tue, 20 Dec 2011 12:24:22 -0500 Subject: [PATCH 01/47] Initial version gets score at end of notice save --- ActivitySpamPlugin.php | 163 +++++++++++++++++++++++++++++++++++++++++ Spam_score.php | 96 ++++++++++++++++++++++++ 2 files changed, 259 insertions(+) create mode 100644 ActivitySpamPlugin.php create mode 100644 Spam_score.php diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php new file mode 100644 index 0000000000..473ba9ded0 --- /dev/null +++ b/ActivitySpamPlugin.php @@ -0,0 +1,163 @@ +. + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2011 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + // This check helps protect against security problems; + // your code file can't be executed directly from the web. + exit(1); +} + +/** + * Check new notices with activity spam service. + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2011 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ +class ActivitySpamPlugin extends Plugin +{ + public $server = null; + + public $username = null; + public $password = null; + + /** + * Initializer + * + * @return boolean hook value; true means continue processing, false means stop. + */ + function initialize() + { + foreach (array('username', 'password', 'server') as $attr) { + if (!$this->$attr) { + $this->$attr = common_config('activityspam', $attr); + } + } + + return true; + } + + /** + * Database schema setup + * + * @see Schema + * @see ColumnDef + * + * @return boolean hook value; true means continue processing, false means stop. + */ + + function onCheckSchema() + { + $this->log(LOG_INFO, "Checking schema"); + + $schema = Schema::get(); + $schema->ensureTable('spam_score', Spam_score::schemaDef()); + + $this->log(LOG_INFO, "Checked schema"); + + return true; + } + + /** + * Load related modules when needed + * + * @param string $cls Name of the class to be loaded + * + * @return boolean hook value; true means continue processing, false means stop. + */ + + function onAutoload($cls) + { + $dir = dirname(__FILE__); + + switch ($cls) + { + case 'Spam_score': + include_once $dir . '/'.$cls.'.php'; + return false; + default: + return true; + } + } + + /** + * This should probably be done in its own queue handler + */ + + function onEndNoticeSave($notice) + { + // FIXME: need this to autoload ActivityStreamsMediaLink + $doc = new ActivityStreamJSONDocument(); + + $activity = $notice->asActivity(null); + + $client = new HTTPClient($this->server . "/is-this-spam"); + + $client->setMethod('POST'); + $client->setAuth($this->username, $this->password); + $client->setHeader('Content-Type', 'application/json'); + $client->setBody(json_encode($activity->asArray())); + + $response = $client->send(); + + if (!$response->isOK()) { + $this->log(LOG_ERR, "Error " . $response->getStatus() . " checking spam score: " . $response->getBody()); + return true; + } + + $result = json_decode($response->getBody()); + + $score = new Spam_score(); + + $score->notice_id = $notice->id; + $score->score = $result->probability; + $score->created = common_sql_now(); + + $score->insert(); + + $this->log(LOG_INFO, "Notice " . $notice->id . " has spam score " . $score->score); + + return true; + } + + function onPluginVersion(&$versions) + { + $versions[] = array('name' => 'ActivitySpam', + 'version' => STATUSNET_VERSION, + 'author' => 'Evan Prodromou', + 'homepage' => 'http://status.net/wiki/Plugin:ActivitySpam', + 'description' => + _m('Test notices against the Activity Spam service.')); + return true; + } +} diff --git a/Spam_score.php b/Spam_score.php new file mode 100644 index 0000000000..fde2876d0c --- /dev/null +++ b/Spam_score.php @@ -0,0 +1,96 @@ +. + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2011 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * Score of a notice per the activity spam service + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3 + * @link http://status.net/ + * + * @see DB_DataObject + */ + +class Spam_score extends Managed_DataObject +{ + public $__table = 'spam_score'; // table name + + public $notice_id; // int + public $score; // float + public $created; // datetime + + /** + * Get an instance by key + * + * @param string $k Key to use to lookup (usually 'notice_id' for this class) + * @param mixed $v Value to lookup + * + * @return Spam_score object found, or null for no hits + * + */ + function staticGet($k, $v=null) + { + return Managed_DataObject::staticGet('Spam_score', $k, $v); + } + + /** + * The One True Thingy that must be defined and declared. + */ + public static function schemaDef() + { + return array( + 'description' => 'score of the notice per activityspam', + 'fields' => array( + 'notice_id' => array('type' => 'int', + 'not null' => true, + 'description' => 'notice getting scored'), + 'score' => array('type' => 'double', + 'not null' => true, + 'description' => 'score for the notice (0.0, 1.0)'), + 'created' => array('type' => 'datetime', + 'not null' => true, + 'description' => 'date this record was created'), + ), + 'primary key' => array('notice_id'), + 'foreign keys' => array( + 'spam_score_notice_id_fkey' => array('notice', array('notice_id' => 'id')), + ), + 'indexes' => array( + 'spam_score_created_idx' => array('created'), + ), + ); + } +} From 24feeefcc383feb8019f1badd9159add9521d683 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Tue, 20 Dec 2011 12:59:40 -0500 Subject: [PATCH 02/47] remove unnecessary log() calls --- ActivitySpamPlugin.php | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 473ba9ded0..5fe5360fde 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -78,13 +78,9 @@ class ActivitySpamPlugin extends Plugin function onCheckSchema() { - $this->log(LOG_INFO, "Checking schema"); - $schema = Schema::get(); $schema->ensureTable('spam_score', Spam_score::schemaDef()); - $this->log(LOG_INFO, "Checked schema"); - return true; } From 77ab07920fa14f39bc21c6a48726072f80ccbde0 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 2 Mar 2012 10:20:01 -0600 Subject: [PATCH 03/47] more upgrade-friendly scores --- ActivitySpamPlugin.php | 21 +++++++++----- Spam_score.php | 64 ++++++++++++++++++++++++------------------ 2 files changed, 50 insertions(+), 35 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 5fe5360fde..62cfbda22f 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -1,7 +1,7 @@ - * @copyright 2011 StatusNet, Inc. + * @copyright 2011,2012 StatusNet, Inc. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 * @link http://status.net/ */ @@ -40,7 +40,7 @@ if (!defined('STATUSNET')) { * @category Spam * @package StatusNet * @author Evan Prodromou - * @copyright 2011 StatusNet, Inc. + * @copyright 2011,2012 StatusNet, Inc. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 * @link http://status.net/ */ @@ -107,7 +107,11 @@ class ActivitySpamPlugin extends Plugin } /** - * This should probably be done in its own queue handler + * When a notice is saved, check its spam score + * + * @param Notice $notice Notice that was just saved + * + * @return boolean hook value; true means continue processing, false means stop. */ function onEndNoticeSave($notice) @@ -135,9 +139,12 @@ class ActivitySpamPlugin extends Plugin $score = new Spam_score(); - $score->notice_id = $notice->id; - $score->score = $result->probability; - $score->created = common_sql_now(); + $score->notice_id = $notice->id; + $score->score = $result->probability; + $score->is_spam = $result->isSpam; + $score->scaled = (int) ($result->probability * Spam_score::MAX_SCALED); + $score->created = common_sql_now(); + $score->notice_created = $notice->created; $score->insert(); diff --git a/Spam_score.php b/Spam_score.php index fde2876d0c..440edb6b7f 100644 --- a/Spam_score.php +++ b/Spam_score.php @@ -1,32 +1,32 @@ . - * - * @category Spam - * @package StatusNet - * @author Evan Prodromou - * @copyright 2011 StatusNet, Inc. - * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 - * @link http://status.net/ - */ + /** + * StatusNet - the distributed open-source microblogging tool + * Copyright (C) 2011, StatusNet, Inc. + * + * Score of a notice by activity spam service + * + * PHP version 5 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2011 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ if (!defined('STATUSNET')) { exit(1); @@ -46,6 +46,7 @@ if (!defined('STATUSNET')) { class Spam_score extends Managed_DataObject { + const MAX_SCALED = 1000000; public $__table = 'spam_score'; // table name public $notice_id; // int @@ -80,9 +81,15 @@ class Spam_score extends Managed_DataObject 'score' => array('type' => 'double', 'not null' => true, 'description' => 'score for the notice (0.0, 1.0)'), + 'scaled' => array('type' => 'int', + 'description' => 'scaled score for the notice (0, 1000000)'), + 'is_spam' => array('type' => 'tinyint', + 'description' => 'flag for spamosity'), 'created' => array('type' => 'datetime', 'not null' => true, 'description' => 'date this record was created'), + 'notice_created' => array('type' => 'datetime', + 'description' => 'date the notice was created'), ), 'primary key' => array('notice_id'), 'foreign keys' => array( @@ -90,6 +97,7 @@ class Spam_score extends Managed_DataObject ), 'indexes' => array( 'spam_score_created_idx' => array('created'), + 'spam_score_scaled_idx' => array('scaled'), ), ); } From fb64cb63b6ba54966cc2784c5e5bba0e7483e28e Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 2 Mar 2012 10:37:20 -0600 Subject: [PATCH 04/47] upgrade script for spam score --- ActivitySpamPlugin.php | 4 ++- Spam_score.php | 62 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 62cfbda22f..63ac94ef5f 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -81,6 +81,8 @@ class ActivitySpamPlugin extends Plugin $schema = Schema::get(); $schema->ensureTable('spam_score', Spam_score::schemaDef()); + Spam_score::upgrade(); + return true; } @@ -142,7 +144,7 @@ class ActivitySpamPlugin extends Plugin $score->notice_id = $notice->id; $score->score = $result->probability; $score->is_spam = $result->isSpam; - $score->scaled = (int) ($result->probability * Spam_score::MAX_SCALED); + $score->scaled = Spam_score::scale($score->score); $score->created = common_sql_now(); $score->notice_created = $notice->created; diff --git a/Spam_score.php b/Spam_score.php index 440edb6b7f..32eaba803a 100644 --- a/Spam_score.php +++ b/Spam_score.php @@ -46,7 +46,7 @@ if (!defined('STATUSNET')) { class Spam_score extends Managed_DataObject { - const MAX_SCALED = 1000000; + const MAX_SCALED = 10000; public $__table = 'spam_score'; // table name public $notice_id; // int @@ -82,7 +82,7 @@ class Spam_score extends Managed_DataObject 'not null' => true, 'description' => 'score for the notice (0.0, 1.0)'), 'scaled' => array('type' => 'int', - 'description' => 'scaled score for the notice (0, 1000000)'), + 'description' => 'scaled score for the notice (0, 10000)'), 'is_spam' => array('type' => 'tinyint', 'description' => 'flag for spamosity'), 'created' => array('type' => 'datetime', @@ -101,4 +101,62 @@ class Spam_score extends Managed_DataObject ), ); } + + public static function upgrade() + { + Spam_score::upgradeScaled(); + Spam_score::upgradeIsSpam(); + Spam_score::upgradeNoticeCreated(); + } + + protected static function upgradeScaled() + { + $score = new Spam_score(); + $score->whereAdd('scaled IS NULL'); + + if ($score->find()) { + while ($score->fetch()) { + $orig = clone($score); + $score->scaled = Spam_score::scale($score->score); + $score->update($orig); + } + } + } + + protected static function upgradeIsSpam() + { + $score = new Spam_score(); + $score->whereAdd('is_spam IS NULL'); + + if ($score->find()) { + while ($score->fetch()) { + $orig = clone($score); + $score->is_spam = ($score->score >= 0.90) ? 1 : 0; + $score->update($orig); + } + } + } + + protected static function upgradeNoticeCreated() + { + $score = new Spam_score(); + $score->whereAdd('notice_created IS NULL'); + + if ($score->find()) { + while ($score->fetch()) { + $notice = Notice::staticGet('id', $score->notice_id); + if (!empty($notice)) { + $orig = clone($score); + $score->notice_created = $notice->created; + $score->update($orig); + } + } + } + } + + protected static function scale($score) + { + $raw = round($score * Spam_score::MAX_SCALE); + return max(0, min(Spam_score::MAX_SCALE, $raw)); + } } From ec7a3c80e565831e5ffad3f690017583e92b8507 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 4 Mar 2012 08:49:07 -0600 Subject: [PATCH 05/47] fix MAX_SCALE constant --- Spam_score.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Spam_score.php b/Spam_score.php index 32eaba803a..4d2aef4152 100644 --- a/Spam_score.php +++ b/Spam_score.php @@ -46,7 +46,7 @@ if (!defined('STATUSNET')) { class Spam_score extends Managed_DataObject { - const MAX_SCALED = 10000; + const MAX_SCALE = 10000; public $__table = 'spam_score'; // table name public $notice_id; // int From 1d46a1288f3b0fb25bd851272ed320d806330d49 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 5 Mar 2012 09:09:14 -0600 Subject: [PATCH 06/47] delete spam score on deleted notice --- ActivitySpamPlugin.php | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 63ac94ef5f..6c1f8df472 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -155,6 +155,14 @@ class ActivitySpamPlugin extends Plugin return true; } + function onNoticeDeleteRelated($notice) { + $score = Spam_score::staticGet('notice_id', $notice->id); + if (!empty($score)) { + $score->delete(); + } + return true; + } + function onPluginVersion(&$versions) { $versions[] = array('name' => 'ActivitySpam', From 8a1911322f548fd45b83eb81583ca8d9592aebfd Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 5 Mar 2012 09:32:25 -0600 Subject: [PATCH 07/47] make scale() public --- Spam_score.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Spam_score.php b/Spam_score.php index 4d2aef4152..c815b42e36 100644 --- a/Spam_score.php +++ b/Spam_score.php @@ -154,7 +154,7 @@ class Spam_score extends Managed_DataObject } } - protected static function scale($score) + public static function scale($score) { $raw = round($score * Spam_score::MAX_SCALE); return max(0, min(Spam_score::MAX_SCALE, $raw)); From 1121b101289774a4e9251542ec0b25624e3b02a7 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 5 Mar 2012 09:58:57 -0600 Subject: [PATCH 08/47] New SpamFilter class --- ActivitySpamPlugin.php | 41 ++++------- Spam_score.php | 16 +++++ spamfilter.php | 156 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 184 insertions(+), 29 deletions(-) create mode 100644 spamfilter.php diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 6c1f8df472..fcc6673ce2 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -47,7 +47,6 @@ if (!defined('STATUSNET')) { class ActivitySpamPlugin extends Plugin { public $server = null; - public $username = null; public $password = null; @@ -64,6 +63,8 @@ class ActivitySpamPlugin extends Plugin } } + $this->filter = new SpamFilter($this->server, $this->username, $this->password); + return true; } @@ -103,6 +104,9 @@ class ActivitySpamPlugin extends Plugin case 'Spam_score': include_once $dir . '/'.$cls.'.php'; return false; + case 'SpamFilter': + include_once $dir . '/'.strtolower($cls).'.php'; + return false; default: return true; } @@ -118,40 +122,19 @@ class ActivitySpamPlugin extends Plugin function onEndNoticeSave($notice) { - // FIXME: need this to autoload ActivityStreamsMediaLink - $doc = new ActivityStreamJSONDocument(); + try { - $activity = $notice->asActivity(null); + $result = $this->filter->test($notice); - $client = new HTTPClient($this->server . "/is-this-spam"); + $score = Spam_score::saveNew($notice, $result); - $client->setMethod('POST'); - $client->setAuth($this->username, $this->password); - $client->setHeader('Content-Type', 'application/json'); - $client->setBody(json_encode($activity->asArray())); + $this->log(LOG_INFO, "Notice " . $notice->id . " has spam score " . $score->score); - $response = $client->send(); - - if (!$response->isOK()) { - $this->log(LOG_ERR, "Error " . $response->getStatus() . " checking spam score: " . $response->getBody()); - return true; + } catch (Exception $e) { + // Log but continue + $this->log(LOG_ERR, $e->getMessage()); } - $result = json_decode($response->getBody()); - - $score = new Spam_score(); - - $score->notice_id = $notice->id; - $score->score = $result->probability; - $score->is_spam = $result->isSpam; - $score->scaled = Spam_score::scale($score->score); - $score->created = common_sql_now(); - $score->notice_created = $notice->created; - - $score->insert(); - - $this->log(LOG_INFO, "Notice " . $notice->id . " has spam score " . $score->score); - return true; } diff --git a/Spam_score.php b/Spam_score.php index c815b42e36..08887d06b3 100644 --- a/Spam_score.php +++ b/Spam_score.php @@ -67,6 +67,22 @@ class Spam_score extends Managed_DataObject return Managed_DataObject::staticGet('Spam_score', $k, $v); } + function saveNew($notice, $result) { + + $score = new Spam_score(); + + $score->notice_id = $notice->id; + $score->score = $result->probability; + $score->is_spam = $result->isSpam; + $score->scaled = Spam_score::scale($score->score); + $score->created = common_sql_now(); + $score->notice_created = $notice->created; + + $score->insert(); + + return $score; + } + /** * The One True Thingy that must be defined and declared. */ diff --git a/spamfilter.php b/spamfilter.php new file mode 100644 index 0000000000..0e321ebc8b --- /dev/null +++ b/spamfilter.php @@ -0,0 +1,156 @@ +. + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2012 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + // This check helps protect against security problems; + // your code file can't be executed directly from the web. + exit(1); +} + +/** + * Spam filter class + * + * Local proxy for remote filter + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2012 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +class SpamFilter { + + const HAM = 'ham'; + const SPAM = 'spam'; + + public $server; + public $username; + public $password; + + function __construct($server, $username, $password) { + + $this->server = $server; + $this->username = $username; + $this->password = $password; + } + + protected function toActivity($notice) { + // FIXME: need this to autoload ActivityStreamsMediaLink + $doc = new ActivityStreamJSONDocument(); + + $activity = $notice->asActivity(null); + + return $activity; + } + + public function test($notice) { + + $activity = $this->toActivity($notice); + return $this->testActivity($activity); + } + + public function testActivity($activity) { + + $client = new HTTPClient($this->server . "/is-this-spam"); + + $client->setMethod('POST'); + $client->setAuth($this->username, $this->password); + $client->setHeader('Content-Type', 'application/json'); + $client->setBody(json_encode($activity->asArray())); + + $response = $client->send(); + + if (!$response->isOK()) { + throw new Exception("Error " . $response->getStatus() . " checking spam score: " . $response->getBody()); + } + + $result = json_decode($response->getBody()); + + return $result; + } + + public function train($notice, $category) { + + $activity = $this->toActivity($notice); + return $this->trainActivity($activity, $category); + + } + + public function trainActivity($activity, $category) { + + switch ($category) { + case self::HAM: + $endpoint = '/this-is-ham'; + break; + case self::SPAM: + $endpoint = '/this-is-spam'; + break; + default: + throw new Exception("Unknown category: " + $category); + } + + $client = new HTTPClient($this->server . $endpoint); + + $client->setMethod('POST'); + $client->setAuth($this->username, $this->password); + $client->setHeader('Content-Type', 'application/json'); + $client->setBody(json_encode($activity->asArray())); + + $response = $client->send(); + + if (!$response->isOK()) { + throw new Exception("Error " . $response->getStatus() . " checking spam score: " . $response->getBody()); + } + + // We don't do much with the results + return true; + } + + public function trainOnError($notice, $category) { + + $activity = $this->toActivity($notice); + + return $this->trainActivityOnError($activity, $category); + } + + public function trainActivityOnError($activity, $category) { + + $result = $this->testActivity($activity); + + if (($category === self::SPAM && $result->isSpam) || + ($category === self::HAM && !$result->isSpam)) { + return true; + } else { + return $this->trainActivity($activity, $category); + } + } +} From 4ae3e4aaaf0202d580e942dbd4843e41235ea3eb Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 07:24:28 -0600 Subject: [PATCH 09/47] Added forms to train notices as spam/ham --- ActivitySpamPlugin.php | 47 ++++++++++++ Spam_score.php | 27 +++++++ train.php | 165 +++++++++++++++++++++++++++++++++++++++++ trainhamform.php | 147 ++++++++++++++++++++++++++++++++++++ trainspamform.php | 147 ++++++++++++++++++++++++++++++++++++ 5 files changed, 533 insertions(+) create mode 100644 train.php create mode 100644 trainhamform.php create mode 100644 trainspamform.php diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index fcc6673ce2..8a6ee89b3f 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -50,6 +50,9 @@ class ActivitySpamPlugin extends Plugin public $username = null; public $password = null; + const REVIEWSPAM = 'ActivitySpamPlugin::REVIEWSPAM'; + const TRAINSPAM = 'ActivitySpamPlugin::TRAINSPAM'; + /** * Initializer * @@ -101,10 +104,15 @@ class ActivitySpamPlugin extends Plugin switch ($cls) { + case 'TrainAction': + include_once $dir . '/' . strtolower(mb_substr($cls, 0, -6)) . '.php'; + return false; case 'Spam_score': include_once $dir . '/'.$cls.'.php'; return false; case 'SpamFilter': + case 'TrainSpamForm': + case 'TrainHamForm': include_once $dir . '/'.strtolower($cls).'.php'; return false; default: @@ -146,6 +154,45 @@ class ActivitySpamPlugin extends Plugin return true; } + function onUserRightsCheck($profile, $right, &$result) { + switch ($right) { + case self::REVIEWSPAM: + case self::TRAINSPAM: + $result = ($profile->hasRole(Profile_role::MODERATOR) || $profile->hasRole('modhelper')); + return false; + default: + return true; + } + } + + function onGetSpamFilter(&$filter) { + $filter = $this->filter; + return false; + } + + function onEndShowNoticeOptions(&$nli) + { + $notice = $nli->notice; + $out = $nli->out; + + if (!empty($notice)) { + + $score = Spam_score::staticGet('notice_id', $notice->id); + + if (empty($score)) { + // XXX: show a question-mark or something + } else if ($score->is_spam) { + $form = new TrainHamForm($out, $notice); + $form->show(); + } else if (!$score->is_spam) { + $form = new TrainSpamForm($out, $notice); + $form->show(); + } + } + + return true; + } + function onPluginVersion(&$versions) { $versions[] = array('name' => 'ActivitySpam', diff --git a/Spam_score.php b/Spam_score.php index 08887d06b3..d6a05e2261 100644 --- a/Spam_score.php +++ b/Spam_score.php @@ -83,6 +83,33 @@ class Spam_score extends Managed_DataObject return $score; } + function save($notice, $result) { + + $score = Spam_score::staticGet('notice_id', $notice->id); + + if (empty($score)) { + $orig = null; + $score = new Spam_score(); + } else { + $orig = clone($score); + } + + $score->notice_id = $notice->id; + $score->score = $result->probability; + $score->is_spam = $result->isSpam; + $score->scaled = Spam_score::scale($score->score); + $score->created = common_sql_now(); + $score->notice_created = $notice->created; + + if (empty($orig)) { + $score->insert(); + } else { + $score->update($orig); + } + + return $score; + } + /** * The One True Thingy that must be defined and declared. */ diff --git a/train.php b/train.php new file mode 100644 index 0000000000..46579935bc --- /dev/null +++ b/train.php @@ -0,0 +1,165 @@ +. + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2012 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + // This check helps protect against security problems; + // your code file can't be executed directly from the web. + exit(1); +} + +/** + * Train a notice as spam + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2012 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +class TrainAction extends Action +{ + protected $notice = null; + protected $filter = null; + protected $category = null; + + /** + * For initializing members of the class. + * + * @param array $argarray misc. arguments + * + * @return boolean true + */ + + function prepare($argarray) + { + parent::prepare($argarray); + + // User must be logged in. + + $user = common_current_user(); + + if (empty($user)) { + throw new ClientException(_("You must be logged in to train spam."), 403); + } + + // It must be a "real" login, not saved cookie login + + if (!common_is_real_login()) { + common_set_returnto($this->selfUrl()); + if (Event::handle('RedirectToLogin', array($this, $user))) { + common_redirect(common_local_url('login'), 303); + return false; + } + } + + // User must have the right to review spam + + if (!$user->hasRight(ActivitySpamPlugin::TRAINSPAM)) { + throw new ClientException(_('You cannot review spam on this site.'), 403); + } + + $id = $this->trim('notice'); + + $this->notice = Notice::staticGet('id', $id); + + if (empty($this->notice)) { + throw new ClientException(_("No such notice.")); + } + + $this->checkSessionToken(); + + $filter = null; + + Event::handle('GetSpamFilter', &$filter); + + if (empty($filter)) { + throw new ServerException(_("No spam filter configured.")); + } + + $this->filter = $filter; + + $this->category = $this->trim('category'); + + if ($this->category !== SpamFilter::SPAM && + $this->category !== SpamFilter::HAM) + { + throw new ClientException(_("No such category.")); + } + + return true; + } + + /** + * Handler method + * + * @param array $argarray is ignored since it's now passed in in prepare() + * + * @return void + */ + + function handle($argarray=null) + { + // Train + + $this->filter->trainOnError($this->notice, $this->category); + + // Re-test + + $result = $this->filter->test($this->notice); + + // Update or insert + + $score = Spam_score::save($notice, $result); + + // Show new toggle form + + if ($this->category === SpamFilter::SPAM) { + $form = new TrainHamForm($this, $this->notice); + } else { + $form = new TrainSpamForm($this, $this->notice); + } + + if ($this->boolean('ajax')) { + $this->startHTML('text/xml;charset=utf-8'); + $this->elementStart('head'); + // TRANS: Page title for page on which favorite notices can be unfavourited. + $this->element('title', null, _('Disfavor favorite.')); + $this->elementEnd('head'); + $this->elementStart('body'); + $form->show(); + $this->elementEnd('body'); + $this->elementEnd('html'); + } else { + common_redirect(common_local_url('spam'), 303); + } + } +} diff --git a/trainhamform.php b/trainhamform.php new file mode 100644 index 0000000000..71bd650a3f --- /dev/null +++ b/trainhamform.php @@ -0,0 +1,147 @@ +. + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2011 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + // This check helps protect against security problems; + // your code file can't be executed directly from the web. + exit(1); +} + +/** + * Form + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2011 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +class TrainHamForm extends Form { + + var $notice = null; + + function __construct($out, $notice) { + parent::__construct($out); + $this->notice = $notice; + } + + /** + * Name of the form + * + * Sub-classes should overload this with the name of their form. + * + * @return void + */ + + function formLegend() + { + return _("Train ham"); + } + + /** + * Visible or invisible data elements + * + * Display the form fields that make up the data of the form. + * Sub-classes should overload this to show their data. + * + * @return void + */ + + function formData() + { + $this->hidden('category', SpamFilter::HAM); + $this->hidden('notice', $this->notice->id); + } + + /** + * Buttons for form actions + * + * Submit and cancel buttons (or whatever) + * Sub-classes should overload this to show their own buttons. + * + * @return void + */ + + function formActions() + { + $this->submit('submit', + _('Train ham'), + 'submit', + null, + _("Mark as ham")); + } + + /** + * ID of the form + * + * Should be unique on the page. Sub-classes should overload this + * to show their own IDs. + * + * @return int ID of the form + */ + + function id() + { + return 'train_ham_' . $this->notice->id; + } + + /** + * Action of the form. + * + * URL to post to. Should be overloaded by subclasses to give + * somewhere to post to. + * + * @return string URL to post to + */ + + function action() + { + return common_local_url('train'); + } + + /** + * Class of the form. May include space-separated list of multiple classes. + * + * If 'ajax' is included, the form will automatically be submitted with + * an 'ajax=1' parameter added, and the resulting form or error message + * will replace the form after submission. + * + * It's up to you to make sure that the target action supports this! + * + * @return string the form's class + */ + + function formClass() + { + return 'form_train_ham ajax'; + } +} diff --git a/trainspamform.php b/trainspamform.php new file mode 100644 index 0000000000..07942f4546 --- /dev/null +++ b/trainspamform.php @@ -0,0 +1,147 @@ +. + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2011 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + // This check helps protect against security problems; + // your code file can't be executed directly from the web. + exit(1); +} + +/** + * Form + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2011 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +class TrainSpamForm extends Form { + + var $notice = null; + + function __construct($out, $notice) { + parent::__construct($out); + $this->notice = $notice; + } + + /** + * Name of the form + * + * Sub-classes should overload this with the name of their form. + * + * @return void + */ + + function formLegend() + { + return _("Train spam"); + } + + /** + * Visible or invisible data elements + * + * Display the form fields that make up the data of the form. + * Sub-classes should overload this to show their data. + * + * @return void + */ + + function formData() + { + $this->hidden('category', SpamFilter::SPAM); + $this->hidden('notice', $this->notice->id); + } + + /** + * Buttons for form actions + * + * Submit and cancel buttons (or whatever) + * Sub-classes should overload this to show their own buttons. + * + * @return void + */ + + function formActions() + { + $this->submit('submit', + _('Train spam'), + 'submit', + null, + _("Mark as spam")); + } + + /** + * ID of the form + * + * Should be unique on the page. Sub-classes should overload this + * to show their own IDs. + * + * @return int ID of the form + */ + + function id() + { + return 'train_spam_' . $this->notice->id; + } + + /** + * Action of the form. + * + * URL to post to. Should be overloaded by subclasses to give + * somewhere to post to. + * + * @return string URL to post to + */ + + function action() + { + return common_local_url('train'); + } + + /** + * Class of the form. May include space-separated list of multiple classes. + * + * If 'ajax' is included, the form will automatically be submitted with + * an 'ajax=1' parameter added, and the resulting form or error message + * will replace the form after submission. + * + * It's up to you to make sure that the target action supports this! + * + * @return string the form's class + */ + + function formClass() + { + return 'form_train_spam ajax'; + } +} From c6c879bdb486507230913daac6b4efbc37a4271a Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 08:09:22 -0600 Subject: [PATCH 10/47] not a reference --- ActivitySpamPlugin.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 8a6ee89b3f..26a0f5c7a5 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -170,7 +170,7 @@ class ActivitySpamPlugin extends Plugin return false; } - function onEndShowNoticeOptions(&$nli) + function onEndShowNoticeOptions($nli) { $notice = $nli->notice; $out = $nli->out; From 0b87e769454e6f15e0f0ae38c5396a34ece30ba5 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 09:04:55 -0600 Subject: [PATCH 11/47] debug log for missing score --- ActivitySpamPlugin.php | 1 + 1 file changed, 1 insertion(+) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 26a0f5c7a5..6950d54554 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -180,6 +180,7 @@ class ActivitySpamPlugin extends Plugin $score = Spam_score::staticGet('notice_id', $notice->id); if (empty($score)) { + $this->debug("No score for notice " . $notice->id); // XXX: show a question-mark or something } else if ($score->is_spam) { $form = new TrainHamForm($out, $notice); From 55a18f1d17b7a4879e2fe8b11d2b3daf5e73e99f Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 09:08:05 -0600 Subject: [PATCH 12/47] routes for training --- ActivitySpamPlugin.php | 17 +++++++++++++++++ trainhamform.php | 3 +-- trainspamform.php | 3 +-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 6950d54554..cb9477d1ad 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -194,6 +194,23 @@ class ActivitySpamPlugin extends Plugin return true; } + /** + * Map URLs to actions + * + * @param Net_URL_Mapper $m path-to-action mapper + * + * @return boolean hook value; true means continue processing, false means stop. + */ + + function onRouterInitialized($m) + { + $m->connect('main/train/spam', + array('action' => 'train', 'category' => 'spam')); + $m->connect('main/train/ham', + array('action' => 'train', 'category' => 'ham')); + return true; + } + function onPluginVersion(&$versions) { $versions[] = array('name' => 'ActivitySpam', diff --git a/trainhamform.php b/trainhamform.php index 71bd650a3f..410b3206b8 100644 --- a/trainhamform.php +++ b/trainhamform.php @@ -78,7 +78,6 @@ class TrainHamForm extends Form { function formData() { - $this->hidden('category', SpamFilter::HAM); $this->hidden('notice', $this->notice->id); } @@ -125,7 +124,7 @@ class TrainHamForm extends Form { function action() { - return common_local_url('train'); + return common_local_url('train', array('category' => 'ham')); } /** diff --git a/trainspamform.php b/trainspamform.php index 07942f4546..ecf7b04190 100644 --- a/trainspamform.php +++ b/trainspamform.php @@ -78,7 +78,6 @@ class TrainSpamForm extends Form { function formData() { - $this->hidden('category', SpamFilter::SPAM); $this->hidden('notice', $this->notice->id); } @@ -125,7 +124,7 @@ class TrainSpamForm extends Form { function action() { - return common_local_url('train'); + return common_local_url('train', array('category' => 'spam')); } /** From 5160562567f14644bdb1048a57499270345d12de Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 09:12:53 -0600 Subject: [PATCH 13/47] don't try to force 'real' login for spam training --- train.php | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/train.php b/train.php index 46579935bc..9b6a6a746d 100644 --- a/train.php +++ b/train.php @@ -71,16 +71,6 @@ class TrainAction extends Action throw new ClientException(_("You must be logged in to train spam."), 403); } - // It must be a "real" login, not saved cookie login - - if (!common_is_real_login()) { - common_set_returnto($this->selfUrl()); - if (Event::handle('RedirectToLogin', array($this, $user))) { - common_redirect(common_local_url('login'), 303); - return false; - } - } - // User must have the right to review spam if (!$user->hasRight(ActivitySpamPlugin::TRAINSPAM)) { From 32008901eced574c68b7a8b7054eb4f24a547323 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 09:14:49 -0600 Subject: [PATCH 14/47] trim() -> trimmed() --- train.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/train.php b/train.php index 9b6a6a746d..3a89c8c6bc 100644 --- a/train.php +++ b/train.php @@ -77,7 +77,7 @@ class TrainAction extends Action throw new ClientException(_('You cannot review spam on this site.'), 403); } - $id = $this->trim('notice'); + $id = $this->trimmed('notice'); $this->notice = Notice::staticGet('id', $id); @@ -97,7 +97,7 @@ class TrainAction extends Action $this->filter = $filter; - $this->category = $this->trim('category'); + $this->category = $this->trimmed('category'); if ($this->category !== SpamFilter::SPAM && $this->category !== SpamFilter::HAM) From bf617fd4559ad35e9a27f359a931bc011d1620fd Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 09:16:27 -0600 Subject: [PATCH 15/47] summon a spam filter correctly --- train.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.php b/train.php index 3a89c8c6bc..3a31a4e194 100644 --- a/train.php +++ b/train.php @@ -89,7 +89,7 @@ class TrainAction extends Action $filter = null; - Event::handle('GetSpamFilter', &$filter); + Event::handle('GetSpamFilter', array(&$filter)); if (empty($filter)) { throw new ServerException(_("No spam filter configured.")); From 6cabea6ae6c941a367017d3a0fe9a7d54fe85e2e Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 13:21:34 -0600 Subject: [PATCH 16/47] try a wrapper div for training form --- ActivitySpamPlugin.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index cb9477d1ad..e497b9a744 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -184,10 +184,14 @@ class ActivitySpamPlugin extends Plugin // XXX: show a question-mark or something } else if ($score->is_spam) { $form = new TrainHamForm($out, $notice); + $out->elementStart('div', 'notice-options'); $form->show(); + $out->elementEnd('div'); } else if (!$score->is_spam) { $form = new TrainSpamForm($out, $notice); + $out->elementStart('div', 'notice-options'); $form->show(); + $out->elementEnd('div'); } } From e9566af3d9c6c730caa5102d27f3432cc503fc7e Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 14:32:05 -0600 Subject: [PATCH 17/47] Add icons for spam toggle --- ActivitySpamPlugin.php | 8 ++++++++ icons/bullet_black.png | Bin 0 -> 211 bytes icons/exclamation.png | Bin 0 -> 701 bytes 3 files changed, 8 insertions(+) create mode 100644 icons/bullet_black.png create mode 100644 icons/exclamation.png diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index e497b9a744..b62401ce1b 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -215,6 +215,14 @@ class ActivitySpamPlugin extends Plugin return true; } + function onEndShowStyles($action) + { + $action->element('style', null, + 'form_train_spam input.submit { background: url("'.$this->path('icons/bullet_black.png').'") no-repeat 0px 0px } ' . "\n" . + 'form_train_spam input.submit { background: url("'.$this->path('icons/exclamation.png').'") no-repeat 0px 0px } '); + return true; + } + function onPluginVersion(&$versions) { $versions[] = array('name' => 'ActivitySpam', diff --git a/icons/bullet_black.png b/icons/bullet_black.png new file mode 100644 index 0000000000000000000000000000000000000000..57619706d10d9736b1849a83f2c5694fbe09c53b GIT binary patch literal 211 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!60wlNoGJgf6SkfJR9T^zbpD<_bdI{u9mbgZg z1m~xflqVLYGB~E>C#5QQ<|d}62BjvZR2H60wE-$h^>lFz(Kw&{<9vg>5sw~gS5O!4 zr|{HuUFIBKiQyL}eBJ-L{`UVT|6_O~L{G%N{Wbre{kQtZ_0LvEhN#0$9Ug7g~-`rQ^qx~m@y2OU8A z#zh~=7n#Z$Z*fx-GOtDf07cgx0suCz_W(2~Y(0tf@FX@P6EPuM_dgn$vj9LucO)%W zw%HgMW>=#oL>nZ>M&NEf08>)#)k<{$fCT_r>rPi=BV=hFh6WS^qqze>C6Ek}o{M5% za|@JGowu0t{&hgNzySHZxy@LTNh);YzZ2zSp_ zl$^T&Dnc|NLb&RD_!4>pt@VHdP)ZGER%5ZmWEe$lryR&y;2u^3cOkO4#6c%-(EY6a{600000NkvXXu0mjfxS2AI literal 0 HcmV?d00001 From 765bd6142f7c884b5c5967db76d0719f2b9bee8b Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 14:46:58 -0600 Subject: [PATCH 18/47] use - instead of _ for forms, and unique ids for submits --- ActivitySpamPlugin.php | 4 ++-- trainhamform.php | 6 +++--- trainspamform.php | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index b62401ce1b..3efa92c935 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -218,8 +218,8 @@ class ActivitySpamPlugin extends Plugin function onEndShowStyles($action) { $action->element('style', null, - 'form_train_spam input.submit { background: url("'.$this->path('icons/bullet_black.png').'") no-repeat 0px 0px } ' . "\n" . - 'form_train_spam input.submit { background: url("'.$this->path('icons/exclamation.png').'") no-repeat 0px 0px } '); + '.form-train-spam input.submit { background: url("'.$this->path('icons/bullet_black.png').'") no-repeat 0px 0px } ' . "\n" . + '.form-train-ham input.submit { background: url("'.$this->path('icons/exclamation.png').'") no-repeat 0px 0px } '); return true; } diff --git a/trainhamform.php b/trainhamform.php index 410b3206b8..05cadaa953 100644 --- a/trainhamform.php +++ b/trainhamform.php @@ -92,7 +92,7 @@ class TrainHamForm extends Form { function formActions() { - $this->submit('submit', + $this->submit('train-ham-submit-' . $this->notice->id, _('Train ham'), 'submit', null, @@ -110,7 +110,7 @@ class TrainHamForm extends Form { function id() { - return 'train_ham_' . $this->notice->id; + return 'train-ham-' . $this->notice->id; } /** @@ -141,6 +141,6 @@ class TrainHamForm extends Form { function formClass() { - return 'form_train_ham ajax'; + return 'form-train-ham ajax'; } } diff --git a/trainspamform.php b/trainspamform.php index ecf7b04190..ee1ecd2a74 100644 --- a/trainspamform.php +++ b/trainspamform.php @@ -92,7 +92,7 @@ class TrainSpamForm extends Form { function formActions() { - $this->submit('submit', + $this->submit('train-spam-submit-' . $this->notice->id, _('Train spam'), 'submit', null, @@ -110,7 +110,7 @@ class TrainSpamForm extends Form { function id() { - return 'train_spam_' . $this->notice->id; + return 'train-spam-' . $this->notice->id; } /** @@ -141,6 +141,6 @@ class TrainSpamForm extends Form { function formClass() { - return 'form_train_spam ajax'; + return 'form-train-spam ajax'; } } From 5d5aab831b50144501e14c859e5e4aebf4b23b6f Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 14:50:31 -0600 Subject: [PATCH 19/47] float lefterer --- ActivitySpamPlugin.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 3efa92c935..d060525e92 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -170,7 +170,7 @@ class ActivitySpamPlugin extends Plugin return false; } - function onEndShowNoticeOptions($nli) + function onStartShowNoticeOptions($nli) { $notice = $nli->notice; $out = $nli->out; From 74746eb63e997a5687d5abf580446216e311b703 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 15:06:27 -0600 Subject: [PATCH 20/47] No quotes for url() in CSS --- ActivitySpamPlugin.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index d060525e92..4dbd24c16a 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -218,8 +218,8 @@ class ActivitySpamPlugin extends Plugin function onEndShowStyles($action) { $action->element('style', null, - '.form-train-spam input.submit { background: url("'.$this->path('icons/bullet_black.png').'") no-repeat 0px 0px } ' . "\n" . - '.form-train-ham input.submit { background: url("'.$this->path('icons/exclamation.png').'") no-repeat 0px 0px } '); + '.form-train-spam input.submit { background: url('.$this->path('icons/bullet_black.png').') no-repeat 0px 0px } ' . "\n" . + '.form-train-ham input.submit { background: url('.$this->path('icons/exclamation.png').') no-repeat 0px 0px } '); return true; } From c6e7c1739604ad2ec8ea18f58ad576414f0fd806 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 15:09:30 -0600 Subject: [PATCH 21/47] 'Mark as ham' => 'Clear spam' --- trainhamform.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trainhamform.php b/trainhamform.php index 05cadaa953..5a4c9c07af 100644 --- a/trainhamform.php +++ b/trainhamform.php @@ -93,10 +93,10 @@ class TrainHamForm extends Form { function formActions() { $this->submit('train-ham-submit-' . $this->notice->id, - _('Train ham'), + _('Clear spam'), 'submit', null, - _("Mark as ham")); + _("Clear spam")); } /** From f139199cf11a3a49040a164dc5ecb6972732dd7c Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 15:40:24 -0600 Subject: [PATCH 22/47] New stream for reviewing notices marked as spam Especially with a new spam filter, it's nice to be able to review stuff marked as spam (and correct it). --- ActivitySpamPlugin.php | 18 +++++ Spam_score.php | 13 +++- spam.php | 171 +++++++++++++++++++++++++++++++++++++++++ spamnoticestream.php | 101 ++++++++++++++++++++++++ 4 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 spam.php create mode 100644 spamnoticestream.php diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 4dbd24c16a..b429a3ef9d 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -212,6 +212,8 @@ class ActivitySpamPlugin extends Plugin array('action' => 'train', 'category' => 'spam')); $m->connect('main/train/ham', array('action' => 'train', 'category' => 'ham')); + $m->connect('main/spam', + array('action' => 'spam')); return true; } @@ -223,6 +225,22 @@ class ActivitySpamPlugin extends Plugin return true; } + function onEndPublicGroupNav($nav) + { + $user = common_current_user(); + + if (!empty($user) && $user->hasRight(self::REVIEWSPAM)) { + $nav->out->menuItem(common_local_url('spam'), + _m('MENU','Spam'), + // TRANS: Menu item title in search group navigation panel. + _('Notices marked as spam'), + $nav->actionName == 'spam', + 'nav_timeline_spam'); + } + + return true; + } + function onPluginVersion(&$versions) { $versions[] = array('name' => 'ActivitySpam', diff --git a/Spam_score.php b/Spam_score.php index d6a05e2261..36d172f396 100644 --- a/Spam_score.php +++ b/Spam_score.php @@ -79,7 +79,9 @@ class Spam_score extends Managed_DataObject $score->notice_created = $notice->created; $score->insert(); - + + self::blow('spam_score:notice_ids'); + return $score; } @@ -107,9 +109,18 @@ class Spam_score extends Managed_DataObject $score->update($orig); } + self::blow('spam_score:notice_ids'); + return $score; } + function delete() + { + self::blow('spam_score:notice_ids'); + self::blow('spam_score:notice_ids;last'); + parent::delete(); + } + /** * The One True Thingy that must be defined and declared. */ diff --git a/spam.php b/spam.php new file mode 100644 index 0000000000..502e3776af --- /dev/null +++ b/spam.php @@ -0,0 +1,171 @@ +. + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2012 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + // This check helps protect against security problems; + // your code file can't be executed directly from the web. + exit(1); +} + +require_once INSTALLDIR.'/lib/noticelist.php'; + +/** + * SpamAction + * + * Shows the latest spam on the service + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2012 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +class SpamAction extends Action +{ + var $page = null; + var $notices = null; + + /** + * For initializing members of the class. + * + * @param array $argarray misc. arguments + * + * @return boolean true + */ + + function prepare($argarray) + { + parent::prepare($argarray); + + $this->page = ($this->arg('page')) ? ($this->arg('page')+0) : 1; + + // User must be logged in. + + $user = common_current_user(); + + if (empty($user)) { + throw new ClientException(_("You must be logged in to review."), 403); + } + + // It must be a "real" login, not saved cookie login + + if (!common_is_real_login()) { + common_set_returnto($this->selfUrl()); + if (Event::handle('RedirectToLogin', array($this, $user))) { + common_redirect(common_local_url('login'), 303); + return; + } + } + + // User must have the right to review spam + + if (!$user->hasRight(ActivitySpamPlugin::REVIEWSPAM)) { + throw new ClientException(_('You cannot review spam on this site.'), 403); + } + + $stream = new SpamNoticeStream($user->getProfile()); + + $this->notices = $stream->getNotices(($this->page-1)*NOTICES_PER_PAGE, + NOTICES_PER_PAGE + 1); + + if($this->page > 1 && $this->notices->N == 0) { + throw new ClientException(_('No such page.'), 404); + } + + return true; + } + + /** + * Handler method + * + * @param array $argarray is ignored since it's now passed in in prepare() + * + * @return void + */ + + function handle($argarray=null) + { + parent::handle($args); + + $this->showPage(); + } + + /** + * Fill the content area + * + * Shows a list of the notices in the public stream, with some pagination + * controls. + * + * @return void + */ + + function showContent() + { + $nl = new NoticeList($this->notices, $this); + + $cnt = $nl->show(); + + if ($cnt == 0) { + $this->showEmptyList(); + } + + $this->pagination($this->page > 1, + $cnt > NOTICES_PER_PAGE, + $this->page, + 'spam'); + } + + function showEmptyList() + { + // TRANS: Text displayed for public feed when there are no public notices. + $message = _('This is the timeline of spam messages for %%site.name%% but none have been detected yet.'); + + $this->elementStart('div', 'guide'); + $this->raw(common_markup_to_html($message)); + $this->elementEnd('div'); + } + + /** + * Return true if read only. + * + * MAY override + * + * @param array $args other arguments + * + * @return boolean is read only action? + */ + + function isReadOnly($args) + { + return true; + } +} diff --git a/spamnoticestream.php b/spamnoticestream.php new file mode 100644 index 0000000000..ffb8d08025 --- /dev/null +++ b/spamnoticestream.php @@ -0,0 +1,101 @@ +. + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2012 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + // This check helps protect against security problems; + // your code file can't be executed directly from the web. + exit(1); +} + +/** + * Spam notice stream + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2012 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +class SpamNoticeStream extends ScopingNoticeStream +{ + function __construct($tag, $profile = -1) + { + if (is_int($profile) && $profile == -1) { + $profile = Profile::current(); + } + parent::__construct(new CachingNoticeStream(new RawSpamNoticeStream(), + 'spam_score:notice_ids')); + } +} + +/** + * Raw stream of spammy notices + * + * @category Stream + * @package StatusNet + * @author Evan Prodromou + * @copyright 2011 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +class RawSpamNoticeStream extends NoticeStream +{ + function getNoticeIds($offset, $limit, $since_id, $max_id) + { + $ss = new Spam_score(); + + $ss->is_spam = 1; + + $ss->selectAdd(); + $ss->selectAdd('notice_id'); + + Notice::addWhereSinceId($ss, $since_id, 'notice_id'); + Notice::addWhereMaxId($ss, $max_id, 'notice_id'); + + $ss->orderBy('notice_created DESC, notice_id DESC'); + + if (!is_null($offset)) { + $ss->limit($offset, $limit); + } + + $ids = array(); + + if ($ss->find()) { + while ($ss->fetch()) { + $ids[] = $ss->notice_id; + } + } + + return $ids; + } +} From 495880c817bb27b0e8bd2e1ba6686bfe82c04a6e Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 15:46:21 -0600 Subject: [PATCH 23/47] autoload for SpamAction and SpamNoticeStream --- ActivitySpamPlugin.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index b429a3ef9d..faf2ad0ca5 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -105,12 +105,14 @@ class ActivitySpamPlugin extends Plugin switch ($cls) { case 'TrainAction': + case 'SpamAction': include_once $dir . '/' . strtolower(mb_substr($cls, 0, -6)) . '.php'; return false; case 'Spam_score': include_once $dir . '/'.$cls.'.php'; return false; case 'SpamFilter': + case 'SpamNoticeStream': case 'TrainSpamForm': case 'TrainHamForm': include_once $dir . '/'.strtolower($cls).'.php'; From 1573b5d1320ba633e411a4750ae93f9c122cb76a Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 15:58:32 -0600 Subject: [PATCH 24/47] Correctly pass notice to Spam_score::save() --- train.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.php b/train.php index 3a31a4e194..f5c82361cc 100644 --- a/train.php +++ b/train.php @@ -128,7 +128,7 @@ class TrainAction extends Action // Update or insert - $score = Spam_score::save($notice, $result); + $score = Spam_score::save($this->notice, $result); // Show new toggle form From 783df54086d35defda603ea9f7ed56681ad28b83 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 15:58:47 -0600 Subject: [PATCH 25/47] move the null assignment of $orig up a bit --- Spam_score.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Spam_score.php b/Spam_score.php index 36d172f396..997a9f83ad 100644 --- a/Spam_score.php +++ b/Spam_score.php @@ -87,10 +87,10 @@ class Spam_score extends Managed_DataObject function save($notice, $result) { + $orig = null; $score = Spam_score::staticGet('notice_id', $notice->id); if (empty($score)) { - $orig = null; $score = new Spam_score(); } else { $orig = clone($score); From 6b6ede859870c2a4758ea3fec5798a0769cd2ab4 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 16:02:56 -0600 Subject: [PATCH 26/47] title for the spam stream --- spam.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spam.php b/spam.php index 502e3776af..a6cb1bf661 100644 --- a/spam.php +++ b/spam.php @@ -54,6 +54,10 @@ class SpamAction extends Action var $page = null; var $notices = null; + function title() { + return _("Latest Spam"); + } + /** * For initializing members of the class. * From e18d45cf66da4c1e86d182053145c619d5b00196 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 7 Mar 2012 16:07:34 -0600 Subject: [PATCH 27/47] use accessors for NLI --- ActivitySpamPlugin.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index faf2ad0ca5..f68d2ad605 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -174,8 +174,8 @@ class ActivitySpamPlugin extends Plugin function onStartShowNoticeOptions($nli) { - $notice = $nli->notice; - $out = $nli->out; + $notice = $nli->getNotice(); + $out = $nli->getOut(); if (!empty($notice)) { From 5efcfb85987fd73c98b39ebd25052473b3637668 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 8 Mar 2012 07:01:21 -0600 Subject: [PATCH 28/47] Don't require real login to view spam --- spam.php | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/spam.php b/spam.php index a6cb1bf661..a66b73a829 100644 --- a/spam.php +++ b/spam.php @@ -80,16 +80,6 @@ class SpamAction extends Action throw new ClientException(_("You must be logged in to review."), 403); } - // It must be a "real" login, not saved cookie login - - if (!common_is_real_login()) { - common_set_returnto($this->selfUrl()); - if (Event::handle('RedirectToLogin', array($this, $user))) { - common_redirect(common_local_url('login'), 303); - return; - } - } - // User must have the right to review spam if (!$user->hasRight(ActivitySpamPlugin::REVIEWSPAM)) { From e8e0b8c05333f8bfeda8b1fb838e324a6f87520e Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 8 Mar 2012 07:01:37 -0600 Subject: [PATCH 29/47] Use new event to show forms --- ActivitySpamPlugin.php | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index f68d2ad605..ad33f6e998 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -172,7 +172,7 @@ class ActivitySpamPlugin extends Plugin return false; } - function onStartShowNoticeOptions($nli) + function onEndShowNoticeOptionItems($nli) { $notice = $nli->getNotice(); $out = $nli->getOut(); @@ -186,14 +186,10 @@ class ActivitySpamPlugin extends Plugin // XXX: show a question-mark or something } else if ($score->is_spam) { $form = new TrainHamForm($out, $notice); - $out->elementStart('div', 'notice-options'); $form->show(); - $out->elementEnd('div'); } else if (!$score->is_spam) { $form = new TrainSpamForm($out, $notice); - $out->elementStart('div', 'notice-options'); $form->show(); - $out->elementEnd('div'); } } From 259acbf9663f1d7af461e45d23c72cc21a588144 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 8 Mar 2012 07:46:50 -0600 Subject: [PATCH 30/47] script to retest a user's notices --- scripts/testuser.php | 95 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 scripts/testuser.php diff --git a/scripts/testuser.php b/scripts/testuser.php new file mode 100644 index 0000000000..8e70a5985f --- /dev/null +++ b/scripts/testuser.php @@ -0,0 +1,95 @@ +. + */ + +define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../../..')); + +$shortoptions = 'i:n:a'; +$longoptions = array('id=', 'nickname=', 'all'); + +$helptext = <<orderBy('created'); + $user->limit($offset, $limit); + + $found = $user->find(); + + if ($found) { + while ($user->fetch()) { + testUser($filter, $user); + } + $offset += $found; + } + + } while ($found > 0); +} + +function testUser($filter, $user) { + + $profile = $user->getProfile(); + + $str = new ProfileNoticeStream($profile, $profile); + + $offset = 0; + $limit = 100; + + do { + $notice = $str->getNotices($offset, $limit); + while ($notice->fetch()) { + print "Testing notice " . $notice->id . "..."; + $result = $filter->test($notice); + Spam_score::save($notice, $result); + print (($result->isSpam) ? "SPAM" : "HAM")."\n"; + } + $offset += $notice->N; + } while ($notice->N > 0); +} + +try { + $filter = null; + Event::handle('GetSpamFilter', array(&$filter)); + if (empty($filter)) { + throw new Exception(_("No spam filter.")); + } + if (get_option('a', 'all')) { + testAllUsers($filter); + } else { + $user = getUser(); + testUser($filter, $user); + } +} catch (Exception $e) { + print $e->getMessage()."\n"; + exit(1); +} From b35fdf13c1d4a03061eaffa185dfce353087f594 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 8 Mar 2012 07:49:30 -0600 Subject: [PATCH 31/47] use printfv and printfnq --- scripts/testuser.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/testuser.php b/scripts/testuser.php index 8e70a5985f..028f58c0bb 100644 --- a/scripts/testuser.php +++ b/scripts/testuser.php @@ -58,6 +58,8 @@ function testAllUsers() { function testUser($filter, $user) { + printfnq("Testing user %s\n", $user->nickname); + $profile = $user->getProfile(); $str = new ProfileNoticeStream($profile, $profile); @@ -68,10 +70,10 @@ function testUser($filter, $user) { do { $notice = $str->getNotices($offset, $limit); while ($notice->fetch()) { - print "Testing notice " . $notice->id . "..."; + printfv("Testing notice %d...", $notice->id); $result = $filter->test($notice); Spam_score::save($notice, $result); - print (($result->isSpam) ? "SPAM" : "HAM")."\n"; + printfv("%s\n", ($result->isSpam) ? "SPAM" : "HAM"); } $offset += $notice->N; } while ($notice->N > 0); From ddcac1d1eb3151db7bbad8010f7ad30bc1375dc7 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 8 Mar 2012 07:50:51 -0600 Subject: [PATCH 32/47] use have_option() --- scripts/testuser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/testuser.php b/scripts/testuser.php index 028f58c0bb..221f962842 100644 --- a/scripts/testuser.php +++ b/scripts/testuser.php @@ -85,7 +85,7 @@ try { if (empty($filter)) { throw new Exception(_("No spam filter.")); } - if (get_option('a', 'all')) { + if (have_option('a', 'all')) { testAllUsers($filter); } else { $user = getUser(); From f5ba2f6b8e49e07d9e9b39a97c3a6813963d2fc7 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 8 Mar 2012 08:17:48 -0600 Subject: [PATCH 33/47] don't use getProfile() when looping --- scripts/testuser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/testuser.php b/scripts/testuser.php index 221f962842..dc79548bdb 100644 --- a/scripts/testuser.php +++ b/scripts/testuser.php @@ -60,7 +60,7 @@ function testUser($filter, $user) { printfnq("Testing user %s\n", $user->nickname); - $profile = $user->getProfile(); + $profile = Profile::staticGet('id', $user->id); $str = new ProfileNoticeStream($profile, $profile); From bd2c53414fd4186dd0904257b16e58c5e1a61e35 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 8 Mar 2012 08:18:28 -0600 Subject: [PATCH 34/47] don't lose filter --- scripts/testuser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/testuser.php b/scripts/testuser.php index dc79548bdb..707ba409f9 100644 --- a/scripts/testuser.php +++ b/scripts/testuser.php @@ -33,7 +33,7 @@ END_OF_TESTUSER_HELP; require_once INSTALLDIR.'/scripts/commandline.inc'; -function testAllUsers() { +function testAllUsers($filter) { $found = false; $offset = 0; $limit = 1000; From 7fe0341375e4da7e0237c22d5fe801641c003321 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 8 Mar 2012 08:33:51 -0600 Subject: [PATCH 35/47] recover and keep testing --- scripts/testuser.php | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/scripts/testuser.php b/scripts/testuser.php index 707ba409f9..f28a929c46 100644 --- a/scripts/testuser.php +++ b/scripts/testuser.php @@ -48,7 +48,11 @@ function testAllUsers($filter) { if ($found) { while ($user->fetch()) { - testUser($filter, $user); + try { + testUser($filter, $user); + } catch (Exception $e) { + printfnq("ERROR testing user %s\n: %s", $user->nickname, $e->getMessage()); + } } $offset += $found; } @@ -70,10 +74,14 @@ function testUser($filter, $user) { do { $notice = $str->getNotices($offset, $limit); while ($notice->fetch()) { - printfv("Testing notice %d...", $notice->id); - $result = $filter->test($notice); - Spam_score::save($notice, $result); - printfv("%s\n", ($result->isSpam) ? "SPAM" : "HAM"); + try { + printfv("Testing notice %d...", $notice->id); + $result = $filter->test($notice); + Spam_score::save($notice, $result); + printfv("%s\n", ($result->isSpam) ? "SPAM" : "HAM"); + } catch (Exception $e) { + printfnq("ERROR testing notice %d\n: %s", $notice->id, $e->getMessage()); + } } $offset += $notice->N; } while ($notice->N > 0); From e70f4e1c03a60712ab8c3e6d573e8c2e757d6762 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 9 Mar 2012 06:20:07 -0600 Subject: [PATCH 36/47] train a whole user --- scripts/trainuser.php | 81 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 scripts/trainuser.php diff --git a/scripts/trainuser.php b/scripts/trainuser.php new file mode 100644 index 0000000000..8ee6387081 --- /dev/null +++ b/scripts/trainuser.php @@ -0,0 +1,81 @@ +. + */ + +define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../../..')); + +$shortoptions = 'i:n:'; +$longoptions = array('id=', 'nickname='); + +$helptext = <<nickname); + + $profile = Profile::staticGet('id', $user->id); + + $str = new ProfileNoticeStream($profile, $profile); + + $offset = 0; + $limit = 100; + + do { + $notice = $str->getNotices($offset, $limit); + while ($notice->fetch()) { + try { + printfv("Training notice %d...", $notice->id); + $filter->trainOnError($notice, $category); + $result = $filter->test($notice); + $score = Spam_score::save($notice, $result); + printfv("%s\n", ($result->isSpam) ? "SPAM" : "HAM"); + } catch (Exception $e) { + printfnq("ERROR training notice %d\n: %s", $notice->id, $e->getMessage()); + } + } + $offset += $notice->N; + } while ($notice->N > 0); +} + +try { + $filter = null; + Event::handle('GetSpamFilter', array(&$filter)); + if (empty($filter)) { + throw new Exception(_("No spam filter.")); + } + $user = getUser(); + $category = get_option_value('t', 'category'); + if ($category !== SpamFilter::HAM && + $category !== SpamFilter::SPAM) { + throw new Exception(_("No such category.")); + } + trainUser($filter, $user, $category); +} catch (Exception $e) { + print $e->getMessage()."\n"; + exit(1); +} From 441ac3faf6d8846b44c94ad6c6666ff9f95de626 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 9 Mar 2012 06:27:34 -0600 Subject: [PATCH 37/47] command opts for trainuser --- scripts/trainuser.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/trainuser.php b/scripts/trainuser.php index 8ee6387081..3399e751ba 100644 --- a/scripts/trainuser.php +++ b/scripts/trainuser.php @@ -19,8 +19,8 @@ define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../../..')); -$shortoptions = 'i:n:'; -$longoptions = array('id=', 'nickname='); +$shortoptions = 'i:n:t:'; +$longoptions = array('id=', 'nickname=', 'category='); $helptext = << Date: Sat, 17 Mar 2012 01:02:41 -0400 Subject: [PATCH 38/47] Change to use OAuth for authentication --- ActivitySpamPlugin.php | 12 +--- spamfilter.php | 127 ++++++++++++++++++++++++----------------- 2 files changed, 78 insertions(+), 61 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index ad33f6e998..b6871d4cd9 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -47,8 +47,6 @@ if (!defined('STATUSNET')) { class ActivitySpamPlugin extends Plugin { public $server = null; - public $username = null; - public $password = null; const REVIEWSPAM = 'ActivitySpamPlugin::REVIEWSPAM'; const TRAINSPAM = 'ActivitySpamPlugin::TRAINSPAM'; @@ -60,13 +58,9 @@ class ActivitySpamPlugin extends Plugin */ function initialize() { - foreach (array('username', 'password', 'server') as $attr) { - if (!$this->$attr) { - $this->$attr = common_config('activityspam', $attr); - } - } - - $this->filter = new SpamFilter($this->server, $this->username, $this->password); + $this->filter = new SpamFilter(common_config('activityspam', 'server'), + common_config('activityspam', 'consumerkey'), + common_config('activityspam', 'secret')); return true; } diff --git a/spamfilter.php b/spamfilter.php index 0e321ebc8b..47246b35c5 100644 --- a/spamfilter.php +++ b/spamfilter.php @@ -1,32 +1,32 @@ . - * - * @category Spam - * @package StatusNet - * @author Evan Prodromou - * @copyright 2012 StatusNet, Inc. - * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 - * @link http://status.net/ - */ + /** + * StatusNet - the distributed open-source microblogging tool + * Copyright (C) 2012, StatusNet, Inc. + * + * Spam filter class + * + * PHP version 5 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * @category Spam + * @package StatusNet + * @author Evan Prodromou + * @copyright 2012 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ if (!defined('STATUSNET')) { // This check helps protect against security problems; @@ -47,20 +47,16 @@ if (!defined('STATUSNET')) { * @link http://status.net/ */ -class SpamFilter { +class SpamFilter extends OAuthClient { const HAM = 'ham'; const SPAM = 'spam'; public $server; - public $username; - public $password; - function __construct($server, $username, $password) { - - $this->server = $server; - $this->username = $username; - $this->password = $password; + function __construct($server, $consumerKey, $secret) { + parent::__construct($consumerKey, $secret); + $this->server = $server; } protected function toActivity($notice) { @@ -80,14 +76,7 @@ class SpamFilter { public function testActivity($activity) { - $client = new HTTPClient($this->server . "/is-this-spam"); - - $client->setMethod('POST'); - $client->setAuth($this->username, $this->password); - $client->setHeader('Content-Type', 'application/json'); - $client->setBody(json_encode($activity->asArray())); - - $response = $client->send(); + $response = $this->postJSON($this->server . "/is-this-spam", $activity->asArray()); if (!$response->isOK()) { throw new Exception("Error " . $response->getStatus() . " checking spam score: " . $response->getBody()); @@ -118,14 +107,7 @@ class SpamFilter { throw new Exception("Unknown category: " + $category); } - $client = new HTTPClient($this->server . $endpoint); - - $client->setMethod('POST'); - $client->setAuth($this->username, $this->password); - $client->setHeader('Content-Type', 'application/json'); - $client->setBody(json_encode($activity->asArray())); - - $response = $client->send(); + $response = $this->postJSON($this->server . $endpoint, $activity->asArray()); if (!$response->isOK()) { throw new Exception("Error " . $response->getStatus() . " checking spam score: " . $response->getBody()); @@ -153,4 +135,45 @@ class SpamFilter { return $this->trainActivity($activity, $category); } } + + function postJSON($url, $body) + { + $request = OAuthRequest::from_consumer_and_token($this->consumer, + $this->token, + 'POST', + $url); + + $request->sign_request($this->sha1_method, + $this->consumer, + $this->token); + + $hclient = new HTTPClient($url); + + $hclient->setConfig(array('connect_timeout' => 120, + 'timeout' => 120, + 'follow_redirects' => true, + 'ssl_verify_peer' => false, + 'ssl_verify_host' => false)); + + $hclient->setMethod(HTTP_Request2::METHOD_POST); + $hclient->setBody(json_encode($body)); + $hclient->setHeader('Content-Type', 'application/json'); + $hclient->setHeader($request->to_header()); + + // Twitter is strict about accepting invalid "Expect" headers + // No reason not to clear it still here -ESP + + $hclient->setHeader('Expect', ''); + + try { + $response = $hclient->send(); + $code = $response->getStatus(); + if ($code < 200 || $code >= 400) { + throw new OAuthClientException($response->getBody(), $code); + } + return $response->getBody(); + } catch (Exception $e) { + throw new OAuthClientException($e->getMessage(), $e->getCode()); + } + } } From c4cc7dbe64bcb021c2cf082039e826eef617b0af Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 19 Mar 2012 00:55:51 -0400 Subject: [PATCH 39/47] better error output in testuser.php --- scripts/testuser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/testuser.php b/scripts/testuser.php index f28a929c46..357e04a7c2 100644 --- a/scripts/testuser.php +++ b/scripts/testuser.php @@ -80,7 +80,7 @@ function testUser($filter, $user) { Spam_score::save($notice, $result); printfv("%s\n", ($result->isSpam) ? "SPAM" : "HAM"); } catch (Exception $e) { - printfnq("ERROR testing notice %d\n: %s", $notice->id, $e->getMessage()); + printfnq("ERROR testing notice %d: %s\n", $notice->id, $e->getMessage()); } } $offset += $notice->N; From 6a49e2632118203fbd8526ea626bf6e2c4b3bc3c Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 19 Mar 2012 01:59:26 -0400 Subject: [PATCH 40/47] better propagation of HTTP response --- spamfilter.php | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/spamfilter.php b/spamfilter.php index 47246b35c5..3ddfdad039 100644 --- a/spamfilter.php +++ b/spamfilter.php @@ -78,10 +78,6 @@ class SpamFilter extends OAuthClient { $response = $this->postJSON($this->server . "/is-this-spam", $activity->asArray()); - if (!$response->isOK()) { - throw new Exception("Error " . $response->getStatus() . " checking spam score: " . $response->getBody()); - } - $result = json_decode($response->getBody()); return $result; @@ -109,10 +105,6 @@ class SpamFilter extends OAuthClient { $response = $this->postJSON($this->server . $endpoint, $activity->asArray()); - if (!$response->isOK()) { - throw new Exception("Error " . $response->getStatus() . " checking spam score: " . $response->getBody()); - } - // We don't do much with the results return true; } @@ -168,10 +160,10 @@ class SpamFilter extends OAuthClient { try { $response = $hclient->send(); $code = $response->getStatus(); - if ($code < 200 || $code >= 400) { + if (!$response->isOK()) { throw new OAuthClientException($response->getBody(), $code); } - return $response->getBody(); + return $response; } catch (Exception $e) { throw new OAuthClientException($e->getMessage(), $e->getCode()); } From 717bb5f456fd2b994f4e17f33540557e1fff90d3 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 19 Mar 2012 10:15:33 -0400 Subject: [PATCH 41/47] Get the score if needed on-demand --- ActivitySpamPlugin.php | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index b6871d4cd9..35e12dc33d 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -173,7 +173,7 @@ class ActivitySpamPlugin extends Plugin if (!empty($notice)) { - $score = Spam_score::staticGet('notice_id', $notice->id); + $score = $this->getScore($notice); if (empty($score)) { $this->debug("No score for notice " . $notice->id); @@ -243,4 +243,19 @@ class ActivitySpamPlugin extends Plugin _m('Test notices against the Activity Spam service.')); return true; } + + function getScore($notice) + { + $score = Spam_score::staticGet('notice_id', $notice->id); + + if (!empty($score)) { + return $score; + } + + $result = $this->filter->test($notice); + + $score = Spam_score::saveNew($notice, $result); + + return $score; + } } From cfef7af2ae69322eb65394dbe493d242bb632903 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 19 Mar 2012 10:30:54 -0400 Subject: [PATCH 42/47] Only show training buttons if you can train --- ActivitySpamPlugin.php | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 35e12dc33d..5e13d3610f 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -168,22 +168,27 @@ class ActivitySpamPlugin extends Plugin function onEndShowNoticeOptionItems($nli) { - $notice = $nli->getNotice(); - $out = $nli->getOut(); + $profile = Profile::current(); - if (!empty($notice)) { + if (!empty($profile) && $profile->hasRight(self::TRAINSPAM)) { - $score = $this->getScore($notice); + $notice = $nli->getNotice(); + $out = $nli->getOut(); - if (empty($score)) { - $this->debug("No score for notice " . $notice->id); - // XXX: show a question-mark or something - } else if ($score->is_spam) { - $form = new TrainHamForm($out, $notice); - $form->show(); - } else if (!$score->is_spam) { - $form = new TrainSpamForm($out, $notice); - $form->show(); + if (!empty($notice)) { + + $score = $this->getScore($notice); + + if (empty($score)) { + $this->debug("No score for notice " . $notice->id); + // XXX: show a question-mark or something + } else if ($score->is_spam) { + $form = new TrainHamForm($out, $notice); + $form->show(); + } else if (!$score->is_spam) { + $form = new TrainSpamForm($out, $notice); + $form->show(); + } } } From 76e6dd00adae7d1ba1d74f77ff753b8a0383281d Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Mon, 19 Mar 2012 11:53:53 -0400 Subject: [PATCH 43/47] always rw --- ActivitySpamPlugin.php | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 5e13d3610f..fddad37028 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -257,10 +257,25 @@ class ActivitySpamPlugin extends Plugin return $score; } - $result = $this->filter->test($notice); + try { - $score = Spam_score::saveNew($notice, $result); + $result = $this->filter->test($notice); + + $score = Spam_score::saveNew($notice, $result); + + $this->log(LOG_INFO, "Notice " . $notice->id . " has spam score " . $score->score); + + } catch (Exception $e) { + // Log but continue + $this->log(LOG_ERR, $e->getMessage()); + $score = null; + } return $score; } + + function onStartReadWriteTables(&$alwaysRW, &$rwdb) { + $alwaysRW[] = 'spam_score'; + return true; + } } From d016ce846e9befc9c38f623c6ab9488d4a347fdd Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Tue, 20 Mar 2012 20:48:47 -0400 Subject: [PATCH 44/47] Hide spam notices --- ActivitySpamPlugin.php | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index fddad37028..b294016259 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -278,4 +278,22 @@ class ActivitySpamPlugin extends Plugin $alwaysRW[] = 'spam_score'; return true; } + + function onEndNoticeInScope($notice, $profile, &$bResult) + { + if ($bResult) { + + $score = $this->getScore($notice); + + if ($score->is_spam) { + if (empty($profile) || + ($profile->id !== $notice->profile_id && + !$profile->hasRight(self::REVIEWSPAM))) { + $bResult = false; + } + } + } + + return $true; + } } From 6f2f302004d4508c5aefec61942c54f2450dd824 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Tue, 20 Mar 2012 21:37:35 -0400 Subject: [PATCH 45/47] Only hide spam if set --- ActivitySpamPlugin.php | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index b294016259..6dde92759d 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -47,6 +47,7 @@ if (!defined('STATUSNET')) { class ActivitySpamPlugin extends Plugin { public $server = null; + public $hideSpam = false; const REVIEWSPAM = 'ActivitySpamPlugin::REVIEWSPAM'; const TRAINSPAM = 'ActivitySpamPlugin::TRAINSPAM'; @@ -62,6 +63,8 @@ class ActivitySpamPlugin extends Plugin common_config('activityspam', 'consumerkey'), common_config('activityspam', 'secret')); + $this->hideSpam = common_config('activityspam', 'hidespam'); + return true; } @@ -281,15 +284,17 @@ class ActivitySpamPlugin extends Plugin function onEndNoticeInScope($notice, $profile, &$bResult) { - if ($bResult) { + if ($this->hideSpam) { + if ($bResult) { - $score = $this->getScore($notice); + $score = $this->getScore($notice); - if ($score->is_spam) { - if (empty($profile) || - ($profile->id !== $notice->profile_id && - !$profile->hasRight(self::REVIEWSPAM))) { - $bResult = false; + if ($score->is_spam) { + if (empty($profile) || + ($profile->id !== $notice->profile_id && + !$profile->hasRight(self::REVIEWSPAM))) { + $bResult = false; + } } } } From a1c2ec2c631ef319b179ee1949b054c6d6922137 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 21 Mar 2012 12:37:00 -0400 Subject: [PATCH 46/47] Bad variable --- ActivitySpamPlugin.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index 6dde92759d..d80f19cf31 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -299,6 +299,6 @@ class ActivitySpamPlugin extends Plugin } } - return $true; + return true; } } From 69ec86a3dce3760abeafa5d7c8916057fb97faf6 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Wed, 21 Mar 2012 13:21:14 -0400 Subject: [PATCH 47/47] Use only stored scores and pre-cache them --- ActivitySpamPlugin.php | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/ActivitySpamPlugin.php b/ActivitySpamPlugin.php index d80f19cf31..a905e72cca 100644 --- a/ActivitySpamPlugin.php +++ b/ActivitySpamPlugin.php @@ -282,14 +282,15 @@ class ActivitySpamPlugin extends Plugin return true; } + function onEndNoticeInScope($notice, $profile, &$bResult) { if ($this->hideSpam) { if ($bResult) { - $score = $this->getScore($notice); + $score = Spam_score::staticGet('notice_id', $notice->id); - if ($score->is_spam) { + if (!empty($score) && $score->is_spam) { if (empty($profile) || ($profile->id !== $notice->profile_id && !$profile->hasRight(self::REVIEWSPAM))) { @@ -301,4 +302,17 @@ class ActivitySpamPlugin extends Plugin return true; } + + /** + * Pre-cache our spam scores if needed. + */ + function onEndNoticeListPrefill(&$notices, &$profiles, $avatarSize) { + if ($this->hideSpam) { + foreach ($notices as $notice) { + $ids[] = $notice->id; + } + Memcached_DataObject::multiGet('Spam_score', 'notice_id', $ids); + } + return true; + } }