New SpamFilter class
This commit is contained in:
@@ -47,7 +47,6 @@ if (!defined('STATUSNET')) {
|
|||||||
class ActivitySpamPlugin extends Plugin
|
class ActivitySpamPlugin extends Plugin
|
||||||
{
|
{
|
||||||
public $server = null;
|
public $server = null;
|
||||||
|
|
||||||
public $username = null;
|
public $username = null;
|
||||||
public $password = null;
|
public $password = null;
|
||||||
|
|
||||||
@@ -64,6 +63,8 @@ class ActivitySpamPlugin extends Plugin
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$this->filter = new SpamFilter($this->server, $this->username, $this->password);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,6 +104,9 @@ class ActivitySpamPlugin extends Plugin
|
|||||||
case 'Spam_score':
|
case 'Spam_score':
|
||||||
include_once $dir . '/'.$cls.'.php';
|
include_once $dir . '/'.$cls.'.php';
|
||||||
return false;
|
return false;
|
||||||
|
case 'SpamFilter':
|
||||||
|
include_once $dir . '/'.strtolower($cls).'.php';
|
||||||
|
return false;
|
||||||
default:
|
default:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -118,40 +122,19 @@ class ActivitySpamPlugin extends Plugin
|
|||||||
|
|
||||||
function onEndNoticeSave($notice)
|
function onEndNoticeSave($notice)
|
||||||
{
|
{
|
||||||
// FIXME: need this to autoload ActivityStreamsMediaLink
|
try {
|
||||||
$doc = new ActivityStreamJSONDocument();
|
|
||||||
|
|
||||||
$activity = $notice->asActivity(null);
|
$result = $this->filter->test($notice);
|
||||||
|
|
||||||
$client = new HTTPClient($this->server . "/is-this-spam");
|
$score = Spam_score::saveNew($notice, $result);
|
||||||
|
|
||||||
$client->setMethod('POST');
|
$this->log(LOG_INFO, "Notice " . $notice->id . " has spam score " . $score->score);
|
||||||
$client->setAuth($this->username, $this->password);
|
|
||||||
$client->setHeader('Content-Type', 'application/json');
|
|
||||||
$client->setBody(json_encode($activity->asArray()));
|
|
||||||
|
|
||||||
$response = $client->send();
|
} catch (Exception $e) {
|
||||||
|
// Log but continue
|
||||||
if (!$response->isOK()) {
|
$this->log(LOG_ERR, $e->getMessage());
|
||||||
$this->log(LOG_ERR, "Error " . $response->getStatus() . " checking spam score: " . $response->getBody());
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$result = json_decode($response->getBody());
|
|
||||||
|
|
||||||
$score = new Spam_score();
|
|
||||||
|
|
||||||
$score->notice_id = $notice->id;
|
|
||||||
$score->score = $result->probability;
|
|
||||||
$score->is_spam = $result->isSpam;
|
|
||||||
$score->scaled = Spam_score::scale($score->score);
|
|
||||||
$score->created = common_sql_now();
|
|
||||||
$score->notice_created = $notice->created;
|
|
||||||
|
|
||||||
$score->insert();
|
|
||||||
|
|
||||||
$this->log(LOG_INFO, "Notice " . $notice->id . " has spam score " . $score->score);
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -67,6 +67,22 @@ class Spam_score extends Managed_DataObject
|
|||||||
return Managed_DataObject::staticGet('Spam_score', $k, $v);
|
return Managed_DataObject::staticGet('Spam_score', $k, $v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Create and store the spam score record for a notice.
 *
 * Declared static because it is invoked as Spam_score::saveNew() and
 * never touches $this; a static call to an instance method is
 * deprecated in older PHP versions and a fatal error in PHP 8.
 *
 * @param Notice $notice Notice that was scored; its id and created
 *                       timestamp are copied onto the new record.
 * @param object $result Filter verdict; the `probability` and `isSpam`
 *                       properties are read from it.
 *
 * @return Spam_score the newly built record, after insert() was called
 */
public static function saveNew($notice, $result) {

    $score = new Spam_score();

    $score->notice_id      = $notice->id;
    $score->score          = $result->probability;
    $score->is_spam        = $result->isSpam;
    // Derived from the raw probability by Spam_score::scale().
    $score->scaled         = Spam_score::scale($score->score);
    $score->created        = common_sql_now();
    $score->notice_created = $notice->created;

    // NOTE(review): the return value of insert() is not checked here;
    // confirm whether a failed insert should be reported to the caller.
    $score->insert();

    return $score;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The One True Thingy that must be defined and declared.
|
* The One True Thingy that must be defined and declared.
|
||||||
*/
|
*/
|
||||||
|
156
spamfilter.php
Normal file
156
spamfilter.php
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* StatusNet - the distributed open-source microblogging tool
|
||||||
|
* Copyright (C) 2012, StatusNet, Inc.
|
||||||
|
*
|
||||||
|
* Spam filter class
|
||||||
|
*
|
||||||
|
* PHP version 5
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* @category Spam
|
||||||
|
* @package StatusNet
|
||||||
|
* @author Evan Prodromou <evan@status.net>
|
||||||
|
* @copyright 2012 StatusNet, Inc.
|
||||||
|
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
|
||||||
|
* @link http://status.net/
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (!defined('STATUSNET')) {
|
||||||
|
// This check helps protect against security problems;
|
||||||
|
// your code file can't be executed directly from the web.
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Spam filter class
|
||||||
|
*
|
||||||
|
* Local proxy for remote filter
|
||||||
|
*
|
||||||
|
* @category Spam
|
||||||
|
* @package StatusNet
|
||||||
|
* @author Evan Prodromou <evan@status.net>
|
||||||
|
* @copyright 2012 StatusNet, Inc.
|
||||||
|
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
|
||||||
|
* @link http://status.net/
|
||||||
|
*/
|
||||||
|
|
||||||
|
class SpamFilter {

    /** Category name for legitimate (non-spam) content. */
    const HAM = 'ham';

    /** Category name for spam content. */
    const SPAM = 'spam';

    /** @var string Base URL of the remote filter; endpoint paths are appended to it. */
    public $server;

    /** @var string Username passed to HTTPClient::setAuth() on each request. */
    public $username;

    /** @var string Password passed to HTTPClient::setAuth() on each request. */
    public $password;

    /**
     * Store the connection settings for the remote filter.
     *
     * @param string $server   Base URL of the remote filter service
     * @param string $username Username for the service
     * @param string $password Password for the service
     */
    public function __construct($server, $username, $password) {
        $this->server   = $server;
        $this->username = $username;
        $this->password = $password;
    }

    /**
     * Convert a notice into the activity that is sent to the filter.
     *
     * @param Notice $notice Notice to convert
     *
     * @return mixed whatever $notice->asActivity(null) returns
     */
    protected function toActivity($notice) {
        // FIXME: need this to autoload ActivityStreamsMediaLink
        $doc = new ActivityStreamJSONDocument();

        return $notice->asActivity(null);
    }

    /**
     * POST an activity, JSON-encoded, to one endpoint of the remote filter.
     *
     * @param string $endpoint Path appended to the server URL
     * @param object $activity Activity to send; must provide asArray()
     * @param string $action   Description of the operation, used in the
     *                         error message when the request fails
     *
     * @return mixed the response object returned by HTTPClient::send()
     *
     * @throws Exception if the response does not report success (isOK())
     */
    protected function postActivity($endpoint, $activity, $action) {

        $client = new HTTPClient($this->server . $endpoint);

        $client->setMethod('POST');
        $client->setAuth($this->username, $this->password);
        $client->setHeader('Content-Type', 'application/json');
        $client->setBody(json_encode($activity->asArray()));

        $response = $client->send();

        if (!$response->isOK()) {
            throw new Exception("Error " . $response->getStatus() . " " . $action . ": " . $response->getBody());
        }

        return $response;
    }

    /**
     * Ask the remote filter whether a notice is spam.
     *
     * @param Notice $notice Notice to check
     *
     * @return object decoded JSON verdict (see testActivity())
     *
     * @throws Exception on a failed request or an undecodable response
     */
    public function test($notice) {
        return $this->testActivity($this->toActivity($notice));
    }

    /**
     * Ask the remote filter whether an activity is spam.
     *
     * @param object $activity Activity to check
     *
     * @return object decoded JSON body of the filter's response
     *
     * @throws Exception on a failed request, or when the response body
     *                   does not decode to a JSON object
     */
    public function testActivity($activity) {

        $response = $this->postActivity('/is-this-spam', $activity, 'checking spam score');

        $result = json_decode($response->getBody());

        // json_decode() yields null for malformed JSON; callers read
        // properties off the result, so fail loudly here instead.
        if (!is_object($result)) {
            throw new Exception("Invalid response checking spam score: " . $response->getBody());
        }

        return $result;
    }

    /**
     * Train the remote filter with a notice of a known category.
     *
     * @param Notice $notice   Notice to train with
     * @param string $category self::HAM or self::SPAM
     *
     * @return boolean true on success
     *
     * @throws Exception on an unknown category or a failed request
     */
    public function train($notice, $category) {
        return $this->trainActivity($this->toActivity($notice), $category);
    }

    /**
     * Train the remote filter with an activity of a known category.
     *
     * @param object $activity Activity to train with
     * @param string $category self::HAM or self::SPAM
     *
     * @return boolean true on success
     *
     * @throws Exception on an unknown category or a failed request
     */
    public function trainActivity($activity, $category) {

        switch ($category) {
        case self::HAM:
            $endpoint = '/this-is-ham';
            break;
        case self::SPAM:
            $endpoint = '/this-is-spam';
            break;
        default:
            // "." is PHP's concatenation operator; "+" would attempt
            // numeric addition and lose (or fail to build) the message.
            throw new Exception("Unknown category: " . $category);
        }

        $this->postActivity($endpoint, $activity, 'training spam filter');

        // We don't do much with the results
        return true;
    }

    /**
     * Train the filter with a notice only if it currently misclassifies it.
     *
     * @param Notice $notice   Notice to check and possibly train with
     * @param string $category self::HAM or self::SPAM
     *
     * @return boolean true on success
     *
     * @throws Exception on an unknown category or a failed request
     */
    public function trainOnError($notice, $category) {
        return $this->trainActivityOnError($this->toActivity($notice), $category);
    }

    /**
     * Train the filter with an activity only if it currently misclassifies it.
     *
     * The activity is tested first; when the verdict already agrees with
     * $category no training request is sent.
     *
     * @param object $activity Activity to check and possibly train with
     * @param string $category self::HAM or self::SPAM
     *
     * @return boolean true on success
     *
     * @throws Exception on an unknown category or a failed request
     */
    public function trainActivityOnError($activity, $category) {

        $result = $this->testActivity($activity);

        $agrees = ($category === self::SPAM && $result->isSpam) ||
                  ($category === self::HAM && !$result->isSpam);

        if ($agrees) {
            return true;
        }

        return $this->trainActivity($activity, $category);
    }
}
|
Reference in New Issue
Block a user