FeedPoller plugin, for hubless feeds

This commit is contained in:
Mikael Nordfeldth 2015-01-14 01:16:28 +01:00
parent 57d8eb8a53
commit 8594a2ba16
5 changed files with 243 additions and 1 deletions

View File

@ -0,0 +1,61 @@
<?php
/**
* GNU social feed polling plugin, to avoid using external PuSH hubs
*
* @category Feed
* @package GNUsocial
* @author Mikael Nordfeldth <mmn@hethane.se>
* @copyright 2013 Free Software Foundation, Inc.
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
* @link http://www.gnu.org/software/social/
*/
if (!defined('GNUSOCIAL')) { exit(1); }
/**
 * Plugin that polls feeds lacking a PuSH hub instead of relying on hub pushes.
 *
 * Feeds for which FeedSub::isPuSH() is false are handed to FeedPoll; a cron
 * hook enqueues the due feeds and FeedPollQueueHandler processes the queue.
 */
class FeedPollerPlugin extends Plugin
{
    /** @var int Interval in minutes for feed checks. */
    public $interval = 5;

    /**
     * Attach FeedPollQueueHandler to the FeedPoll::QUEUE_CHECK transport.
     *
     * @param QueueManager $qm queue manager being initialized
     * @return bool always true
     */
    public function onEndInitializeQueueManager(QueueManager $qm)
    {
        $qm->connect(FeedPoll::QUEUE_CHECK, 'FeedPollQueueHandler');

        return true;
    }

    /**
     * Cron hook: ask FeedPoll to enqueue feeds using the configured interval.
     *
     * @return bool always true
     */
    public function onCronMinutely()
    {
        FeedPoll::enqueueNewFeeds(array('interval' => $this->interval));

        return true;
    }

    /**
     * Take over subscription handling for feeds that are not PuSH feeds.
     *
     * @param FeedSub $feedsub feed being subscribed to
     * @return bool false when this plugin set the feed up for polling
     *              (stops further FeedSubscribe processing), true otherwise
     */
    public function onFeedSubscribe(FeedSub $feedsub)
    {
        if ($feedsub->isPuSH()) {
            return true;
        }

        // setupFeedSub() takes seconds, the plugin setting is in minutes.
        $seconds = $this->interval * 60;
        FeedPoll::setupFeedSub($feedsub, $seconds);

        // We're polling this feed, so stop processing FeedSubscribe
        return false;
    }

    /**
     * Take over unsubscription handling for feeds that are not PuSH feeds.
     *
     * @param FeedSub $feedsub feed being unsubscribed from
     * @return bool false when the feed was handled here, true otherwise
     */
    public function onFeedUnsubscribe(FeedSub $feedsub)
    {
        if ($feedsub->isPuSH()) {
            return true;
        }

        // removes sub_state setting and such
        $feedsub->confirmUnsubscribe();

        return false;
    }

    /**
     * Append this plugin's metadata to the version listing.
     *
     * @param array $versions list of plugin descriptions, extended in place
     * @return bool always true
     */
    public function onPluginVersion(&$versions)
    {
        $info = array();
        $info['name'] = 'FeedPoller';
        $info['version'] = GNUSOCIAL_VERSION;
        $info['author'] = 'Mikael Nordfeldth';
        $info['homepage'] = 'http://www.gnu.org/software/social/';
        $info['description'] =
            // TRANS: Plugin description.
            _m('Feed polling plugin to avoid using external push hubs.');

        $versions[] = $info;

        return true;
    }
}

View File

@ -0,0 +1,69 @@
<?php
/**
* Store last poll time in db, then check if they should be renewed (if so, enqueue).
* Can be called from a queue handler on a per-feed status to poll stuff.
*
* Used as internal feed polling mechanism (atom/rss)
*
* @category OStatus
* @package GNUsocial
* @author Mikael Nordfeldth <mmn@hethane.se>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://www.gnu.org/software/social/
*/
if (!defined('GNUSOCIAL')) { exit(1); }
/**
 * Static helpers for polling feeds that have no PuSH hub (sub_state 'nohub').
 */
class FeedPoll
{
    const DEFAULT_INTERVAL = 5; // in minutes

    const QUEUE_CHECK = 'feedpoll-check';

    // TODO: Find some smart way to add feeds only once, so they don't get more than 1 feedpoll in the queue each
    // probably through sub_start sub_end trickery.

    /**
     * Enqueue a poll job for every 'nohub' feed whose last_update is older
     * than the polling interval, and stamp last_update with the current time.
     *
     * @param array $args optional settings; 'interval' is the polling interval
     *                    in minutes and must be a positive int, otherwise
     *                    DEFAULT_INTERVAL is used
     * @return void
     */
    public static function enqueueNewFeeds(array $args=array())
    {
        if (!isset($args['interval']) || !is_int($args['interval']) || $args['interval']<=0) {
            $args['interval'] = self::DEFAULT_INTERVAL;
        }
        $args['interval'] *= 60; // minutes to seconds

        $feedsub = new FeedSub();
        $feedsub->sub_state = 'nohub';
        // Find feeds that haven't been polled within the desired interval,
        // though perhaps we're abusing the "last_update" field here?
        // The date is a string literal, so it is single-quoted: double quotes
        // denote identifiers in ANSI/PostgreSQL quoting modes.
        $feedsub->whereAdd(sprintf("last_update < '%s'", common_sql_date(time()-$args['interval'])));
        $feedsub->find();

        $qm = QueueManager::get();
        while ($feedsub->fetch()) {
            $orig = clone($feedsub);
            $item = array('id' => $feedsub->id);
            $qm->enqueue($item, self::QUEUE_CHECK);
            common_debug('Enqueueing FeedPoll feeds, currently: '.$feedsub->uri);
            // Mark the feed as handled now so it is not selected again until
            // the interval has elapsed.
            $feedsub->last_update = common_sql_now();
            $feedsub->update($orig);
        }
    }

    /**
     * Put a FeedSub into the polled ('nohub') state.
     *
     * Declared static because every caller invokes it as
     * FeedPoll::setupFeedSub(); calling a non-static method statically is an
     * Error as of PHP 8.
     *
     * @param FeedSub $feedsub  feed to mark for polling
     * @param int     $interval polling interval in seconds; last_update is
     *                          backdated by this much to force polling as soon
     *                          as possible
     * @return void
     */
    public static function setupFeedSub(FeedSub $feedsub, $interval=300)
    {
        $orig = clone($feedsub);
        $feedsub->sub_state = 'nohub';
        $feedsub->sub_start = common_sql_date(time());
        $feedsub->sub_end = '';
        $feedsub->last_update = common_sql_date(time()-$interval); // force polling as soon as we can
        $feedsub->update($orig);
    }

    /**
     * Fetch the feed over HTTP and hand the response body to the FeedSub.
     *
     * Declared static because every caller invokes it as
     * FeedPoll::checkUpdates().
     *
     * @param FeedSub $feedsub feed to fetch
     * @return void
     * @throws ServerException when the HTTP response is not OK
     */
    public static function checkUpdates(FeedSub $feedsub)
    {
        $request = new HTTPClient();
        common_debug('Enqueueing FeedPoll feeds went well, now checking updates for: '.$feedsub->getUri());
        $feed = $request->get($feedsub->uri);
        if (!$feed->isOk()) {
            throw new ServerException('FeedSub could not fetch id='.$feedsub->id.' (Error '.$feed->getStatus().': '.$feed->getBody().')');
        }
        // Second argument is null because a polled fetch has no hub signature
        // to verify -- presumably this is the HMAC parameter; confirm against
        // FeedSub::receive().
        $feedsub->receive($feed->getBody(), null);
    }
}

View File

@ -0,0 +1,41 @@
<?php
if (!defined('GNUSOCIAL')) { exit(1); }
/**
 * Poll a single feed, looked up by the FeedSub id carried in the queue item;
 * the full url is in the feedsub table.
 *
 * @author Mikael Nordfeldth <mmn@hethane.se>
 */
class FeedPollQueueHandler extends QueueHandler
{
    /**
     * Name of the queue this handler serves.
     *
     * @return string FeedPoll::QUEUE_CHECK
     */
    public function transport()
    {
        return FeedPoll::QUEUE_CHECK;
    }

    /**
     * Poll the feed referenced by a queue item.
     *
     * Fetch failures are logged, not rethrown, and the method returns true
     * in every case.
     *
     * @param array $item queue item with an 'id' key holding the FeedSub id
     * @return bool always true
     */
    public function handle($item)
    {
        common_debug('Enqueueing FeedPoll feeds but actually running the queue handler!');
        $feedsub = FeedSub::getKV('id', $item['id']);
        if (!$feedsub instanceof FeedSub) {
            // Removed from the feedsub table I guess
            return true;
        }
        // Compare the state directly: "!$x == 'nohub'" negates $x first and
        // is false for every non-empty state, so it never skipped anything.
        if ($feedsub->sub_state !== 'nohub') {
            // We're not supposed to poll this (either it's PuSH or it's unsubscribed)
            return true;
        }

        common_debug('Enqueueing FeedPoll feeds but actually checking updates');
        try {
            FeedPoll::checkUpdates($feedsub);
        } catch (Exception $e) {
            common_log(LOG_ERR, "Failed to check feedsub id= ".$feedsub->id.' ("'.$e->getMessage().'")');
        }

        common_debug('Enqueueing FeedPoll feeds but actually done with '.$feedsub->id);
        return true;
    }
}

View File

@ -0,0 +1,71 @@
#!/usr/bin/env php
<?php
/*
* StatusNet - a distributed open-source microblogging tool
* Copyright (C) 2010, StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Command-line tool: poll one feed immediately, given its URI.

define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..'));

// Defined before commandline.inc is loaded -- presumably that include reads
// $helptext for its own --help output; confirm against scripts/commandline.inc.
$helptext = <<<END_OF_HELP
pollfeed.php feeduri
Poll the feed, assuming it has sub_state 'nohub'.
END_OF_HELP;

require_once INSTALLDIR.'/scripts/commandline.inc';
require_once(__DIR__ . '/../lib/feedpoll.php');

// $args is not set in this script -- presumably populated by commandline.inc
// with the positional arguments; confirm.
if (empty($args[0]) || !Validate::uri($args[0])) {
    echo "$helptext\n";
    exit(1);
}

$uri = $args[0];
$feedsub = FeedSub::getKV('uri', $uri);
if (!$feedsub instanceof FeedSub) {
    echo "No FeedSub feed known for URI $uri\n";
    exit(1);
}

// Only feeds in the polled state are fetched here.
if ($feedsub->sub_state != 'nohub') {
    echo "Feed is a PuSH feed, so we will not poll it.\n";
    exit(1);
}

showSub($feedsub);

try {
    FeedPoll::checkUpdates($feedsub);
} catch (Exception $e) {
    // Terminate each part with a newline so the message and the stack trace
    // do not run together on the terminal.
    echo "Could not check updates for feed: ".$e->getMessage()."\n";
    echo $e->getTraceAsString()."\n";
    exit(1);
}
/**
 * Print a short summary of a feed subscription record to standard output.
 *
 * @param FeedSub $sub subscription whose state, secret and timestamps are shown
 * @return void
 */
function showSub(FeedSub $sub)
{
    // Label => value pairs, emitted one per line in this order.
    $fields = array(
        ' Subscription state: ' => $sub->sub_state,
        ' Signature secret: ' => $sub->secret,
        ' Sub start date: ' => $sub->sub_start,
        ' Record created: ' => $sub->created,
        ' Record modified: ' => $sub->modified,
    );

    foreach ($fields as $label => $value) {
        echo $label . $value . "\n";
    }
}

View File

@ -435,7 +435,7 @@ class FeedSub extends Managed_DataObject
{
common_log(LOG_INFO, __METHOD__ . ": packet for \"" . $this->getUri() . "\"! $hmac $post");
if ($this->sub_state != 'active') {
if (!in_array($this->sub_state, array('active', 'nohub'))) {
common_log(LOG_ERR, __METHOD__ . ": ignoring PuSH for inactive feed " . $this->getUri() . " (in state '$this->sub_state')");
return;
}