gnu-social/scripts/importtwitteratom.php

164 lines
4.5 KiB
PHP
Raw Permalink Normal View History

#!/usr/bin/env php
<?php
/*
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2010 StatusNet, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
define('INSTALLDIR', dirname(__DIR__));
define('PUBLICDIR', INSTALLDIR . DIRECTORY_SEPARATOR . 'public');
$shortoptions = 'i:n:f:';
$longoptions = array('id=', 'nickname=', 'file=');
$helptext = <<<END_OF_IMPORTTWITTERATOM_HELP
importtwitteratom.php [options]
import an Atom feed from Twitter as notices by a user
-i --id ID of user to update
-n --nickname nickname of the user to update
-f --file file to import (Atom-only for now)
END_OF_IMPORTTWITTERATOM_HELP;
require_once INSTALLDIR.'/scripts/commandline.inc';
function getAtomFeedDocument()
{
$filename = get_option_value('f', 'file');
if (empty($filename)) {
show_help();
exit(1);
}
if (!file_exists($filename)) {
throw new Exception("No such file '$filename'.");
}
if (!is_file($filename)) {
throw new Exception("Not a regular file: '$filename'.");
}
if (!is_readable($filename)) {
throw new Exception("File '$filename' not readable.");
}
$xml = file_get_contents($filename);
$dom = DOMDocument::loadXML($xml);
if ($dom->documentElement->namespaceURI != Activity::ATOM ||
$dom->documentElement->localName != 'feed') {
throw new Exception("'$filename' is not an Atom feed.");
}
return $dom;
}
function importActivityStream($user, $doc)
{
$feed = $doc->documentElement;
$entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry');
for ($i = $entries->length - 1; $i >= 0; $i--) {
$entry = $entries->item($i);
$activity = new Activity($entry, $feed);
$object = $activity->objects[0];
if (!have_option('q', 'quiet')) {
print $activity->content . "\n";
}
$html = common_purify(getTweetHtml($object->link));
$content = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');
$notice = Notice::saveNew($user->id,
$content,
'importtwitter',
array('uri' => $object->id,
'url' => $object->link,
'rendered' => $html,
'created' => common_sql_date($activity->time),
'replies' => array(),
'groups' => array()));
}
}
function getTweetHtml($url)
{
try {
$client = new HTTPClient();
$response = $client->get($url);
} catch (Exception $e) {
print "ERROR: HTTP response " . $e->getMessage() . "\n";
return false;
}
if (!$response->isOk()) {
print "ERROR: HTTP response " . $response->getCode() . "\n";
return false;
}
$body = $response->getBody();
return tweetHtmlFromBody($body);
}
function tweetHtmlFromBody($body)
{
$doc = DOMDocument::loadHTML($body);
$xpath = new DOMXPath($doc);
$spans = $xpath->query('//span[@class="entry-content"]');
if ($spans->length == 0) {
print "ERROR: No content in tweet page.\n";
return '';
}
$span = $spans->item(0);
$children = $span->childNodes;
$text = '';
for ($i = 0; $i < $children->length; $i++) {
$child = $children->item($i);
if ($child instanceof DOMElement &&
$child->tagName == 'a' &&
!preg_match('#^https?://#', $child->getAttribute('href'))) {
$child->setAttribute('href', 'http://twitter.com' . $child->getAttribute('href'));
}
$text .= $doc->saveXML($child);
}
return $text;
}
try {
$doc = getAtomFeedDocument();
$user = getUser();
importActivityStream($user, $doc);
} catch (Exception $e) {
print $e->getMessage()."\n";
exit(1);
}