#!/usr/bin/env php
<?php
/*
 * StatusNet - the distributed open-source microblogging tool
 * Copyright (C) 2010 StatusNet, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));

$shortoptions = 'i:n:f:';
$longoptions = array('id=', 'nickname=', 'file=');

$helptext = <<<END_OF_IMPORTTWITTERATOM_HELP
importtwitteratom.php [options]
import an Atom feed from Twitter as notices by a user

  -i --id       ID of user to update
  -n --nickname nickname of the user to update
  -f --file     file to import (Atom-only for now)

END_OF_IMPORTTWITTERATOM_HELP;

require_once INSTALLDIR.'/scripts/commandline.inc';
require_once INSTALLDIR.'/extlib/htmLawed/htmLawed.php';

/**
 * Look up the user selected on the command line.
 *
 * The -i/--id option takes precedence over -n/--nickname. If neither is
 * given, the help text is shown and the script exits with status 1.
 *
 * @return User the matching user
 *
 * @throws Exception if no user matches the given id or nickname
 */
function getUser()
{
    $user = null;

    if (have_option('i', 'id')) {
        $id = get_option_value('i', 'id');
        $user = User::staticGet('id', $id);
        if (empty($user)) {
            throw new Exception("Can't find user with id '$id'.");
        }
    } else if (have_option('n', 'nickname')) {
        $nickname = get_option_value('n', 'nickname');
        $user = User::staticGet('nickname', $nickname);
        if (empty($user)) {
            throw new Exception("Can't find user with nickname '$nickname'");
        }
    } else {
        show_help();
        exit(1);
    }

    return $user;
}

/**
 * Load the file named by -f/--file and parse it as an Atom feed.
 *
 * If no file is given, the help text is shown and the script exits with
 * status 1.
 *
 * @return DOMDocument parsed document whose root element is an Atom <feed>
 *
 * @throws Exception if the file is missing, not a regular file, unreadable,
 *                   or does not parse as an Atom feed
 */
function getAtomFeedDocument()
{
    $filename = get_option_value('f', 'file');

    if (empty($filename)) {
        show_help();
        exit(1);
    }

    if (!file_exists($filename)) {
        throw new Exception("No such file '$filename'.");
    }

    if (!is_file($filename)) {
        throw new Exception("Not a regular file: '$filename'.");
    }

    if (!is_readable($filename)) {
        throw new Exception("File '$filename' not readable.");
    }

    $xml = file_get_contents($filename);

    if ($xml === false) {
        // The file passed is_readable() but the read itself failed.
        throw new Exception("File '$filename' not readable.");
    }

    // loadXML() must be called on an instance: the static form is
    // deprecated in PHP 5/7 and a fatal error in PHP 8. An empty string is
    // rejected up front because loadXML() warns (or throws) on it.
    $dom = new DOMDocument();

    if ($xml === '' || !$dom->loadXML($xml) || empty($dom->documentElement)) {
        throw new Exception("'$filename' is not an Atom feed.");
    }

    if ($dom->documentElement->namespaceURI !== Activity::ATOM ||
        $dom->documentElement->localName !== 'feed') {
        throw new Exception("'$filename' is not an Atom feed.");
    }

    return $dom;
}

/**
 * Import every <entry> of an Atom feed as a notice by the given user.
 *
 * For each entry, the rendered tweet is fetched from the entry's link,
 * sanitised with htmLawed, and saved through Notice::saveNew(). Entries
 * whose HTML cannot be retrieved are skipped (the error has already been
 * printed by getTweetHtml()/tweetHtmlFromBody()).
 *
 * @param User        $user user who will own the imported notices
 * @param DOMDocument $doc  Atom feed document
 *
 * @return void
 */
function importActivityStream($user, $doc)
{
    $feed = $doc->documentElement;

    $entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry');

    // Entries are processed last-to-first; presumably the feed lists the
    // newest tweet first, so this imports the oldest one first.
    for ($i = $entries->length - 1; $i >= 0; $i--) {
        $entry = $entries->item($i);

        $activity = new Activity($entry, $feed);
        $object = $activity->objects[0];

        if (!have_option('q', 'quiet')) {
            print $activity->content . "\n";
        }

        $html = getTweetHtml($object->link);

        // false: the HTTP fetch failed; '': the page had no tweet content.
        // Either way there is nothing to save, so don't create an empty
        // notice.
        if ($html === false || $html === '') {
            continue;
        }

        $config = array('safe' => 1,
                        'deny_attribute' => 'class,rel,id,style,on*');

        $html = htmLawed($html, $config);

        // ENT_QUOTES so that &#039; is decoded as well; explicit UTF-8 so
        // the result does not depend on the PHP version's default charset.
        $content = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');

        Notice::saveNew($user->id,
                        $content,
                        'importtwitter',
                        array('uri' => $object->id,
                              'url' => $object->link,
                              'rendered' => $html,
                              'created' => common_sql_date($activity->time),
                              'replies' => array(),
                              'groups' => array()));
    }
}

/**
 * Fetch a tweet's web page and extract the rendered tweet HTML from it.
 *
 * Failures are reported on standard output rather than thrown.
 *
 * @param string $url URL of the tweet page
 *
 * @return string|false HTML of the tweet ('' if the page has no tweet
 *                      content), or false if the HTTP request failed
 */
function getTweetHtml($url)
{
    try {
        $client = new HTTPClient();
        $response = $client->get($url);
    } catch (HTTP_Request2_Exception $e) {
        print "ERROR: HTTP response " . $e->getMessage() . "\n";
        return false;
    }

    if (!$response->isOk()) {
        print "ERROR: HTTP response " . $response->getCode() . "\n";
        return false;
    }

    $body = $response->getBody();

    return tweetHtmlFromBody($body);
}

/**
 * Extract the tweet markup from the HTML of a tweet page.
 *
 * The tweet is taken from the first <span class="entry-content">. Links
 * whose href does not start with http:// or https:// are made absolute by
 * prefixing http://twitter.com.
 *
 * @param string $body HTML source of the tweet page
 *
 * @return string serialised child nodes of the tweet span, or '' (after
 *                printing an error) if the page cannot be parsed or holds
 *                no tweet content
 */
function tweetHtmlFromBody($body)
{
    // loadHTML() warns (PHP < 8) or throws (PHP 8) on empty input.
    if (!is_string($body) || $body === '') {
        print "ERROR: No content in tweet page.\n";
        return '';
    }

    // loadHTML() must be called on an instance: the static form is
    // deprecated in PHP 5/7 and a fatal error in PHP 8.
    $doc = new DOMDocument();

    // Real-world HTML is rarely well-formed; collect libxml's complaints
    // internally instead of flooding the output, then restore the
    // caller's setting.
    $previous = libxml_use_internal_errors(true);
    $loaded = $doc->loadHTML($body);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$loaded) {
        print "ERROR: No content in tweet page.\n";
        return '';
    }

    $xpath = new DOMXPath($doc);

    $spans = $xpath->query('//span[@class="entry-content"]');

    if ($spans->length == 0) {
        print "ERROR: No content in tweet page.\n";
        return '';
    }

    $span = $spans->item(0);

    $children = $span->childNodes;

    $text = '';

    for ($i = 0; $i < $children->length; $i++) {
        $child = $children->item($i);
        if ($child instanceof DOMElement &&
            $child->tagName == 'a' &&
            !preg_match('#^https?://#', $child->getAttribute('href'))) {
            // Site-relative link (e.g. to a user or a search): make it
            // absolute so it still works outside twitter.com.
            $child->setAttribute('href', 'http://twitter.com' . $child->getAttribute('href'));
        }
        $text .= $doc->saveXML($child);
    }

    return $text;
}

try {
    $doc = getAtomFeedDocument();
    $user = getUser();

    importActivityStream($user, $doc);
} catch (Exception $e) {
    print $e->getMessage()."\n";
    exit(1);
}