From 3b304fc0efd0bb4d75b964fe5585703d113d9c99 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 4 Oct 2010 18:28:54 -0700 Subject: [PATCH 01/18] Initial stub code for pulling data from Twitter's User Streams and Site Streams interfaces. This should allow us to make a much more efficient background importer which can use a relatively small number of connections getting push data for either a single user or for many users with credentials on the site. --- plugins/TwitterBridge/jsonstreamreader.php | 224 ++++++++++++++++++ plugins/TwitterBridge/scripts/streamtest.php | 142 +++++++++++ plugins/TwitterBridge/twitterstreamreader.php | 161 +++++++++++++ 3 files changed, 527 insertions(+) create mode 100644 plugins/TwitterBridge/jsonstreamreader.php create mode 100644 plugins/TwitterBridge/scripts/streamtest.php create mode 100644 plugins/TwitterBridge/twitterstreamreader.php diff --git a/plugins/TwitterBridge/jsonstreamreader.php b/plugins/TwitterBridge/jsonstreamreader.php new file mode 100644 index 0000000000..ad8e2626ad --- /dev/null +++ b/plugins/TwitterBridge/jsonstreamreader.php @@ -0,0 +1,224 @@ +. + * + * @category Plugin + * @package StatusNet + * @author Brion Vibber + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class OAuthData +{ + public $consumer_key, $consumer_secret, $token, $token_secret; +} + +/** + * + */ +abstract class JsonStreamReader +{ + const CRLF = "\r\n"; + + public $id; + protected $socket = null; + protected $state = 'init'; // 'init', 'connecting', 'waiting', 'headers', 'active' + + public function __construct() + { + $this->id = get_class($this) . '.' . substr(md5(mt_rand()), 0, 8); + } + + /** + * Starts asynchronous connect operation... + * + * @param $url + */ + public function connect($url) + { + common_log(LOG_DEBUG, "$this->id opening connection to $url"); + + $scheme = parse_url($url, PHP_URL_SCHEME); + if ($scheme == 'http') { + $rawScheme = 'tcp'; + } else if ($scheme == 'https') { + $rawScheme = 'ssl'; + } else { + throw new ServerException('Invalid URL scheme for HTTP stream reader'); + } + + $host = parse_url($url, PHP_URL_HOST); + $port = parse_url($url, PHP_URL_PORT); + if (!$port) { + if ($scheme == 'https') { + $port = 443; + } else { + $port = 80; + } + } + + $path = parse_url($url, PHP_URL_PATH); + $query = parse_url($url, PHP_URL_QUERY); + if ($query) { + $path .= '?' . $query; + } + + $errno = $errstr = null; + $timeout = 5; + //$flags = STREAM_CLIENT_CONNECT | STREAM_CLIENT_ASYNC_CONNECT; + $flags = STREAM_CLIENT_CONNECT; + // @fixme add SSL params + $this->socket = stream_socket_client("$rawScheme://$host:$port", $errno, $errstr, $timeout, $flags); + + $this->send($this->httpOpen($host, $path)); + + stream_set_blocking($this->socket, false); + $this->state = 'waiting'; + } + + function send($buffer) + { + echo "Writing...\n"; + var_dump($buffer); + fwrite($this->socket, $buffer); + } + + function read() + { + echo "Reading...\n"; + $buffer = fread($this->socket, 65536); + var_dump($buffer); + return $buffer; + } + + protected function httpOpen($host, $path) + { + $lines = array( + "GET $path HTTP/1.1", + "Host: $host", + "User-Agent: StatusNet/" . STATUSNET_VERSION . " (TwitterBridgePlugin)", + "Connection: close", + "", + "" + ); + return implode(self::CRLF, $lines); + } + + /** + * Close the current connection, if open. + */ + public function close() + { + if ($this->isConnected()) { + common_log(LOG_DEBUG, "$this->id closing connection."); + fclose($this->socket); + $this->socket = null; + } + } + + /** + * Are we currently connected? + * + * @return boolean + */ + public function isConnected() + { + return $this->socket !== null; + } + + /** + * Send any sockets we're listening on to the IO manager + * to wait for input. + * + * @return array of resources + */ + public function getSockets() + { + if ($this->isConnected()) { + return array($this->socket); + } + return array(); + } + + /** + * Take a chunk of input over the horn and go go go! :D + * @param string $buffer + */ + function handleInput($socket) + { + if ($this->socket !== $socket) { + throw new Exception('Got input from unexpected socket!'); + } + + $buffer = $this->read(); + switch ($this->state) + { + case 'waiting': + $this->handleInputWaiting($buffer); + break; + case 'headers': + $this->handleInputHeaders($buffer); + break; + case 'active': + $this->handleInputActive($buffer); + break; + default: + throw new Exception('Invalid state in handleInput: ' . $this->state); + } + } + + function handleInputWaiting($buffer) + { + common_log(LOG_DEBUG, "$this->id Does this happen? " . $buffer); + $this->state = 'headers'; + $this->handleInputHeaders($buffer); + } + + function handleInputHeaders($buffer) + { + $lines = explode(self::CRLF, $buffer); + foreach ($lines as $line) { + if ($line == '') { + $this->state = 'active'; + common_log(LOG_DEBUG, "$this->id connection is active!"); + } else { + common_log(LOG_DEBUG, "$this->id read HTTP header: $line"); + $this->responseHeaders[] = $line; + } + } + } + + function handleInputActive($buffer) + { + // One JSON object on each line... + // Will we always deliver on packet boundaries? + $lines = explode("\n", $buffer); + foreach ($lines as $line) { + $data = json_decode($line, true); + if ($data) { + $this->handleJson($data); + } else { + common_log(LOG_ERR, "$this->id received bogus JSON data: " . $line); + } + } + } + + abstract function handleJson(array $data); +} diff --git a/plugins/TwitterBridge/scripts/streamtest.php b/plugins/TwitterBridge/scripts/streamtest.php new file mode 100644 index 0000000000..04d91ef439 --- /dev/null +++ b/plugins/TwitterBridge/scripts/streamtest.php @@ -0,0 +1,142 @@ +. + * + * @category Plugin + * @package StatusNet + * @author Brion Vibber + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..')); + +$shortoptions = 'n:'; +$longoptions = array('nick='); + +$helptext = << + +Attempts a User Stream connection to Twitter as the given user, dumping +data as it comes. + +ENDOFHELP; + +require_once INSTALLDIR.'/scripts/commandline.inc'; +require_once dirname(dirname(__FILE__)) . '/jsonstreamreader.php'; +require_once dirname(dirname(__FILE__)) . '/twitterstreamreader.php'; + +if (have_option('n')) { + $nickname = get_option_value('n'); +} else if (have_option('nick')) { + $nickname = get_option_value('nickname'); +} else { + show_help($helptext); + exit(0); +} + +/** + * + * @param User $user + * @return TwitterOAuthClient + */ +function twitterAuthForUser(User $user) +{ + $flink = Foreign_link::getByUserID($user->id, + TWITTER_SERVICE); + if (!$flink) { + throw new ServerException("No Twitter config for this user."); + } + + $token = TwitterOAuthClient::unpackToken($flink->credentials); + if (!$token) { + throw new ServerException("No Twitter OAuth credentials for this user."); + } + + return new TwitterOAuthClient($token->key, $token->secret); +} + +function homeStreamForUser(User $user) +{ + $auth = twitterAuthForUser($user); + return new TwitterUserStream($auth); +} + +$user = User::staticGet('nickname', $nickname); +$stream = homeStreamForUser($user); +$stream->hookEvent('raw', function($data) { + var_dump($data); +}); + +class TwitterManager extends IoManager +{ + function __construct(TwitterStreamReader $stream) + { + $this->stream = $stream; + } + + function getSockets() + { + return $this->stream->getSockets(); + } + + function handleInput($data) + { + $this->stream->handleInput($data); + return true; + } + + function start() + { + $this->stream->connect(); + return true; + } + + function finish() + { + $this->stream->close(); + return true; + } + + public static function get() + { + throw new Exception('not a singleton'); + } +} + +class TwitterStreamMaster extends IoMaster +{ + function __construct($id, $ioManager) + { + parent::__construct($id); + $this->ioManager = $ioManager; + } + + /** + * Initialize IoManagers which are appropriate to this instance. + */ + function initManagers() + { + $this->instantiate($this->ioManager); + } +} + +$master = new TwitterStreamMaster('TwitterStream', new TwitterManager($stream)); +$master->init(); +$master->service(); diff --git a/plugins/TwitterBridge/twitterstreamreader.php b/plugins/TwitterBridge/twitterstreamreader.php new file mode 100644 index 0000000000..e746228a38 --- /dev/null +++ b/plugins/TwitterBridge/twitterstreamreader.php @@ -0,0 +1,161 @@ +. + * + * @category Plugin + * @package StatusNet + * @author Brion Vibber + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +// A single stream connection +abstract class TwitterStreamReader extends JsonStreamReader +{ + protected $callbacks = array(); + + function __construct(TwitterOAuthClient $auth, $baseUrl) + { + $this->baseUrl = $baseUrl; + $this->oauth = $auth; + } + + public function connect($method) + { + $url = $this->oAuthUrl($this->baseUrl . '/' . $method); + return parent::connect($url); + } + + /** + * Sign our target URL with OAuth auth stuff. + * + * @param string $url + * @param array $params + * @return string + */ + function oAuthUrl($url, $params=array()) + { + // In an ideal world this would be better encapsulated. :) + $request = OAuthRequest::from_consumer_and_token($this->oauth->consumer, + $this->oauth->token, 'GET', $url, $params); + $request->sign_request($this->oauth->sha1_method, + $this->oauth->consumer, $this->oauth->token); + + return $request->to_url(); + } + /** + * Add an event callback. Available event names include + * 'raw' (all data), 'friends', 'delete', 'scrubgeo', etc + * + * @param string $event + * @param callable $callback + */ + public function hookEvent($event, $callback) + { + $this->callbacks[$event][] = $callback; + } + + /** + * Call event handler callbacks for the given event. + * + * @param string $event + * @param mixed $arg1 ... one or more params to pass on + */ + public function fireEvent($event, $arg1) + { + if (array_key_exists($event, $this->callbacks)) { + $args = array_slice(func_get_args(), 1); + foreach ($this->callbacks[$event] as $callback) { + call_user_func_array($callback, $args); + } + } + } + + function handleJson(array $data) + { + $this->routeMessage($data); + } + + abstract function routeMessage($data); + + function handleMessage($data, $forUserId=null) + { + $this->fireEvent('raw', $data, $forUserId); + $known = array('friends'); + foreach ($known as $key) { + if (isset($data[$key])) { + $this->fireEvent($key, $data[$key], $forUserId); + } + } + } +} + +class TwitterSiteStream extends TwitterStreamReader +{ + protected $userIds; + + public function __construct(TwitterOAuthClient $auth, $baseUrl='https://stream.twitter.com') + { + parent::__construct($auth, $baseUrl); + } + + public function connect($method='2b/site.json') + { + return parent::connect($method); + } + + function followUsers($userIds) + { + $this->userIds = $userIds; + } + + /** + * Each message in the site stream tells us which user ID it should be + * routed to; we'll need that to let the caller know what to do. + * + * @param array $data + */ + function routeMessage($data) + { + parent::handleMessage($data['message'], $data['for_user']); + } +} + +class TwitterUserStream extends TwitterStreamReader +{ + public function __construct(TwitterOAuthClient $auth, $baseUrl='https://userstream.twitter.com') + { + parent::__construct($auth, $baseUrl); + } + + public function connect($method='2/user.json') + { + return parent::connect($method); + } + + /** + * Each message in the user stream is just ready to go. + * + * @param array $data + */ + function routeMessage($data) + { + parent::handleMessage($data, $this->userId); + } +} From 5058e8fd14340fc846b224ccff4e3a27dafc3134 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 5 Oct 2010 12:17:16 -0700 Subject: [PATCH 02/18] Twitter streaming API reader: Cleanup input handling & split from HTTP headers to body --- plugins/TwitterBridge/jsonstreamreader.php | 51 +++++++++++++------ plugins/TwitterBridge/twitterstreamreader.php | 17 +++++-- 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/plugins/TwitterBridge/jsonstreamreader.php b/plugins/TwitterBridge/jsonstreamreader.php index ad8e2626ad..898766357f 100644 --- a/plugins/TwitterBridge/jsonstreamreader.php +++ b/plugins/TwitterBridge/jsonstreamreader.php @@ -93,18 +93,24 @@ abstract class JsonStreamReader $this->state = 'waiting'; } + /** + * Send some fun data off to the server. + * + * @param string $buffer + */ function send($buffer) { - echo "Writing...\n"; - var_dump($buffer); fwrite($this->socket, $buffer); } + /** + * Read next packet of data from the socket. + * + * @return string + */ function read() { - echo "Reading...\n"; $buffer = fread($this->socket, 65536); - var_dump($buffer); return $buffer; } @@ -195,12 +201,16 @@ abstract class JsonStreamReader { $lines = explode(self::CRLF, $buffer); foreach ($lines as $line) { - if ($line == '') { - $this->state = 'active'; - common_log(LOG_DEBUG, "$this->id connection is active!"); - } else { - common_log(LOG_DEBUG, "$this->id read HTTP header: $line"); - $this->responseHeaders[] = $line; + if ($this->state == 'headers') { + if ($line == '') { + $this->state = 'active'; + common_log(LOG_DEBUG, "$this->id connection is active!"); + } else { + common_log(LOG_DEBUG, "$this->id read HTTP header: $line"); + $this->responseHeaders[] = $line; + } + } else if ($this->state == 'active') { + $this->handleLineActive($line); } } } @@ -211,12 +221,21 @@ abstract class JsonStreamReader // Will we always deliver on packet boundaries? $lines = explode("\n", $buffer); foreach ($lines as $line) { - $data = json_decode($line, true); - if ($data) { - $this->handleJson($data); - } else { - common_log(LOG_ERR, "$this->id received bogus JSON data: " . $line); - } + $this->handleLineActive($line); + } + } + + function handleLineActive($line) + { + if ($line == '') { + // Server sends empty lines as keepalive. + return; + } + $data = json_decode($line, true); + if ($data) { + $this->handleJson($data); + } else { + common_log(LOG_ERR, "$this->id received bogus JSON data: " . $line); } } diff --git a/plugins/TwitterBridge/twitterstreamreader.php b/plugins/TwitterBridge/twitterstreamreader.php index e746228a38..d440bdd4ef 100644 --- a/plugins/TwitterBridge/twitterstreamreader.php +++ b/plugins/TwitterBridge/twitterstreamreader.php @@ -94,11 +94,22 @@ abstract class TwitterStreamReader extends JsonStreamReader abstract function routeMessage($data); - function handleMessage($data, $forUserId=null) + /** + * Send the decoded JSON object out to any event listeners. + * + * @param array $data + * @param int $forUserId + */ + function handleMessage(array $data, $forUserId=null) { $this->fireEvent('raw', $data, $forUserId); - $known = array('friends'); - foreach ($known as $key) { + + if (isset($data['id']) && isset($data['text']) && isset($data['user'])) { + $this->fireEvent('status', $data); + } + + $knownMeta = array('friends', 'delete', 'scrubgeo', 'limit', 'event', 'direct_message'); + foreach ($knownMeta as $key) { if (isset($data[$key])) { $this->fireEvent($key, $data[$key], $forUserId); } From 76353ede54edd8e99ddb9c209ff408486282c0c5 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 5 Oct 2010 12:42:55 -0700 Subject: [PATCH 03/18] Clean up event handling a bit --- plugins/TwitterBridge/scripts/streamtest.php | 36 ++++++++++++++++++- plugins/TwitterBridge/twitterstreamreader.php | 10 ++++-- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/plugins/TwitterBridge/scripts/streamtest.php b/plugins/TwitterBridge/scripts/streamtest.php index 04d91ef439..96ff33fa52 100644 --- a/plugins/TwitterBridge/scripts/streamtest.php +++ b/plugins/TwitterBridge/scripts/streamtest.php @@ -81,7 +81,41 @@ function homeStreamForUser(User $user) $user = User::staticGet('nickname', $nickname); $stream = homeStreamForUser($user); $stream->hookEvent('raw', function($data) { - var_dump($data); + common_log(LOG_INFO, json_encode($data)); +}); +$stream->hookEvent('friends', function($data) { + printf("Friend list: %s\n", implode(', ', $data)); +}); +$stream->hookEvent('favorite', function($data) { + printf("%s favorited %s's notice: %s\n", + $data['source']['screen_name'], + $data['target']['screen_name'], + $data['target_object']['text']); +}); +$stream->hookEvent('follow', function($data) { + printf("%s friended %s\n", + $data['source']['screen_name'], + $data['target']['screen_name']); +}); +$stream->hookEvent('delete', function($data) { + printf("Deleted status notification: %s\n", + $data['status']['id']); +}); +$stream->hookEvent('scrub_geo', function($data) { + printf("Req to scrub geo data for user id %s up to status ID %s\n", + $data['user_id'], + $data['up_to_status_id']); +}); +$stream->hookEvent('status', function($data) { + printf("Received status update from %s: %s\n", + $data['user']['screen_name'], + $data['text']); +}); +$stream->hookEvent('direct_message', function($data) { + printf("Direct message from %s to %s: %s\n", + $data['sender']['screen_name'], + $data['recipient']['screen_name'], + $data['text']); }); class TwitterManager extends IoManager diff --git a/plugins/TwitterBridge/twitterstreamreader.php b/plugins/TwitterBridge/twitterstreamreader.php index d440bdd4ef..3d6700d70e 100644 --- a/plugins/TwitterBridge/twitterstreamreader.php +++ b/plugins/TwitterBridge/twitterstreamreader.php @@ -104,14 +104,20 @@ abstract class TwitterStreamReader extends JsonStreamReader { $this->fireEvent('raw', $data, $forUserId); - if (isset($data['id']) && isset($data['text']) && isset($data['user'])) { + if (isset($data['text'])) { $this->fireEvent('status', $data); + return; + } + if (isset($data['event'])) { + $this->fireEvent($data['event'], $data); + return; } - $knownMeta = array('friends', 'delete', 'scrubgeo', 'limit', 'event', 'direct_message'); + $knownMeta = array('friends', 'delete', 'scrubgeo', 'limit', 'direct_message'); foreach ($knownMeta as $key) { if (isset($data[$key])) { $this->fireEvent($key, $data[$key], $forUserId); + return; } } } From eb04df583a1a3afb4ca7de5ffd59fee4a6903210 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 5 Oct 2010 13:25:28 -0700 Subject: [PATCH 04/18] Buncha cleanup --- plugins/TwitterBridge/jsonstreamreader.php | 27 ++-- plugins/TwitterBridge/scripts/streamtest.php | 11 ++ plugins/TwitterBridge/twitterstreamreader.php | 139 +++++++++++++++--- 3 files changed, 148 insertions(+), 29 deletions(-) diff --git a/plugins/TwitterBridge/jsonstreamreader.php b/plugins/TwitterBridge/jsonstreamreader.php index 898766357f..0d16b68bd4 100644 --- a/plugins/TwitterBridge/jsonstreamreader.php +++ b/plugins/TwitterBridge/jsonstreamreader.php @@ -202,13 +202,7 @@ abstract class JsonStreamReader $lines = explode(self::CRLF, $buffer); foreach ($lines as $line) { if ($this->state == 'headers') { - if ($line == '') { - $this->state = 'active'; - common_log(LOG_DEBUG, "$this->id connection is active!"); - } else { - common_log(LOG_DEBUG, "$this->id read HTTP header: $line"); - $this->responseHeaders[] = $line; - } + $this->handleLineHeaders($line); } else if ($this->state == 'active') { $this->handleLineActive($line); } @@ -219,15 +213,26 @@ abstract class JsonStreamReader { // One JSON object on each line... // Will we always deliver on packet boundaries? - $lines = explode("\n", $buffer); + $lines = explode(self::CRLF, $buffer); foreach ($lines as $line) { $this->handleLineActive($line); } } - function handleLineActive($line) + function handleLineHeaders($line) { if ($line == '') { + $this->state = 'active'; + common_log(LOG_DEBUG, "$this->id connection is active!"); + } else { + common_log(LOG_DEBUG, "$this->id read HTTP header: $line"); + $this->responseHeaders[] = $line; + } + } + + function handleLineActive($line) + { + if ($line == "") { // Server sends empty lines as keepalive. return; } @@ -235,9 +240,9 @@ abstract class JsonStreamReader if ($data) { $this->handleJson($data); } else { - common_log(LOG_ERR, "$this->id received bogus JSON data: " . $line); + common_log(LOG_ERR, "$this->id received bogus JSON data: " . var_export($line, true)); } } - abstract function handleJson(array $data); + abstract protected function handleJson(array $data); } diff --git a/plugins/TwitterBridge/scripts/streamtest.php b/plugins/TwitterBridge/scripts/streamtest.php index 96ff33fa52..eddec39a7f 100644 --- a/plugins/TwitterBridge/scripts/streamtest.php +++ b/plugins/TwitterBridge/scripts/streamtest.php @@ -92,11 +92,22 @@ $stream->hookEvent('favorite', function($data) { $data['target']['screen_name'], $data['target_object']['text']); }); +$stream->hookEvent('unfavorite', function($data) { + printf("%s unfavorited %s's notice: %s\n", + $data['source']['screen_name'], + $data['target']['screen_name'], + $data['target_object']['text']); +}); $stream->hookEvent('follow', function($data) { printf("%s friended %s\n", $data['source']['screen_name'], $data['target']['screen_name']); }); +$stream->hookEvent('unfollow', function($data) { + printf("%s unfriended %s\n", + $data['source']['screen_name'], + $data['target']['screen_name']); +}); $stream->hookEvent('delete', function($data) { printf("Deleted status notification: %s\n", $data['status']['id']); diff --git a/plugins/TwitterBridge/twitterstreamreader.php b/plugins/TwitterBridge/twitterstreamreader.php index 3d6700d70e..793e239d02 100644 --- a/plugins/TwitterBridge/twitterstreamreader.php +++ b/plugins/TwitterBridge/twitterstreamreader.php @@ -25,7 +25,14 @@ * @link http://status.net/ */ -// A single stream connection +/** + * Base class for reading Twitter's User Streams and Site Streams + * real-time streaming APIs. + * + * Caller can hook event callbacks for various types of messages; + * the data from the stream and some context info will be passed + * on to the callbacks. + */ abstract class TwitterStreamReader extends JsonStreamReader { protected $callbacks = array(); @@ -36,9 +43,9 @@ abstract class TwitterStreamReader extends JsonStreamReader $this->oauth = $auth; } - public function connect($method) + public function connect($method, $params=array()) { - $url = $this->oAuthUrl($this->baseUrl . '/' . $method); + $url = $this->oAuthUrl($this->baseUrl . '/' . $method, $params); return parent::connect($url); } @@ -49,7 +56,7 @@ abstract class TwitterStreamReader extends JsonStreamReader * @param array $params * @return string */ - function oAuthUrl($url, $params=array()) + protected function oAuthUrl($url, $params=array()) { // In an ideal world this would be better encapsulated. :) $request = OAuthRequest::from_consumer_and_token($this->oauth->consumer, @@ -59,9 +66,64 @@ abstract class TwitterStreamReader extends JsonStreamReader return $request->to_url(); } + /** - * Add an event callback. Available event names include - * 'raw' (all data), 'friends', 'delete', 'scrubgeo', etc + * Add an event callback to receive notifications when things come in + * over the wire. + * + * Callbacks should be in the form: function(array $data, array $context) + * where $context may list additional data on some streams, such as the + * user to whom the message should be routed. + * + * Available events: + * + * Messaging: + * + * 'status': $data contains a status update in standard Twitter JSON format. + * $data['user']: sending user in standard Twitter JSON format. + * $data['text']... etc + * + * 'direct_message': $data contains a direct message in standard Twitter JSON format. + * $data['sender']: sending user in standard Twitter JSON format. + * $data['recipient']: receiving user in standard Twitter JSON format. + * $data['text']... etc + * + * + * Out of band events: + * + * 'follow': User has either started following someone, or is being followed. + * $data['source']: following user in standard Twitter JSON format. + * $data['target']: followed user in standard Twitter JSON format. + * + * 'favorite': Someone has favorited a status update. + * $data['source']: user doing the favoriting, in standard Twitter JSON format. + * $data['target']: user whose status was favorited, in standard Twitter JSON format. + * $data['target_object']: the favorited status update in standard Twitter JSON format. + * + * 'unfavorite': Someone has unfavorited a status update. + * $data['source']: user doing the unfavoriting, in standard Twitter JSON format. + * $data['target']: user whose status was unfavorited, in standard Twitter JSON format. + * $data['target_object']: the unfavorited status update in standard Twitter JSON format. + * + * + * Meta information: + * + * 'friends': $data is a list of user IDs of the current user's friends. + * + * 'delete': Advisory that a Twitter status has been deleted; nice clients + * should follow suit. + * $data['id']: ID of status being deleted + * $data['user_id']: ID of its owning user + * + * 'scrub_geo': Advisory that a user is clearing geo data from their status + * stream; nice clients should follow suit. + * $data['user_id']: ID of user + * $data['up_to_status_id']: any notice older than this should be scrubbed. + * + * 'limit': Advisory that tracking has hit a resource limit. + * $data['track'] + * + * 'raw': receives the full JSON data for all message types. * * @param string $event * @param callable $callback @@ -77,7 +139,7 @@ abstract class TwitterStreamReader extends JsonStreamReader * @param string $event * @param mixed $arg1 ... one or more params to pass on */ - public function fireEvent($event, $arg1) + protected function fireEvent($event, $arg1) { if (array_key_exists($event, $this->callbacks)) { $args = array_slice(func_get_args(), 1); @@ -87,42 +149,57 @@ abstract class TwitterStreamReader extends JsonStreamReader } } - function handleJson(array $data) + protected function handleJson(array $data) { $this->routeMessage($data); } - abstract function routeMessage($data); + abstract protected function routeMessage($data); /** * Send the decoded JSON object out to any event listeners. * * @param array $data - * @param int $forUserId + * @param array $context optional additional context data to pass on */ - function handleMessage(array $data, $forUserId=null) + protected function handleMessage(array $data, array $context=array()) { - $this->fireEvent('raw', $data, $forUserId); + $this->fireEvent('raw', $data, $context); if (isset($data['text'])) { - $this->fireEvent('status', $data); + $this->fireEvent('status', $data, $context); return; } if (isset($data['event'])) { - $this->fireEvent($data['event'], $data); + $this->fireEvent($data['event'], $data, $context); return; } $knownMeta = array('friends', 'delete', 'scrubgeo', 'limit', 'direct_message'); foreach ($knownMeta as $key) { if (isset($data[$key])) { - $this->fireEvent($key, $data[$key], $forUserId); + $this->fireEvent($key, $data[$key], $context); return; } } } } +/** + * Multiuser stream listener for Twitter Site Streams API + * http://dev.twitter.com/pages/site_streams + * + * The site streams API allows listening to updates for multiple users. + * Pass in the user IDs to listen to in via followUser() -- note they + * must each have a valid OAuth token for the application ID we're + * connecting as. + * + * You'll need to be connecting with the auth keys for the user who + * owns the application registration. + * + * The user each message is destined for will be passed to event handlers + * in $context['for_user_id']. + */ class TwitterSiteStream extends TwitterStreamReader { protected $userIds; @@ -134,9 +211,21 @@ class TwitterSiteStream extends TwitterStreamReader public function connect($method='2b/site.json') { - return parent::connect($method); + $params = array(); + if ($this->userIds) { + $params['follow'] = implode(',', $this->userIds); + } + return parent::connect($method, $params); } + /** + * Set the users whose home streams should be pulled. + * They all must have valid oauth tokens for this application. + * + * Must be called before connect(). + * + * @param array $userIds + */ function followUsers($userIds) { $this->userIds = $userIds; @@ -150,10 +239,21 @@ class TwitterSiteStream extends TwitterStreamReader */ function routeMessage($data) { - parent::handleMessage($data['message'], $data['for_user']); + $context = array( + 'source' => 'sitestream', + 'for_user' => $data['for_user'] + ); + parent::handleMessage($data['message'], $context); } } +/** + * Stream listener for Twitter User Streams API + * http://dev.twitter.com/pages/user_streams + * + * This will pull the home stream and additional events just for the user + * we've authenticated as. + */ class TwitterUserStream extends TwitterStreamReader { public function __construct(TwitterOAuthClient $auth, $baseUrl='https://userstream.twitter.com') @@ -173,6 +273,9 @@ class TwitterUserStream extends TwitterStreamReader */ function routeMessage($data) { - parent::handleMessage($data, $this->userId); + $context = array( + 'source' => 'userstream' + ); + parent::handleMessage($data, $context); } } From dc6c0f325c20145f7d2b37021d2c76c9a2c07ccb Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 5 Oct 2010 13:41:15 -0700 Subject: [PATCH 05/18] Cleanup on input path --- plugins/TwitterBridge/jsonstreamreader.php | 85 +++++++++++++--------- 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/plugins/TwitterBridge/jsonstreamreader.php b/plugins/TwitterBridge/jsonstreamreader.php index 0d16b68bd4..427037129c 100644 --- a/plugins/TwitterBridge/jsonstreamreader.php +++ b/plugins/TwitterBridge/jsonstreamreader.php @@ -49,7 +49,9 @@ abstract class JsonStreamReader /** * Starts asynchronous connect operation... * - * @param $url + * @fixme Can we do the open-socket fully async to? (need write select infrastructure) + * + * @param string $url */ public function connect($url) { @@ -114,6 +116,13 @@ abstract class JsonStreamReader return $buffer; } + /** + * Build HTTP request headers. + * + * @param string $host + * @param string $path + * @return string + */ protected function httpOpen($host, $path) { $lines = array( @@ -165,61 +174,69 @@ abstract class JsonStreamReader /** * Take a chunk of input over the horn and go go go! :D + * * @param string $buffer */ - function handleInput($socket) + public function handleInput($socket) { if ($this->socket !== $socket) { throw new Exception('Got input from unexpected socket!'); } - $buffer = $this->read(); + try { + $buffer = $this->read(); + $lines = explode(self::CRLF, $buffer); + foreach ($lines as $line) { + $this->handleLine($line); + } + } catch (Exception $e) { + common_log(LOG_ERR, "$this->id aborting connection due to error: " . $e->getMessage()); + fclose($this->socket); + throw $e; + } + } + + protected function handleLine($line) + { switch ($this->state) { case 'waiting': - $this->handleInputWaiting($buffer); + $this->handleLineWaiting($line); break; case 'headers': - $this->handleInputHeaders($buffer); + $this->handleLineHeaders($line); break; case 'active': - $this->handleInputActive($buffer); + $this->handleLineActive($line); break; default: - throw new Exception('Invalid state in handleInput: ' . $this->state); + throw new Exception('Invalid state in handleLine: ' . $this->state); } } - function handleInputWaiting($buffer) + /** + * + * @param $line + */ + protected function handleLineWaiting($line) { - common_log(LOG_DEBUG, "$this->id Does this happen? " . $buffer); + $bits = explode(' ', $line, 3); + if (count($bits) != 3) { + throw new Exception("Invalid HTTP response line: $line"); + } + + list($http, $status, $text) = $bits; + if (substr($http, 0, 5) != 'HTTP/') { + throw new Exception("Invalid HTTP response line chunk '$http': $line"); + } + if ($status != '200') { + throw new Exception("Bad HTTP response code $status: $line"); + } + common_log(LOG_DEBUG, "$this->id $line"); $this->state = 'headers'; - $this->handleInputHeaders($buffer); } - function handleInputHeaders($buffer) - { - $lines = explode(self::CRLF, $buffer); - foreach ($lines as $line) { - if ($this->state == 'headers') { - $this->handleLineHeaders($line); - } else if ($this->state == 'active') { - $this->handleLineActive($line); - } - } - } - - function handleInputActive($buffer) - { - // One JSON object on each line... - // Will we always deliver on packet boundaries? - $lines = explode(self::CRLF, $buffer); - foreach ($lines as $line) { - $this->handleLineActive($line); - } - } - - function handleLineHeaders($line) + protected function handleLineHeaders($line) { if ($line == '') { $this->state = 'active'; @@ -230,7 +247,7 @@ abstract class JsonStreamReader } } - function handleLineActive($line) + protected function handleLineActive($line) { if ($line == "") { // Server sends empty lines as keepalive. From 0eaa26476cac557484e295666b574340236452ff Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 5 Oct 2010 13:57:32 -0700 Subject: [PATCH 06/18] Split the guts of Twitter status -> notice import from twitterstatusfetcher daemon into TwitterImport class which can be called from other places, letting us reuse code for the streaming API. --- plugins/TwitterBridge/TwitterBridgePlugin.php | 1 + .../daemons/twitterstatusfetcher.php | 591 +--------------- plugins/TwitterBridge/twitterimport.php | 651 ++++++++++++++++++ 3 files changed, 655 insertions(+), 588 deletions(-) create mode 100644 plugins/TwitterBridge/twitterimport.php diff --git a/plugins/TwitterBridge/TwitterBridgePlugin.php b/plugins/TwitterBridge/TwitterBridgePlugin.php index 097d4486f9..128b062c7f 100644 --- a/plugins/TwitterBridge/TwitterBridgePlugin.php +++ b/plugins/TwitterBridge/TwitterBridgePlugin.php @@ -200,6 +200,7 @@ class TwitterBridgePlugin extends Plugin return false; case 'TwitterOAuthClient': case 'TwitterQueueHandler': + case 'TwitterImport': include_once $dir . '/' . strtolower($cls) . '.php'; return false; case 'Notice_to_status': diff --git a/plugins/TwitterBridge/daemons/twitterstatusfetcher.php b/plugins/TwitterBridge/daemons/twitterstatusfetcher.php index cef67b1806..9298d9e3a1 100755 --- a/plugins/TwitterBridge/daemons/twitterstatusfetcher.php +++ b/plugins/TwitterBridge/daemons/twitterstatusfetcher.php @@ -192,25 +192,12 @@ class TwitterStatusFetcher extends ParallelizingDaemon common_debug(LOG_INFO, $this->name() . ' - Retrieved ' . sizeof($timeline) . ' statuses from Twitter.'); + $importer = new TwitterImport(); + // Reverse to preserve order foreach (array_reverse($timeline) as $status) { - // Hacktastic: filter out stuff coming from this StatusNet - $source = mb_strtolower(common_config('integration', 'source')); - - if (preg_match("/$source/", mb_strtolower($status->source))) { - common_debug($this->name() . ' - Skipping import of status ' . - $status->id . ' with source ' . $source); - continue; - } - - // Don't save it if the user is protected - // FIXME: save it but treat it as private - if ($status->user->protected) { - continue; - } - - $notice = $this->saveStatus($status); + $notice = $importer->importStatus($status); if (!empty($notice)) { Inbox::insertNotice($flink->user_id, $notice->id); @@ -226,578 +213,6 @@ class TwitterStatusFetcher extends ParallelizingDaemon $flink->last_noticesync = common_sql_now(); $flink->update(); } - - function saveStatus($status) - { - $profile = $this->ensureProfile($status->user); - - if (empty($profile)) { - common_log(LOG_ERR, $this->name() . - ' - Problem saving notice. No associated Profile.'); - return null; - } - - $statusUri = $this->makeStatusURI($status->user->screen_name, $status->id); - - // check to see if we've already imported the status - $n2s = Notice_to_status::staticGet('status_id', $status->id); - - if (!empty($n2s)) { - common_log( - LOG_INFO, - $this->name() . - " - Ignoring duplicate import: {$status->id}" - ); - return Notice::staticGet('id', $n2s->notice_id); - } - - // If it's a retweet, save it as a repeat! - if (!empty($status->retweeted_status)) { - common_log(LOG_INFO, "Status {$status->id} is a retweet of {$status->retweeted_status->id}."); - $original = $this->saveStatus($status->retweeted_status); - if (empty($original)) { - return null; - } else { - $author = $original->getProfile(); - // TRANS: Message used to repeat a notice. RT is the abbreviation of 'retweet'. - // TRANS: %1$s is the repeated user's name, %2$s is the repeated notice. - $content = sprintf(_m('RT @%1$s %2$s'), - $author->nickname, - $original->content); - - if (Notice::contentTooLong($content)) { - $contentlimit = Notice::maxContent(); - $content = mb_substr($content, 0, $contentlimit - 4) . ' ...'; - } - - $repeat = Notice::saveNew($profile->id, - $content, - 'twitter', - array('repeat_of' => $original->id, - 'uri' => $statusUri, - 'is_local' => Notice::GATEWAY)); - common_log(LOG_INFO, "Saved {$repeat->id} as a repeat of {$original->id}"); - Notice_to_status::saveNew($repeat->id, $status->id); - return $repeat; - } - } - - $notice = new Notice(); - - $notice->profile_id = $profile->id; - $notice->uri = $statusUri; - $notice->url = $statusUri; - $notice->created = strftime( - '%Y-%m-%d %H:%M:%S', - strtotime($status->created_at) - ); - - $notice->source = 'twitter'; - - $notice->reply_to = null; - - if (!empty($status->in_reply_to_status_id)) { - common_log(LOG_INFO, "Status {$status->id} is a reply to status {$status->in_reply_to_status_id}"); - $n2s = Notice_to_status::staticGet('status_id', $status->in_reply_to_status_id); - if (empty($n2s)) { - common_log(LOG_INFO, "Couldn't find local notice for status {$status->in_reply_to_status_id}"); - } else { - $reply = Notice::staticGet('id', $n2s->notice_id); - if (empty($reply)) { - common_log(LOG_INFO, "Couldn't find local notice for status {$status->in_reply_to_status_id}"); - } else { - common_log(LOG_INFO, "Found local notice {$reply->id} for status {$status->in_reply_to_status_id}"); - $notice->reply_to = $reply->id; - $notice->conversation = $reply->conversation; - } - } - } - - if (empty($notice->conversation)) { - $conv = Conversation::create(); - $notice->conversation = $conv->id; - common_log(LOG_INFO, "No known conversation for status {$status->id} so making a new one {$conv->id}."); - } - - $notice->is_local = Notice::GATEWAY; - - $notice->content = html_entity_decode($status->text, ENT_QUOTES, 'UTF-8'); - $notice->rendered = $this->linkify($status); - - if (Event::handle('StartNoticeSave', array(&$notice))) { - - $id = $notice->insert(); - - if (!$id) { - common_log_db_error($notice, 'INSERT', __FILE__); - common_log(LOG_ERR, $this->name() . - ' - Problem saving notice.'); - } - - Event::handle('EndNoticeSave', array($notice)); - } - - Notice_to_status::saveNew($notice->id, $status->id); - - $this->saveStatusMentions($notice, $status); - - $notice->blowOnInsert(); - - return $notice; - } - - /** - * Make an URI for a status. - * - * @param object $status status object - * - * @return string URI - */ - function makeStatusURI($username, $id) - { - return 'http://twitter.com/' - . $username - . '/status/' - . $id; - } - - /** - * Look up a Profile by profileurl field. Profile::staticGet() was - * not working consistently. - * - * @param string $nickname local nickname of the Twitter user - * @param string $profileurl the profile url - * - * @return mixed value the first Profile with that url, or null - */ - function getProfileByUrl($nickname, $profileurl) - { - $profile = new Profile(); - $profile->nickname = $nickname; - $profile->profileurl = $profileurl; - $profile->limit(1); - - if ($profile->find()) { - $profile->fetch(); - return $profile; - } - - return null; - } - - /** - * Check to see if this Twitter status has already been imported - * - * @param Profile $profile Twitter user's local profile - * @param string $statusUri URI of the status on Twitter - * - * @return mixed value a matching Notice or null - */ - function checkDupe($profile, $statusUri) - { - $notice = new Notice(); - $notice->uri = $statusUri; - $notice->profile_id = $profile->id; - $notice->limit(1); - - if ($notice->find()) { - $notice->fetch(); - return $notice; - } - - return null; - } - - function ensureProfile($user) - { - // check to see if there's already a profile for this user - $profileurl = 'http://twitter.com/' . $user->screen_name; - $profile = $this->getProfileByUrl($user->screen_name, $profileurl); - - if (!empty($profile)) { - common_debug($this->name() . - " - Profile for $profile->nickname found."); - - // Check to see if the user's Avatar has changed - - $this->checkAvatar($user, $profile); - return $profile; - - } else { - common_debug($this->name() . ' - Adding profile and remote profile ' . - "for Twitter user: $profileurl."); - - $profile = new Profile(); - $profile->query("BEGIN"); - - $profile->nickname = $user->screen_name; - $profile->fullname = $user->name; - $profile->homepage = $user->url; - $profile->bio = $user->description; - $profile->location = $user->location; - $profile->profileurl = $profileurl; - $profile->created = common_sql_now(); - - try { - $id = $profile->insert(); - } catch(Exception $e) { - common_log(LOG_WARNING, $this->name . ' Couldn\'t insert profile - ' . $e->getMessage()); - } - - if (empty($id)) { - common_log_db_error($profile, 'INSERT', __FILE__); - $profile->query("ROLLBACK"); - return false; - } - - // check for remote profile - - $remote_pro = Remote_profile::staticGet('uri', $profileurl); - - if (empty($remote_pro)) { - $remote_pro = new Remote_profile(); - - $remote_pro->id = $id; - $remote_pro->uri = $profileurl; - $remote_pro->created = common_sql_now(); - - try { - $rid = $remote_pro->insert(); - } catch (Exception $e) { - common_log(LOG_WARNING, $this->name() . ' Couldn\'t save remote profile - ' . $e->getMessage()); - } - - if (empty($rid)) { - common_log_db_error($profile, 'INSERT', __FILE__); - $profile->query("ROLLBACK"); - return false; - } - } - - $profile->query("COMMIT"); - - $this->saveAvatars($user, $id); - - return $profile; - } - } - - function checkAvatar($twitter_user, $profile) - { - global $config; - - $path_parts = pathinfo($twitter_user->profile_image_url); - - $newname = 'Twitter_' . $twitter_user->id . '_' . - $path_parts['basename']; - - $oldname = $profile->getAvatar(48)->filename; - - if ($newname != $oldname) { - common_debug($this->name() . ' - Avatar for Twitter user ' . - "$profile->nickname has changed."); - common_debug($this->name() . " - old: $oldname new: $newname"); - - $this->updateAvatars($twitter_user, $profile); - } - - if ($this->missingAvatarFile($profile)) { - common_debug($this->name() . ' - Twitter user ' . - $profile->nickname . - ' is missing one or more local avatars.'); - common_debug($this->name() ." - old: $oldname new: $newname"); - - $this->updateAvatars($twitter_user, $profile); - } - } - - function updateAvatars($twitter_user, $profile) { - - global $config; - - $path_parts = pathinfo($twitter_user->profile_image_url); - - $img_root = substr($path_parts['basename'], 0, -11); - $ext = $path_parts['extension']; - $mediatype = $this->getMediatype($ext); - - foreach (array('mini', 'normal', 'bigger') as $size) { - $url = $path_parts['dirname'] . '/' . - $img_root . '_' . $size . ".$ext"; - $filename = 'Twitter_' . $twitter_user->id . '_' . - $img_root . "_$size.$ext"; - - $this->updateAvatar($profile->id, $size, $mediatype, $filename); - $this->fetchAvatar($url, $filename); - } - } - - function missingAvatarFile($profile) { - foreach (array(24, 48, 73) as $size) { - $filename = $profile->getAvatar($size)->filename; - $avatarpath = Avatar::path($filename); - if (file_exists($avatarpath) == FALSE) { - return true; - } - } - return false; - } - - function getMediatype($ext) - { - $mediatype = null; - - switch (strtolower($ext)) { - case 'jpg': - $mediatype = 'image/jpg'; - break; - case 'gif': - $mediatype = 'image/gif'; - break; - default: - $mediatype = 'image/png'; - } - - return $mediatype; - } - - function saveAvatars($user, $id) - { - global $config; - - $path_parts = pathinfo($user->profile_image_url); - $ext = $path_parts['extension']; - $end = strlen('_normal' . $ext); - $img_root = substr($path_parts['basename'], 0, -($end+1)); - $mediatype = $this->getMediatype($ext); - - foreach (array('mini', 'normal', 'bigger') as $size) { - $url = $path_parts['dirname'] . '/' . - $img_root . '_' . $size . ".$ext"; - $filename = 'Twitter_' . $user->id . '_' . - $img_root . "_$size.$ext"; - - if ($this->fetchAvatar($url, $filename)) { - $this->newAvatar($id, $size, $mediatype, $filename); - } else { - common_log(LOG_WARNING, $id() . - " - Problem fetching Avatar: $url"); - } - } - } - - function updateAvatar($profile_id, $size, $mediatype, $filename) { - - common_debug($this->name() . " - Updating avatar: $size"); - - $profile = Profile::staticGet($profile_id); - - if (empty($profile)) { - common_debug($this->name() . " - Couldn't get profile: $profile_id!"); - return; - } - - $sizes = array('mini' => 24, 'normal' => 48, 'bigger' => 73); - $avatar = $profile->getAvatar($sizes[$size]); - - // Delete the avatar, if present - if ($avatar) { - $avatar->delete(); - } - - $this->newAvatar($profile->id, $size, $mediatype, $filename); - } - - function newAvatar($profile_id, $size, $mediatype, $filename) - { - global $config; - - $avatar = new Avatar(); - $avatar->profile_id = $profile_id; - - switch($size) { - case 'mini': - $avatar->width = 24; - $avatar->height = 24; - break; - case 'normal': - $avatar->width = 48; - $avatar->height = 48; - break; - default: - // Note: Twitter's big avatars are a different size than - // StatusNet's (StatusNet's = 96) - $avatar->width = 73; - $avatar->height = 73; - } - - $avatar->original = 0; // we don't have the original - $avatar->mediatype = $mediatype; - $avatar->filename = $filename; - $avatar->url = Avatar::url($filename); - - $avatar->created = common_sql_now(); - - try { - $id = $avatar->insert(); - } catch (Exception $e) { - common_log(LOG_WARNING, $this->name() . ' Couldn\'t insert avatar - ' . $e->getMessage()); - } - - if (empty($id)) { - common_log_db_error($avatar, 'INSERT', __FILE__); - return null; - } - - common_debug($this->name() . - " - Saved new $size avatar for $profile_id."); - - return $id; - } - - /** - * Fetch a remote avatar image and save to local storage. - * - * @param string $url avatar source URL - * @param string $filename bare local filename for download - * @return bool true on success, false on failure - */ - function fetchAvatar($url, $filename) - { - common_debug($this->name() . " - Fetching Twitter avatar: $url"); - - $request = HTTPClient::start(); - $response = $request->get($url); - if ($response->isOk()) { - $avatarfile = Avatar::path($filename); - $ok = file_put_contents($avatarfile, $response->getBody()); - if (!$ok) { - common_log(LOG_WARNING, $this->name() . - " - Couldn't open file $filename"); - return false; - } - } else { - return false; - } - - return true; - } - - const URL = 1; - const HASHTAG = 2; - const MENTION = 3; - - function linkify($status) - { - $text = $status->text; - - if (empty($status->entities)) { - common_log(LOG_WARNING, "No entities data for {$status->id}; trying to fake up links ourselves."); - $text = common_replace_urls_callback($text, 'common_linkify'); - $text = preg_replace('/(^|\"\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/e', "'\\1#'.TwitterStatusFetcher::tagLink('\\2')", $text); - $text = preg_replace('/(^|\s+)@([a-z0-9A-Z_]{1,64})/e', "'\\1@'.TwitterStatusFetcher::atLink('\\2')", $text); - return $text; - } - - // Move all the entities into order so we can - // replace them in reverse order and thus - // not mess up their indices - - $toReplace = array(); - - if (!empty($status->entities->urls)) { - foreach ($status->entities->urls as $url) { - $toReplace[$url->indices[0]] = array(self::URL, $url); - } - } - - if (!empty($status->entities->hashtags)) { - foreach ($status->entities->hashtags as $hashtag) { - $toReplace[$hashtag->indices[0]] = array(self::HASHTAG, $hashtag); - } - } - - if (!empty($status->entities->user_mentions)) { - foreach ($status->entities->user_mentions as $mention) { - $toReplace[$mention->indices[0]] = array(self::MENTION, $mention); - } - } - - // sort in reverse order by key - - krsort($toReplace); - - foreach ($toReplace as $part) { - list($type, $object) = $part; - switch($type) { - case self::URL: - $linkText = $this->makeUrlLink($object); - break; - case self::HASHTAG: - $linkText = $this->makeHashtagLink($object); - break; - case self::MENTION: - $linkText = $this->makeMentionLink($object); - break; - default: - continue; - } - $text = mb_substr($text, 0, $object->indices[0]) . $linkText . mb_substr($text, $object->indices[1]); - } - return $text; - } - - function makeUrlLink($object) - { - return "{$object->url}"; - } - - function makeHashtagLink($object) - { - return "#" . self::tagLink($object->text); - } - - function makeMentionLink($object) - { - return "@".self::atLink($object->screen_name, $object->name); - } - - static function tagLink($tag) - { - return "{$tag}"; - } - - static function atLink($screenName, $fullName=null) - { - if (!empty($fullName)) { - return "{$screenName}"; - } else { - return "{$screenName}"; - } - } - - function saveStatusMentions($notice, $status) - { - $mentions = array(); - - if (empty($status->entities) || empty($status->entities->user_mentions)) { - return; - } - - foreach ($status->entities->user_mentions as $mention) { - $flink = Foreign_link::getByForeignID($mention->id, TWITTER_SERVICE); - if (!empty($flink)) { - $user = User::staticGet('id', $flink->user_id); - if (!empty($user)) { - $reply = new Reply(); - $reply->notice_id = $notice->id; - $reply->profile_id = $user->id; - common_log(LOG_INFO, __METHOD__ . ": saving reply: notice {$notice->id} to profile {$user->id}"); - $id = $reply->insert(); - } - } - } - } } $id = null; diff --git a/plugins/TwitterBridge/twitterimport.php b/plugins/TwitterBridge/twitterimport.php new file mode 100644 index 0000000000..1b2d395304 --- /dev/null +++ b/plugins/TwitterBridge/twitterimport.php @@ -0,0 +1,651 @@ +. + * + * @category Plugin + * @package StatusNet + * @author Zach Copley + * @author Julien C + * @author Brion Vibber + * @copyright 2009-2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +require_once INSTALLDIR . '/plugins/TwitterBridge/twitter.php'; + +/** + * Encapsulation of the Twitter status -> notice incoming bridge import. + * Is used by both the polling twitterstatusfetcher.php daemon, and the + * in-progress streaming import. + * + * @category Plugin + * @package StatusNet + * @author Zach Copley + * @author Julien C + * @author Brion Vibber + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + * @link http://twitter.com/ + */ +class TwitterImport +{ + public function importStatus($status) + { + // Hacktastic: filter out stuff coming from this StatusNet + $source = mb_strtolower(common_config('integration', 'source')); + + if (preg_match("/$source/", mb_strtolower($status->source))) { + common_debug($this->name() . ' - Skipping import of status ' . + $status->id . ' with source ' . $source); + continue; + } + + // Don't save it if the user is protected + // FIXME: save it but treat it as private + if ($status->user->protected) { + continue; + } + + $notice = $this->saveStatus($status); + + return $notice; + } + + function name() + { + return get_class($this); + } + + function saveStatus($status) + { + $profile = $this->ensureProfile($status->user); + + if (empty($profile)) { + common_log(LOG_ERR, $this->name() . + ' - Problem saving notice. No associated Profile.'); + return null; + } + + $statusUri = $this->makeStatusURI($status->user->screen_name, $status->id); + + // check to see if we've already imported the status + $n2s = Notice_to_status::staticGet('status_id', $status->id); + + if (!empty($n2s)) { + common_log( + LOG_INFO, + $this->name() . + " - Ignoring duplicate import: {$status->id}" + ); + return Notice::staticGet('id', $n2s->notice_id); + } + + // If it's a retweet, save it as a repeat! + if (!empty($status->retweeted_status)) { + common_log(LOG_INFO, "Status {$status->id} is a retweet of {$status->retweeted_status->id}."); + $original = $this->saveStatus($status->retweeted_status); + if (empty($original)) { + return null; + } else { + $author = $original->getProfile(); + // TRANS: Message used to repeat a notice. RT is the abbreviation of 'retweet'. + // TRANS: %1$s is the repeated user's name, %2$s is the repeated notice. + $content = sprintf(_m('RT @%1$s %2$s'), + $author->nickname, + $original->content); + + if (Notice::contentTooLong($content)) { + $contentlimit = Notice::maxContent(); + $content = mb_substr($content, 0, $contentlimit - 4) . ' ...'; + } + + $repeat = Notice::saveNew($profile->id, + $content, + 'twitter', + array('repeat_of' => $original->id, + 'uri' => $statusUri, + 'is_local' => Notice::GATEWAY)); + common_log(LOG_INFO, "Saved {$repeat->id} as a repeat of {$original->id}"); + Notice_to_status::saveNew($repeat->id, $status->id); + return $repeat; + } + } + + $notice = new Notice(); + + $notice->profile_id = $profile->id; + $notice->uri = $statusUri; + $notice->url = $statusUri; + $notice->created = strftime( + '%Y-%m-%d %H:%M:%S', + strtotime($status->created_at) + ); + + $notice->source = 'twitter'; + + $notice->reply_to = null; + + if (!empty($status->in_reply_to_status_id)) { + common_log(LOG_INFO, "Status {$status->id} is a reply to status {$status->in_reply_to_status_id}"); + $n2s = Notice_to_status::staticGet('status_id', $status->in_reply_to_status_id); + if (empty($n2s)) { + common_log(LOG_INFO, "Couldn't find local notice for status {$status->in_reply_to_status_id}"); + } else { + $reply = Notice::staticGet('id', $n2s->notice_id); + if (empty($reply)) { + common_log(LOG_INFO, "Couldn't find local notice for status {$status->in_reply_to_status_id}"); + } else { + common_log(LOG_INFO, "Found local notice {$reply->id} for status {$status->in_reply_to_status_id}"); + $notice->reply_to = $reply->id; + $notice->conversation = $reply->conversation; + } + } + } + + if (empty($notice->conversation)) { + $conv = Conversation::create(); + $notice->conversation = $conv->id; + common_log(LOG_INFO, "No known conversation for status {$status->id} so making a new one {$conv->id}."); + } + + $notice->is_local = Notice::GATEWAY; + + $notice->content = html_entity_decode($status->text, ENT_QUOTES, 'UTF-8'); + $notice->rendered = $this->linkify($status); + + if (Event::handle('StartNoticeSave', array(&$notice))) { + + $id = $notice->insert(); + + if (!$id) { + common_log_db_error($notice, 'INSERT', __FILE__); + common_log(LOG_ERR, $this->name() . + ' - Problem saving notice.'); + } + + Event::handle('EndNoticeSave', array($notice)); + } + + Notice_to_status::saveNew($notice->id, $status->id); + + $this->saveStatusMentions($notice, $status); + + $notice->blowOnInsert(); + + return $notice; + } + + /** + * Make an URI for a status. + * + * @param object $status status object + * + * @return string URI + */ + function makeStatusURI($username, $id) + { + return 'http://twitter.com/' + . $username + . '/status/' + . $id; + } + + + /** + * Look up a Profile by profileurl field. Profile::staticGet() was + * not working consistently. + * + * @param string $nickname local nickname of the Twitter user + * @param string $profileurl the profile url + * + * @return mixed value the first Profile with that url, or null + */ + function getProfileByUrl($nickname, $profileurl) + { + $profile = new Profile(); + $profile->nickname = $nickname; + $profile->profileurl = $profileurl; + $profile->limit(1); + + if ($profile->find()) { + $profile->fetch(); + return $profile; + } + + return null; + } + + /** + * Check to see if this Twitter status has already been imported + * + * @param Profile $profile Twitter user's local profile + * @param string $statusUri URI of the status on Twitter + * + * @return mixed value a matching Notice or null + */ + function checkDupe($profile, $statusUri) + { + $notice = new Notice(); + $notice->uri = $statusUri; + $notice->profile_id = $profile->id; + $notice->limit(1); + + if ($notice->find()) { + $notice->fetch(); + return $notice; + } + + return null; + } + + function ensureProfile($user) + { + // check to see if there's already a profile for this user + $profileurl = 'http://twitter.com/' . $user->screen_name; + $profile = $this->getProfileByUrl($user->screen_name, $profileurl); + + if (!empty($profile)) { + common_debug($this->name() . + " - Profile for $profile->nickname found."); + + // Check to see if the user's Avatar has changed + + $this->checkAvatar($user, $profile); + return $profile; + + } else { + common_debug($this->name() . ' - Adding profile and remote profile ' . + "for Twitter user: $profileurl."); + + $profile = new Profile(); + $profile->query("BEGIN"); + + $profile->nickname = $user->screen_name; + $profile->fullname = $user->name; + $profile->homepage = $user->url; + $profile->bio = $user->description; + $profile->location = $user->location; + $profile->profileurl = $profileurl; + $profile->created = common_sql_now(); + + try { + $id = $profile->insert(); + } catch(Exception $e) { + common_log(LOG_WARNING, $this->name() . ' Couldn\'t insert profile - ' . $e->getMessage()); + } + + if (empty($id)) { + common_log_db_error($profile, 'INSERT', __FILE__); + $profile->query("ROLLBACK"); + return false; + } + + // check for remote profile + + $remote_pro = Remote_profile::staticGet('uri', $profileurl); + + if (empty($remote_pro)) { + $remote_pro = new Remote_profile(); + + $remote_pro->id = $id; + $remote_pro->uri = $profileurl; + $remote_pro->created = common_sql_now(); + + try { + $rid = $remote_pro->insert(); + } catch (Exception $e) { + common_log(LOG_WARNING, $this->name() . ' Couldn\'t save remote profile - ' . $e->getMessage()); + } + + if (empty($rid)) { + common_log_db_error($profile, 'INSERT', __FILE__); + $profile->query("ROLLBACK"); + return false; + } + } + + $profile->query("COMMIT"); + + $this->saveAvatars($user, $id); + + return $profile; + } + } + + function checkAvatar($twitter_user, $profile) + { + global $config; + + $path_parts = pathinfo($twitter_user->profile_image_url); + + $newname = 'Twitter_' . $twitter_user->id . '_' . + $path_parts['basename']; + + $oldname = $profile->getAvatar(48)->filename; + + if ($newname != $oldname) { + common_debug($this->name() . ' - Avatar for Twitter user ' . + "$profile->nickname has changed."); + common_debug($this->name() . " - old: $oldname new: $newname"); + + $this->updateAvatars($twitter_user, $profile); + } + + if ($this->missingAvatarFile($profile)) { + common_debug($this->name() . ' - Twitter user ' . + $profile->nickname . + ' is missing one or more local avatars.'); + common_debug($this->name() ." - old: $oldname new: $newname"); + + $this->updateAvatars($twitter_user, $profile); + } + } + + function updateAvatars($twitter_user, $profile) { + + global $config; + + $path_parts = pathinfo($twitter_user->profile_image_url); + + $img_root = substr($path_parts['basename'], 0, -11); + $ext = $path_parts['extension']; + $mediatype = $this->getMediatype($ext); + + foreach (array('mini', 'normal', 'bigger') as $size) { + $url = $path_parts['dirname'] . '/' . + $img_root . '_' . $size . ".$ext"; + $filename = 'Twitter_' . $twitter_user->id . '_' . + $img_root . "_$size.$ext"; + + $this->updateAvatar($profile->id, $size, $mediatype, $filename); + $this->fetchAvatar($url, $filename); + } + } + + function missingAvatarFile($profile) { + foreach (array(24, 48, 73) as $size) { + $filename = $profile->getAvatar($size)->filename; + $avatarpath = Avatar::path($filename); + if (file_exists($avatarpath) == FALSE) { + return true; + } + } + return false; + } + + function getMediatype($ext) + { + $mediatype = null; + + switch (strtolower($ext)) { + case 'jpg': + $mediatype = 'image/jpg'; + break; + case 'gif': + $mediatype = 'image/gif'; + break; + default: + $mediatype = 'image/png'; + } + + return $mediatype; + } + + function saveAvatars($user, $id) + { + global $config; + + $path_parts = pathinfo($user->profile_image_url); + $ext = $path_parts['extension']; + $end = strlen('_normal' . $ext); + $img_root = substr($path_parts['basename'], 0, -($end+1)); + $mediatype = $this->getMediatype($ext); + + foreach (array('mini', 'normal', 'bigger') as $size) { + $url = $path_parts['dirname'] . '/' . + $img_root . '_' . $size . ".$ext"; + $filename = 'Twitter_' . $user->id . '_' . + $img_root . "_$size.$ext"; + + if ($this->fetchAvatar($url, $filename)) { + $this->newAvatar($id, $size, $mediatype, $filename); + } else { + common_log(LOG_WARNING, $id() . + " - Problem fetching Avatar: $url"); + } + } + } + + function updateAvatar($profile_id, $size, $mediatype, $filename) { + + common_debug($this->name() . " - Updating avatar: $size"); + + $profile = Profile::staticGet($profile_id); + + if (empty($profile)) { + common_debug($this->name() . " - Couldn't get profile: $profile_id!"); + return; + } + + $sizes = array('mini' => 24, 'normal' => 48, 'bigger' => 73); + $avatar = $profile->getAvatar($sizes[$size]); + + // Delete the avatar, if present + if ($avatar) { + $avatar->delete(); + } + + $this->newAvatar($profile->id, $size, $mediatype, $filename); + } + + function newAvatar($profile_id, $size, $mediatype, $filename) + { + global $config; + + $avatar = new Avatar(); + $avatar->profile_id = $profile_id; + + switch($size) { + case 'mini': + $avatar->width = 24; + $avatar->height = 24; + break; + case 'normal': + $avatar->width = 48; + $avatar->height = 48; + break; + default: + // Note: Twitter's big avatars are a different size than + // StatusNet's (StatusNet's = 96) + $avatar->width = 73; + $avatar->height = 73; + } + + $avatar->original = 0; // we don't have the original + $avatar->mediatype = $mediatype; + $avatar->filename = $filename; + $avatar->url = Avatar::url($filename); + + $avatar->created = common_sql_now(); + + try { + $id = $avatar->insert(); + } catch (Exception $e) { + common_log(LOG_WARNING, $this->name() . ' Couldn\'t insert avatar - ' . $e->getMessage()); + } + + if (empty($id)) { + common_log_db_error($avatar, 'INSERT', __FILE__); + return null; + } + + common_debug($this->name() . + " - Saved new $size avatar for $profile_id."); + + return $id; + } + + /** + * Fetch a remote avatar image and save to local storage. + * + * @param string $url avatar source URL + * @param string $filename bare local filename for download + * @return bool true on success, false on failure + */ + function fetchAvatar($url, $filename) + { + common_debug($this->name() . " - Fetching Twitter avatar: $url"); + + $request = HTTPClient::start(); + $response = $request->get($url); + if ($response->isOk()) { + $avatarfile = Avatar::path($filename); + $ok = file_put_contents($avatarfile, $response->getBody()); + if (!$ok) { + common_log(LOG_WARNING, $this->name() . + " - Couldn't open file $filename"); + return false; + } + } else { + return false; + } + + return true; + } + + const URL = 1; + const HASHTAG = 2; + const MENTION = 3; + + function linkify($status) + { + $text = $status->text; + + if (empty($status->entities)) { + common_log(LOG_WARNING, "No entities data for {$status->id}; trying to fake up links ourselves."); + $text = common_replace_urls_callback($text, 'common_linkify'); + $text = preg_replace('/(^|\"\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/e', "'\\1#'.TwitterStatusFetcher::tagLink('\\2')", $text); + $text = preg_replace('/(^|\s+)@([a-z0-9A-Z_]{1,64})/e', "'\\1@'.TwitterStatusFetcher::atLink('\\2')", $text); + return $text; + } + + // Move all the entities into order so we can + // replace them in reverse order and thus + // not mess up their indices + + $toReplace = array(); + + if (!empty($status->entities->urls)) { + foreach ($status->entities->urls as $url) { + $toReplace[$url->indices[0]] = array(self::URL, $url); + } + } + + if (!empty($status->entities->hashtags)) { + foreach ($status->entities->hashtags as $hashtag) { + $toReplace[$hashtag->indices[0]] = array(self::HASHTAG, $hashtag); + } + } + + if (!empty($status->entities->user_mentions)) { + foreach ($status->entities->user_mentions as $mention) { + $toReplace[$mention->indices[0]] = array(self::MENTION, $mention); + } + } + + // sort in reverse order by key + + krsort($toReplace); + + foreach ($toReplace as $part) { + list($type, $object) = $part; + switch($type) { + case self::URL: + $linkText = $this->makeUrlLink($object); + break; + case self::HASHTAG: + $linkText = $this->makeHashtagLink($object); + break; + case self::MENTION: + $linkText = $this->makeMentionLink($object); + break; + default: + continue; + } + $text = mb_substr($text, 0, $object->indices[0]) . $linkText . mb_substr($text, $object->indices[1]); + } + return $text; + } + + function makeUrlLink($object) + { + return "{$object->url}"; + } + + function makeHashtagLink($object) + { + return "#" . self::tagLink($object->text); + } + + function makeMentionLink($object) + { + return "@".self::atLink($object->screen_name, $object->name); + } + + static function tagLink($tag) + { + return "{$tag}"; + } + + static function atLink($screenName, $fullName=null) + { + if (!empty($fullName)) { + return "{$screenName}"; + } else { + return "{$screenName}"; + } + } + + function saveStatusMentions($notice, $status) + { + $mentions = array(); + + if (empty($status->entities) || empty($status->entities->user_mentions)) { + return; + } + + foreach ($status->entities->user_mentions as $mention) { + $flink = Foreign_link::getByForeignID($mention->id, TWITTER_SERVICE); + if (!empty($flink)) { + $user = User::staticGet('id', $flink->user_id); + if (!empty($user)) { + $reply = new Reply(); + $reply->notice_id = $notice->id; + $reply->profile_id = $user->id; + common_log(LOG_INFO, __METHOD__ . ": saving reply: notice {$notice->id} to profile {$user->id}"); + $id = $reply->insert(); + } + } + } + } +} \ No newline at end of file From 408483f7718a2c81c37eb5f387130381a4a188c8 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 5 Oct 2010 14:26:11 -0700 Subject: [PATCH 07/18] Fix up Twitter JSON formatting to be consistent between the polling and streaming API interfaces; basic stream tester can now import your notices (ooooh) --- plugins/TwitterBridge/jsonstreamreader.php | 4 +- plugins/TwitterBridge/scripts/streamtest.php | 59 ++++++++++------ plugins/TwitterBridge/twitterstreamreader.php | 70 ++++++++++--------- 3 files changed, 78 insertions(+), 55 deletions(-) diff --git a/plugins/TwitterBridge/jsonstreamreader.php b/plugins/TwitterBridge/jsonstreamreader.php index 427037129c..f6572c9eef 100644 --- a/plugins/TwitterBridge/jsonstreamreader.php +++ b/plugins/TwitterBridge/jsonstreamreader.php @@ -253,7 +253,7 @@ abstract class JsonStreamReader // Server sends empty lines as keepalive. return; } - $data = json_decode($line, true); + $data = json_decode($line); if ($data) { $this->handleJson($data); } else { @@ -261,5 +261,5 @@ abstract class JsonStreamReader } } - abstract protected function handleJson(array $data); + abstract protected function handleJson(stdClass $data); } diff --git a/plugins/TwitterBridge/scripts/streamtest.php b/plugins/TwitterBridge/scripts/streamtest.php index eddec39a7f..1cec451c8c 100644 --- a/plugins/TwitterBridge/scripts/streamtest.php +++ b/plugins/TwitterBridge/scripts/streamtest.php @@ -28,11 +28,14 @@ define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..')); $shortoptions = 'n:'; -$longoptions = array('nick='); +$longoptions = array('nick=','import'); $helptext = << + -n --nick Local user whose Twitter timeline to watch + --import Experimental: run incoming messages through import + Attempts a User Stream connection to Twitter as the given user, dumping data as it comes. @@ -79,54 +82,70 @@ function homeStreamForUser(User $user) } $user = User::staticGet('nickname', $nickname); +global $myuser; +$myuser = $user; + $stream = homeStreamForUser($user); $stream->hookEvent('raw', function($data) { common_log(LOG_INFO, json_encode($data)); }); $stream->hookEvent('friends', function($data) { - printf("Friend list: %s\n", implode(', ', $data)); + printf("Friend list: %s\n", implode(', ', $data->friends)); }); $stream->hookEvent('favorite', function($data) { printf("%s favorited %s's notice: %s\n", - $data['source']['screen_name'], - $data['target']['screen_name'], - $data['target_object']['text']); + $data->source->screen_name, + $data->target->screen_name, + $data->target_object->text); }); $stream->hookEvent('unfavorite', function($data) { printf("%s unfavorited %s's notice: %s\n", - $data['source']['screen_name'], - $data['target']['screen_name'], - $data['target_object']['text']); + $data->source->screen_name, + $data->target->screen_name, + $data->target_object->text); }); $stream->hookEvent('follow', function($data) { printf("%s friended %s\n", - $data['source']['screen_name'], - $data['target']['screen_name']); + $data->source->screen_name, + $data->target->screen_name); }); $stream->hookEvent('unfollow', function($data) { printf("%s unfriended %s\n", - $data['source']['screen_name'], - $data['target']['screen_name']); + $data->source->screen_name, + $data->target->screen_name); }); $stream->hookEvent('delete', function($data) { printf("Deleted status notification: %s\n", - $data['status']['id']); + $data->status->id); }); $stream->hookEvent('scrub_geo', function($data) { printf("Req to scrub geo data for user id %s up to status ID %s\n", - $data['user_id'], - $data['up_to_status_id']); + $data->user_id, + $data->up_to_status_id); }); $stream->hookEvent('status', function($data) { printf("Received status update from %s: %s\n", - $data['user']['screen_name'], - $data['text']); + $data->user->screen_name, + $data->text); + + if (have_option('import')) { + $importer = new TwitterImport(); + printf("\timporting..."); + $notice = $importer->importStatus($data); + if ($notice) { + global $myuser; + Inbox::insertNotice($myuser->id, $notice->id); + printf(" %s\n", $notice->id); + } else { + printf(" FAIL\n"); + } + } }); $stream->hookEvent('direct_message', function($data) { printf("Direct message from %s to %s: %s\n", - $data['sender']['screen_name'], - $data['recipient']['screen_name'], - $data['text']); + $data->sender->screen_name, + $data->recipient->screen_name, + $data->text); }); class TwitterManager extends IoManager diff --git a/plugins/TwitterBridge/twitterstreamreader.php b/plugins/TwitterBridge/twitterstreamreader.php index 793e239d02..3a3c8f5e66 100644 --- a/plugins/TwitterBridge/twitterstreamreader.php +++ b/plugins/TwitterBridge/twitterstreamreader.php @@ -71,7 +71,7 @@ abstract class TwitterStreamReader extends JsonStreamReader * Add an event callback to receive notifications when things come in * over the wire. * - * Callbacks should be in the form: function(array $data, array $context) + * Callbacks should be in the form: function(object $data, array $context) * where $context may list additional data on some streams, such as the * user to whom the message should be routed. * @@ -80,48 +80,49 @@ abstract class TwitterStreamReader extends JsonStreamReader * Messaging: * * 'status': $data contains a status update in standard Twitter JSON format. - * $data['user']: sending user in standard Twitter JSON format. - * $data['text']... etc + * $data->user: sending user in standard Twitter JSON format. + * $data->text... etc * * 'direct_message': $data contains a direct message in standard Twitter JSON format. - * $data['sender']: sending user in standard Twitter JSON format. - * $data['recipient']: receiving user in standard Twitter JSON format. - * $data['text']... etc + * $data->sender: sending user in standard Twitter JSON format. + * $data->recipient: receiving user in standard Twitter JSON format. + * $data->text... etc * * * Out of band events: * * 'follow': User has either started following someone, or is being followed. - * $data['source']: following user in standard Twitter JSON format. - * $data['target']: followed user in standard Twitter JSON format. + * $data->source: following user in standard Twitter JSON format. + * $data->target: followed user in standard Twitter JSON format. * * 'favorite': Someone has favorited a status update. - * $data['source']: user doing the favoriting, in standard Twitter JSON format. - * $data['target']: user whose status was favorited, in standard Twitter JSON format. - * $data['target_object']: the favorited status update in standard Twitter JSON format. + * $data->source: user doing the favoriting, in standard Twitter JSON format. + * $data->target: user whose status was favorited, in standard Twitter JSON format. + * $data->target_object: the favorited status update in standard Twitter JSON format. * * 'unfavorite': Someone has unfavorited a status update. - * $data['source']: user doing the unfavoriting, in standard Twitter JSON format. - * $data['target']: user whose status was unfavorited, in standard Twitter JSON format. - * $data['target_object']: the unfavorited status update in standard Twitter JSON format. + * $data->source: user doing the unfavoriting, in standard Twitter JSON format. + * $data->target: user whose status was unfavorited, in standard Twitter JSON format. + * $data->target_object: the unfavorited status update in standard Twitter JSON format. * * * Meta information: * - * 'friends': $data is a list of user IDs of the current user's friends. + * 'friends': + * $data->friends: array of user IDs of the current user's friends. * * 'delete': Advisory that a Twitter status has been deleted; nice clients * should follow suit. - * $data['id']: ID of status being deleted - * $data['user_id']: ID of its owning user + * $data->id: ID of status being deleted + * $data->user_id: ID of its owning user * * 'scrub_geo': Advisory that a user is clearing geo data from their status * stream; nice clients should follow suit. - * $data['user_id']: ID of user - * $data['up_to_status_id']: any notice older than this should be scrubbed. + * $data->user_id: ID of user + * $data->up_to_status_id: any notice older than this should be scrubbed. * * 'limit': Advisory that tracking has hit a resource limit. - * $data['track'] + * $data->track * * 'raw': receives the full JSON data for all message types. * @@ -149,12 +150,12 @@ abstract class TwitterStreamReader extends JsonStreamReader } } - protected function handleJson(array $data) + protected function handleJson(stdClass $data) { $this->routeMessage($data); } - abstract protected function routeMessage($data); + abstract protected function routeMessage(stdClass $data); /** * Send the decoded JSON object out to any event listeners. @@ -162,23 +163,26 @@ abstract class TwitterStreamReader extends JsonStreamReader * @param array $data * @param array $context optional additional context data to pass on */ - protected function handleMessage(array $data, array $context=array()) + protected function handleMessage(stdClass $data, array $context=array()) { $this->fireEvent('raw', $data, $context); - if (isset($data['text'])) { + if (isset($data->text)) { $this->fireEvent('status', $data, $context); return; } - if (isset($data['event'])) { - $this->fireEvent($data['event'], $data, $context); + if (isset($data->event)) { + $this->fireEvent($data->event, $data, $context); return; } + if (isset($data->friends)) { + $this->fireEvent('friends', $data, $context); + } - $knownMeta = array('friends', 'delete', 'scrubgeo', 'limit', 'direct_message'); + $knownMeta = array('delete', 'scrub_geo', 'limit', 'direct_message'); foreach ($knownMeta as $key) { - if (isset($data[$key])) { - $this->fireEvent($key, $data[$key], $context); + if (isset($data->$key)) { + $this->fireEvent($key, $data->$key, $context); return; } } @@ -237,13 +241,13 @@ class TwitterSiteStream extends TwitterStreamReader * * @param array $data */ - function routeMessage($data) + function routeMessage(stdClass $data) { $context = array( 'source' => 'sitestream', - 'for_user' => $data['for_user'] + 'for_user' => $data->for_user ); - parent::handleMessage($data['message'], $context); + parent::handleMessage($data->message, $context); } } @@ -271,7 +275,7 @@ class TwitterUserStream extends TwitterStreamReader * * @param array $data */ - function routeMessage($data) + function routeMessage(stdClass $data) { $context = array( 'source' => 'userstream' From 9c3fd10257703d821891c25b56dcb13b255aa903 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 5 Oct 2010 17:08:04 -0700 Subject: [PATCH 08/18] Prelim --all mode on streamtest.php to use site streams; doesn't use the destination user for import yet, and not actually tested yet until I'm whitelisted for the beta. :) --- plugins/TwitterBridge/scripts/streamtest.php | 58 ++++++++++++++----- plugins/TwitterBridge/twitterstreamreader.php | 2 +- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/plugins/TwitterBridge/scripts/streamtest.php b/plugins/TwitterBridge/scripts/streamtest.php index 1cec451c8c..a175c1efa5 100644 --- a/plugins/TwitterBridge/scripts/streamtest.php +++ b/plugins/TwitterBridge/scripts/streamtest.php @@ -28,13 +28,14 @@ define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..')); $shortoptions = 'n:'; -$longoptions = array('nick=','import'); +$longoptions = array('nick=','import','all'); $helptext = << - -n --nick Local user whose Twitter timeline to watch + -n --nick= Local user whose Twitter timeline to watch --import Experimental: run incoming messages through import + --all Experimental: run multiuser; requires nick be the app owner Attempts a User Stream connection to Twitter as the given user, dumping data as it comes. @@ -81,49 +82,80 @@ function homeStreamForUser(User $user) return new TwitterUserStream($auth); } +function siteStreamForOwner(User $user) +{ + // The user we auth as must be the owner of the application. + $auth = twitterAuthForUser($user); + $stream = new TwitterSiteStream($auth); + + // Pull Twitter user IDs for all users we want to pull data for + $userIds = array(); + + $flink = new Foreign_link(); + $flink->service = TWITTER_SERVICE; + $flink->find(); + + while ($flink->fetch()) { + if (($flink->noticesync & FOREIGN_NOTICE_RECV) == + FOREIGN_NOTICE_RECV) { + $userIds[] = $flink->foreign_id; + } + } + + $stream->followUsers($userIds); + return $stream; +} + + $user = User::staticGet('nickname', $nickname); global $myuser; $myuser = $user; -$stream = homeStreamForUser($user); -$stream->hookEvent('raw', function($data) { - common_log(LOG_INFO, json_encode($data)); +if (have_option('all')) { + $stream = siteStreamForOwner($user); +} else { + $stream = homeStreamForUser($user); +} + + +$stream->hookEvent('raw', function($data, $context) { + common_log(LOG_INFO, json_encode($data) . ' for ' . json_encode($context)); }); -$stream->hookEvent('friends', function($data) { +$stream->hookEvent('friends', function($data, $context) { printf("Friend list: %s\n", implode(', ', $data->friends)); }); -$stream->hookEvent('favorite', function($data) { +$stream->hookEvent('favorite', function($data, $context) { printf("%s favorited %s's notice: %s\n", $data->source->screen_name, $data->target->screen_name, $data->target_object->text); }); -$stream->hookEvent('unfavorite', function($data) { +$stream->hookEvent('unfavorite', function($data, $context) { printf("%s unfavorited %s's notice: %s\n", $data->source->screen_name, $data->target->screen_name, $data->target_object->text); }); -$stream->hookEvent('follow', function($data) { +$stream->hookEvent('follow', function($data, $context) { printf("%s friended %s\n", $data->source->screen_name, $data->target->screen_name); }); -$stream->hookEvent('unfollow', function($data) { +$stream->hookEvent('unfollow', function($data, $context) { printf("%s unfriended %s\n", $data->source->screen_name, $data->target->screen_name); }); -$stream->hookEvent('delete', function($data) { +$stream->hookEvent('delete', function($data, $context) { printf("Deleted status notification: %s\n", $data->status->id); }); -$stream->hookEvent('scrub_geo', function($data) { +$stream->hookEvent('scrub_geo', function($data, $context) { printf("Req to scrub geo data for user id %s up to status ID %s\n", $data->user_id, $data->up_to_status_id); }); -$stream->hookEvent('status', function($data) { +$stream->hookEvent('status', function($data, $context) { printf("Received status update from %s: %s\n", $data->user->screen_name, $data->text); diff --git a/plugins/TwitterBridge/twitterstreamreader.php b/plugins/TwitterBridge/twitterstreamreader.php index 3a3c8f5e66..5b0613bc40 100644 --- a/plugins/TwitterBridge/twitterstreamreader.php +++ b/plugins/TwitterBridge/twitterstreamreader.php @@ -208,7 +208,7 @@ class TwitterSiteStream extends TwitterStreamReader { protected $userIds; - public function __construct(TwitterOAuthClient $auth, $baseUrl='https://stream.twitter.com') + public function __construct(TwitterOAuthClient $auth, $baseUrl='http://betastream.twitter.com') { parent::__construct($auth, $baseUrl); } From e76028b629ccc819160ff5a95393c4cb86b34c1f Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Thu, 28 Oct 2010 18:26:48 -0700 Subject: [PATCH 09/18] Work in progress: starting on new TwitterDaemon using the Site Streams API -- code is incomplete, pulling bits from streamtest.php pending a chance to test the actual site-streams mode --- .../TwitterBridge/daemons/twitterdaemon.php | 327 ++++++++++++++++++ 1 file changed, 327 insertions(+) create mode 100644 plugins/TwitterBridge/daemons/twitterdaemon.php diff --git a/plugins/TwitterBridge/daemons/twitterdaemon.php b/plugins/TwitterBridge/daemons/twitterdaemon.php new file mode 100644 index 0000000000..f97f3179b8 --- /dev/null +++ b/plugins/TwitterBridge/daemons/twitterdaemon.php @@ -0,0 +1,327 @@ +#!/usr/bin/env php +. + */ + +define('INSTALLDIR', realpath(dirname(__FILE__) . '/..')); + +$shortoptions = 'fi::a'; +$longoptions = array('id::', 'foreground', 'all'); + +$helptext = <<allsites = $allsites; + } + + function runThread() + { + common_log(LOG_INFO, 'Waiting to listen to Twitter and queues'); + + $master = new TwitterMaster($this->get_id(), $this->processManager()); + $master->init($this->allsites); + $master->service(); + + common_log(LOG_INFO, 'terminating normally'); + + return $master->respawn ? self::EXIT_RESTART : self::EXIT_SHUTDOWN; + } + +} + +class TwitterMaster extends IoMaster +{ + protected $processManager; + + function __construct($id, $processManager) + { + parent::__construct($id); + $this->processManager = $processManager; + } + + /** + * Initialize IoManagers for the currently configured site + * which are appropriate to this instance. + */ + function initManagers() + { + if (common_config('twitter', 'enabled')) { + $qm = QueueManager::get(); + $qm->setActiveGroup('twitter'); + $this->instantiate($qm); + $this->instantiate(TwitterManager::get()); + $this->instantiate($this->processManager); + } + } +} + + +class TwitterManager extends IoManager +{ + // Recommended resource limits from http://dev.twitter.com/pages/site_streams + const MAX_STREAMS = 1000; + const USERS_PER_STREAM = 100; + const STREAMS_PER_SECOND = 20; + + protected $twitterStreams; + protected $twitterUsers; + + function __construct() + { + } + + /** + * Pull the site's active Twitter-importing users and start spawning + * some data streams for them! + * + * @fixme check their last-id and check whether we'll need to do a manual pull. + * @fixme abstract out the fetching so we can work over multiple sites. + */ + function initStreams() + { + // Pull Twitter user IDs for all users we want to pull data for + $flink = new Foreign_link(); + $flink->service = TWITTER_SERVICE; + // @fixme probably should do the bitfield check in a whereAdd but it's ugly :D + $flink->find(); + + $userIds = array(); + while ($flink->fetch()) { + if (($flink->noticesync & FOREIGN_NOTICE_RECV) == + FOREIGN_NOTICE_RECV) { + $userIds[] = $flink->foreign_id; + + if (count($userIds) >= self::USERS_PER_STREAM) { + $this->spawnStream($userIds); + $userIds = array(); + } + } + } + + if (count($userIds)) { + $this->spawnStream($userIds); + } + } + + /** + * Prepare a Site Stream connection for the given chunk of users. + * The actual connection will be opened later. + * + * @param $users array of Twitter-side user IDs + */ + function spawnStream($users) + { + $stream = $this->initSiteStream(); + $stream->followUsers($userIds); + + // Slip the stream reader into our list of active streams. + // We'll manage its actual connection on the next go-around. + $this->streams[] = $stream; + + // Record the user->stream mappings; this makes it easier for us to know + // later if we need to kill something. + foreach ($userIds as $id) { + $this->users[$id] = $stream; + } + } + + /** + * Initialize a generic site streams connection object. + * All our connections will look like this, then we'll add users to them. + * + * @return TwitterStreamReader + */ + function initSiteStream() + { + $auth = $this->siteStreamAuth(); + $stream = new TwitterSiteStream($auth); + + // Add our event handler callbacks. Whee! + $this->setupEvents($stream); + return $stream; + } + + /** + * Fetch the Twitter OAuth credentials to use to connect to the Site Streams API. + * + * This will use the locally-stored credentials for the applictation's owner account + * from the site configuration. These should be configured through the administration + * panels or manually in the config file. + * + * Will throw an exception if no credentials can be found -- but beware that invalid + * credentials won't cause breakage until later. + * + * @return TwitterOAuthClient + */ + function siteStreamAuth() + { + $token = common_config('twitter', 'stream_token'); + $secret = common_config('twitter', 'stream_secret'); + if (empty($token) || empty($secret)) { + throw new ServerException('Twitter site streams have not been correctly configured. Configure the app owner account via the admin panel.'); + } + return new TwitterOAuthClient($token, $secret); + } + + /** + * Collect the sockets for all active connections for i/o monitoring. + * + * @return array of resources + */ + function getSockets() + { + $sockets = array(); + foreach ($this->streams as $stream) { + foreach ($stream->getSockets() as $socket) { + $sockets[] = $socket; + } + } + return $streams; + } + + /** + * We're ready to process input from one of our data sources! Woooooo! + * @fixme is there an easier way to map from socket back to owning module? :( + * + * @param resource $socket + * @return boolean success + */ + function handleInput($socket) + { + foreach ($this->streams as $stream) { + foreach ($stream->getSockets() as $aSocket) { + if ($socket === $aSocket) { + $stream->handleInput($socket); + } + } + } + return true; + } + + /** + * Start the system up! + * @fixme do some rate-limiting on the stream setup + * @fixme do some sensible backoff on failure etc + */ + function start() + { + $this->initStreams(); + foreach ($this->streams as $stream) { + $stream->connect(); + } + return true; + } + + function finish() + { + foreach ($this->streams as $index => $stream) { + $stream->close(); + unset($this->streams[$index]); + } + return true; + } + + public static function get() + { + throw new Exception('not a singleton'); + } + + /** + * Set up event handlers on the streaming interface. + * + * @fixme add more event types as we add handling for them + */ + protected function setupEvents(TwitterStream $stream) + { + $handlers = array( + 'status', + ); + foreach ($handlers as $event) { + $stream->hookEvent($event, array($this, 'onTwitter' . ucfirst($event))); + } + } + + /** + * Event callback notifying that a user has a new message in their home timeline. + * + * @param object $data JSON data: Twitter status update + */ + protected function onTwitterStatus($data, $context) + { + $importer = new TwitterImport(); + $notice = $importer->importStatus($data); + if ($notice) { + $user = $this->getTwitterUser($context); + Inbox::insertNotice($user->id, $notice->id); + } + } + + /** + * @fixme what about handling multiple sites? + */ + function getTwitterUser($context) + { + if ($context->source != 'sitestream') { + throw new ServerException("Unexpected stream source"); + } + $flink = Foreign_link::getByForeignID(TWITTER_SERVICE, $context->for_user); + if ($flink) { + return $flink->getUser(); + } else { + throw new ServerException("No local user for this Twitter ID"); + } + } +} + + +if (have_option('i', 'id')) { + $id = get_option_value('i', 'id'); +} else if (count($args) > 0) { + $id = $args[0]; +} else { + $id = null; +} + +$foreground = have_option('f', 'foreground'); +$all = have_option('a') || have_option('--all'); + +$daemon = new TwitterDaemon($id, !$foreground, 1, $all); + +$daemon->runOnce(); From 15b108620e4a63184d001ed8ff3704225a795b8c Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 29 Oct 2010 13:06:32 -0700 Subject: [PATCH 10/18] Fix a couple 'continue's from old looping code in Twitter importer (-> return null) --- plugins/TwitterBridge/twitterimport.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/TwitterBridge/twitterimport.php b/plugins/TwitterBridge/twitterimport.php index 1b2d395304..07a9cf95f6 100644 --- a/plugins/TwitterBridge/twitterimport.php +++ b/plugins/TwitterBridge/twitterimport.php @@ -57,13 +57,13 @@ class TwitterImport if (preg_match("/$source/", mb_strtolower($status->source))) { common_debug($this->name() . ' - Skipping import of status ' . $status->id . ' with source ' . $source); - continue; + return null; } // Don't save it if the user is protected // FIXME: save it but treat it as private if ($status->user->protected) { - continue; + return null; } $notice = $this->saveStatus($status); From 86adc575ecc1dce990540d06fc86735215ae1ea1 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 29 Oct 2010 13:14:12 -0700 Subject: [PATCH 11/18] TweetInQueueHandler: run incoming tweets through the queues to keep the Twitter streaming daemon clear. --- plugins/TwitterBridge/tweetinqueuehandler.php | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 plugins/TwitterBridge/tweetinqueuehandler.php diff --git a/plugins/TwitterBridge/tweetinqueuehandler.php b/plugins/TwitterBridge/tweetinqueuehandler.php new file mode 100644 index 0000000000..ff6b2cc861 --- /dev/null +++ b/plugins/TwitterBridge/tweetinqueuehandler.php @@ -0,0 +1,63 @@ +. + */ + +if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } + +require_once INSTALLDIR . '/plugins/TwitterBridge/twitter.php'; + +/** + * Queue handler to deal with incoming Twitter status updates, as retrieved by + * TwitterDaemon (twitterdaemon.php). + * + * The queue handler passes the status through TwitterImporter for import into the + * local database (if necessary), then adds the imported notice to the local inbox + * of the attached Twitter user. + * + * Warning: the way we do inbox distribution manually means that realtime, XMPP, etc + * don't work on Twitter-borne messages. When TwitterImporter is changed to handle + * that correctly, we'll only need to do this once...? + */ +class TweetInQueueHandler extends QueueHandler +{ + function transport() + { + return 'tweetin'; + } + + function handle($data) + { + // JSON object with Twitter data + $status = $data['status']; + + // Twitter user ID this incoming data belongs to. + $receiver = $data['for_user']; + + $importer = new TwitterImport(); + $notice = $importer->importStatus($status); + if ($notice) { + $flink = Foreign_link::getByForeignID(TWITTER_SERVICE, $receiver); + if ($flink) { + // @fixme this should go through more regular channels? + Inbox::insertNotice($flink->user_id, $notice->id); + } + } + + return true; + } +} From 47eada3a95bbe92619f2fc070f24e429f28e6fa8 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 29 Oct 2010 13:18:03 -0700 Subject: [PATCH 12/18] Work in progress on site streams-aware TwitterDaemon --- .../TwitterBridge/daemons/twitterdaemon.php | 58 ++++++++----------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/plugins/TwitterBridge/daemons/twitterdaemon.php b/plugins/TwitterBridge/daemons/twitterdaemon.php index f97f3179b8..851d191dd2 100644 --- a/plugins/TwitterBridge/daemons/twitterdaemon.php +++ b/plugins/TwitterBridge/daemons/twitterdaemon.php @@ -114,7 +114,7 @@ class TwitterManager extends IoManager * @fixme check their last-id and check whether we'll need to do a manual pull. * @fixme abstract out the fetching so we can work over multiple sites. */ - function initStreams() + protected function initStreams() { // Pull Twitter user IDs for all users we want to pull data for $flink = new Foreign_link(); @@ -146,7 +146,7 @@ class TwitterManager extends IoManager * * @param $users array of Twitter-side user IDs */ - function spawnStream($users) + protected function spawnStream($users) { $stream = $this->initSiteStream(); $stream->followUsers($userIds); @@ -168,7 +168,7 @@ class TwitterManager extends IoManager * * @return TwitterStreamReader */ - function initSiteStream() + protected function initSiteStream() { $auth = $this->siteStreamAuth(); $stream = new TwitterSiteStream($auth); @@ -190,7 +190,7 @@ class TwitterManager extends IoManager * * @return TwitterOAuthClient */ - function siteStreamAuth() + protected function siteStreamAuth() { $token = common_config('twitter', 'stream_token'); $secret = common_config('twitter', 'stream_secret'); @@ -205,7 +205,7 @@ class TwitterManager extends IoManager * * @return array of resources */ - function getSockets() + public function getSockets() { $sockets = array(); foreach ($this->streams as $stream) { @@ -223,7 +223,7 @@ class TwitterManager extends IoManager * @param resource $socket * @return boolean success */ - function handleInput($socket) + public function handleInput($socket) { foreach ($this->streams as $stream) { foreach ($stream->getSockets() as $aSocket) { @@ -236,11 +236,12 @@ class TwitterManager extends IoManager } /** - * Start the system up! + * Start the i/o system up! Prepare our connections and start opening them. + * * @fixme do some rate-limiting on the stream setup * @fixme do some sensible backoff on failure etc */ - function start() + public function start() { $this->initStreams(); foreach ($this->streams as $stream) { @@ -249,7 +250,10 @@ class TwitterManager extends IoManager return true; } - function finish() + /** + * Close down our connections when the daemon wraps up for business. + */ + public function finish() { foreach ($this->streams as $index => $stream) { $stream->close(); @@ -280,33 +284,21 @@ class TwitterManager extends IoManager /** * Event callback notifying that a user has a new message in their home timeline. + * We store the incoming message into the queues for processing, keeping our own + * daemon running as shiny-fast as possible. * - * @param object $data JSON data: Twitter status update + * @param object $status JSON data: Twitter status update + * @fixme in all-sites mode we may need to route queue items into another site's + * destination queues, or multiple sites. */ - protected function onTwitterStatus($data, $context) + protected function onTwitterStatus($status, $context) { - $importer = new TwitterImport(); - $notice = $importer->importStatus($data); - if ($notice) { - $user = $this->getTwitterUser($context); - Inbox::insertNotice($user->id, $notice->id); - } - } - - /** - * @fixme what about handling multiple sites? - */ - function getTwitterUser($context) - { - if ($context->source != 'sitestream') { - throw new ServerException("Unexpected stream source"); - } - $flink = Foreign_link::getByForeignID(TWITTER_SERVICE, $context->for_user); - if ($flink) { - return $flink->getUser(); - } else { - throw new ServerException("No local user for this Twitter ID"); - } + $data = array( + 'status' => $status, + 'for_user' => $context->for_user, + ); + $qm = QueueManager::get(); + $qm->enqueue($data, 'tweetin'); } } From d743539cf72487705f8a1384284ddf114af12bfa Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 29 Oct 2010 13:41:15 -0700 Subject: [PATCH 13/18] Fixups for twitter streaming daemon --- plugins/TwitterBridge/TwitterBridgePlugin.php | 6 +++++ .../TwitterBridge/daemons/twitterdaemon.php | 27 ++++++++----------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/plugins/TwitterBridge/TwitterBridgePlugin.php b/plugins/TwitterBridge/TwitterBridgePlugin.php index 128b062c7f..b4eb9d2f98 100644 --- a/plugins/TwitterBridge/TwitterBridgePlugin.php +++ b/plugins/TwitterBridge/TwitterBridgePlugin.php @@ -201,8 +201,14 @@ class TwitterBridgePlugin extends Plugin case 'TwitterOAuthClient': case 'TwitterQueueHandler': case 'TwitterImport': + case 'JsonStreamReader': + case 'TwitterStreamReader': include_once $dir . '/' . strtolower($cls) . '.php'; return false; + case 'TwitterSiteStream': + case 'TwitterUserStream': + include_once $dir . '/twitterstreamreader.php'; + return false; case 'Notice_to_status': case 'Twitter_synch_status': include_once $dir . '/' . $cls . '.php'; diff --git a/plugins/TwitterBridge/daemons/twitterdaemon.php b/plugins/TwitterBridge/daemons/twitterdaemon.php index 851d191dd2..9e218a1a1c 100644 --- a/plugins/TwitterBridge/daemons/twitterdaemon.php +++ b/plugins/TwitterBridge/daemons/twitterdaemon.php @@ -18,7 +18,7 @@ * along with this program. If not, see . */ -define('INSTALLDIR', realpath(dirname(__FILE__) . '/..')); +define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..')); $shortoptions = 'fi::a'; $longoptions = array('id::', 'foreground', 'all'); @@ -82,13 +82,11 @@ class TwitterMaster extends IoMaster */ function initManagers() { - if (common_config('twitter', 'enabled')) { - $qm = QueueManager::get(); - $qm->setActiveGroup('twitter'); - $this->instantiate($qm); - $this->instantiate(TwitterManager::get()); - $this->instantiate($this->processManager); - } + $qm = QueueManager::get(); + $qm->setActiveGroup('twitter'); + $this->instantiate($qm); + $this->instantiate(new TwitterManager()); + $this->instantiate($this->processManager); } } @@ -103,10 +101,6 @@ class TwitterManager extends IoManager protected $twitterStreams; protected $twitterUsers; - function __construct() - { - } - /** * Pull the site's active Twitter-importing users and start spawning * some data streams for them! @@ -116,6 +110,7 @@ class TwitterManager extends IoManager */ protected function initStreams() { + common_log(LOG_INFO, 'init...'); // Pull Twitter user IDs for all users we want to pull data for $flink = new Foreign_link(); $flink->service = TWITTER_SERVICE; @@ -144,9 +139,9 @@ class TwitterManager extends IoManager * Prepare a Site Stream connection for the given chunk of users. * The actual connection will be opened later. * - * @param $users array of Twitter-side user IDs + * @param $userIds array of Twitter-side user IDs */ - protected function spawnStream($users) + protected function spawnStream($userIds) { $stream = $this->initSiteStream(); $stream->followUsers($userIds); @@ -213,7 +208,7 @@ class TwitterManager extends IoManager $sockets[] = $socket; } } - return $streams; + return $sockets; } /** @@ -272,7 +267,7 @@ class TwitterManager extends IoManager * * @fixme add more event types as we add handling for them */ - protected function setupEvents(TwitterStream $stream) + protected function setupEvents(TwitterStreamReader $stream) { $handlers = array( 'status', From 62408fef09990a127a58aa8f8a24777bc34d160d Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 29 Oct 2010 14:12:18 -0700 Subject: [PATCH 14/18] Work in progress on twitter import daemon --- plugins/TwitterBridge/TwitterBridgePlugin.php | 7 +++ .../TwitterBridge/daemons/twitterdaemon.php | 4 +- .../TwitterBridge/tweetctlqueuehandler.php | 59 +++++++++++++++++++ plugins/TwitterBridge/twittersettings.php | 17 ++++++ 4 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 plugins/TwitterBridge/tweetctlqueuehandler.php diff --git a/plugins/TwitterBridge/TwitterBridgePlugin.php b/plugins/TwitterBridge/TwitterBridgePlugin.php index b4eb9d2f98..1078abc484 100644 --- a/plugins/TwitterBridge/TwitterBridgePlugin.php +++ b/plugins/TwitterBridge/TwitterBridgePlugin.php @@ -274,7 +274,14 @@ class TwitterBridgePlugin extends Plugin function onEndInitializeQueueManager($manager) { if (self::hasKeys()) { + // Outgoing notices -> twitter $manager->connect('twitter', 'TwitterQueueHandler'); + + // Incoming statuses <- twitter + $manager->connect('tweetin', 'TweetInQueueHandler'); + + // Control messages from our web interface to the import daemon + $manager->connect('tweetctl', 'TweetCtlQueueHandler', 'twitter'); } return true; } diff --git a/plugins/TwitterBridge/daemons/twitterdaemon.php b/plugins/TwitterBridge/daemons/twitterdaemon.php index 9e218a1a1c..d313d2de96 100644 --- a/plugins/TwitterBridge/daemons/twitterdaemon.php +++ b/plugins/TwitterBridge/daemons/twitterdaemon.php @@ -98,8 +98,8 @@ class TwitterManager extends IoManager const USERS_PER_STREAM = 100; const STREAMS_PER_SECOND = 20; - protected $twitterStreams; - protected $twitterUsers; + protected $streams; + protected $users; /** * Pull the site's active Twitter-importing users and start spawning diff --git a/plugins/TwitterBridge/tweetctlqueuehandler.php b/plugins/TwitterBridge/tweetctlqueuehandler.php new file mode 100644 index 0000000000..4c8bef463e --- /dev/null +++ b/plugins/TwitterBridge/tweetctlqueuehandler.php @@ -0,0 +1,59 @@ +. + */ + +if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } + +require_once INSTALLDIR . '/plugins/TwitterBridge/twitter.php'; + +/** + * Queue handler to deal with incoming Twitter status updates, as retrieved by + * TwitterDaemon (twitterdaemon.php). + * + * The queue handler passes the status through TwitterImporter for import into the + * local database (if necessary), then adds the imported notice to the local inbox + * of the attached Twitter user. + * + * Warning: the way we do inbox distribution manually means that realtime, XMPP, etc + * don't work on Twitter-borne messages. When TwitterImporter is changed to handle + * that correctly, we'll only need to do this once...? + */ +class TweetCtlQueueHandler extends QueueHandler +{ + function transport() + { + return 'tweetctl'; + } + + function handle($data) + { + // A user has activated or deactivated their Twitter bridge + // import status. + $action = $data['action']; + $userId = $data['for_user']; + + $tm = TwitterManager::get(); + if ($action == 'start') { + $tm->startTwitterUser($userId); + } else if ($action == 'stop') { + $tm->stopTwitterUser($userId); + } + + return true; + } +} diff --git a/plugins/TwitterBridge/twittersettings.php b/plugins/TwitterBridge/twittersettings.php index 33c5eb65bb..de1ba58b0d 100644 --- a/plugins/TwitterBridge/twittersettings.php +++ b/plugins/TwitterBridge/twittersettings.php @@ -285,6 +285,7 @@ class TwittersettingsAction extends ConnectSettingsAction } $original = clone($flink); + $wasReceiving = (bool)($original->notice_sync & FOREIGN_NOTICE_RECV); $flink->set_flags($noticesend, $noticerecv, $replysync, $friendsync); $result = $flink->update($original); @@ -294,6 +295,22 @@ class TwittersettingsAction extends ConnectSettingsAction return; } + if ($wasReceiving xor $noticerecv) { + $this->notifyDaemon($flink->foreign_id, $noticerecv); + } + $this->showForm(_m('Twitter preferences saved.'), true); } + + /** + * Tell the import daemon that we've updated a user's receive status. + */ + function notifyDaemon($twitterUserId, $receiving) + { + $data = array('for_user' => $twitterUserId, + 'action' => $receiving ? 'stop' : 'start'); + $qm = QueueManager::get(); + $qm->enqueue($data, 'twitterctl'); + } + } From 28703deb8f864575135c2ea94facc60c6ca9945b Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 2 Nov 2010 16:26:51 -0700 Subject: [PATCH 15/18] Allow custom apiroot for site streams testing on streamtest --- plugins/TwitterBridge/scripts/streamtest.php | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/plugins/TwitterBridge/scripts/streamtest.php b/plugins/TwitterBridge/scripts/streamtest.php index a175c1efa5..aad15fdeab 100644 --- a/plugins/TwitterBridge/scripts/streamtest.php +++ b/plugins/TwitterBridge/scripts/streamtest.php @@ -28,7 +28,7 @@ define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..')); $shortoptions = 'n:'; -$longoptions = array('nick=','import','all'); +$longoptions = array('nick=','import','all','apiroot='); $helptext = << @@ -36,6 +36,7 @@ USAGE: streamtest.php -n -n --nick= Local user whose Twitter timeline to watch --import Experimental: run incoming messages through import --all Experimental: run multiuser; requires nick be the app owner + --apiroot= Provide alternate streaming API root URL Attempts a User Stream connection to Twitter as the given user, dumping data as it comes. @@ -86,7 +87,12 @@ function siteStreamForOwner(User $user) { // The user we auth as must be the owner of the application. $auth = twitterAuthForUser($user); - $stream = new TwitterSiteStream($auth); + + if (have_option('apiroot')) { + $stream = new TwitterSiteStream($auth, get_option_value('apiroot')); + } else { + $stream = new TwitterSiteStream($auth); + } // Pull Twitter user IDs for all users we want to pull data for $userIds = array(); From 445b306b5494210f119525a13c4148024a6c5576 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 2 Nov 2010 16:27:14 -0700 Subject: [PATCH 16/18] fakestream.php: script to build an emulated Twitter Site Stream from live Twitter data, for testing. --- plugins/TwitterBridge/scripts/fakestream.php | 106 +++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 plugins/TwitterBridge/scripts/fakestream.php diff --git a/plugins/TwitterBridge/scripts/fakestream.php b/plugins/TwitterBridge/scripts/fakestream.php new file mode 100644 index 0000000000..cdb56d4910 --- /dev/null +++ b/plugins/TwitterBridge/scripts/fakestream.php @@ -0,0 +1,106 @@ +. + * + * @category Plugin + * @package StatusNet + * @author Brion Vibber + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..')); + +$shortoptions = 'n:'; +$longoptions = array('nick=','import','all'); + +$helptext = << + + -n --nick= Local user whose Twitter timeline to watch + --import Experimental: run incoming messages through import + --all Experimental: run multiuser; requires nick be the app owner + +Attempts a User Stream connection to Twitter as the given user, dumping +data as it comes. + +ENDOFHELP; + +require_once INSTALLDIR.'/scripts/commandline.inc'; + +if (have_option('n')) { + $nickname = get_option_value('n'); +} else if (have_option('nick')) { + $nickname = get_option_value('nickname'); +} else { + show_help($helptext); + exit(0); +} + +/** + * + * @param User $user + * @return TwitterOAuthClient + */ +function twitterAuthForUser(User $user) +{ + $flink = Foreign_link::getByUserID($user->id, + TWITTER_SERVICE); + if (!$flink) { + throw new ServerException("No Twitter config for this user."); + } + + $token = TwitterOAuthClient::unpackToken($flink->credentials); + if (!$token) { + throw new ServerException("No Twitter OAuth credentials for this user."); + } + + return new TwitterOAuthClient($token->key, $token->secret); +} + +/** + * Emulate the line-by-line output... + * + * @param Foreign_link $flink + * @param mixed $data + */ +function dumpMessage($flink, $data) +{ + $msg->for_user = $flink->foreign_id; + $msg->message = $data; + print json_encode($msg) . "\r\n"; +} + +if (have_option('all')) { + throw new Exception('--all not yet implemented'); +} + +$user = User::staticGet('nickname', $nickname); +$auth = twitterAuthForUser($user); +$flink = Foreign_link::getByUserID($user->id, + TWITTER_SERVICE); + +$friends->friends = $auth->friendsIds(); +dumpMessage($flink, $friends); + +$timeline = $auth->statusesHomeTimeline(); +foreach ($timeline as $status) { + dumpMessage($flink, $status); +} + From a2f0f68d75b2cf49815a695a30761c6c9538449e Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 2 Nov 2010 16:43:01 -0700 Subject: [PATCH 17/18] fakestream.php can now take --all option to pull the latest messages from multiple locally-authed accounts when generating simulated sitestreams info --- plugins/TwitterBridge/scripts/fakestream.php | 65 ++++++++++++++++---- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/plugins/TwitterBridge/scripts/fakestream.php b/plugins/TwitterBridge/scripts/fakestream.php index cdb56d4910..3696888162 100644 --- a/plugins/TwitterBridge/scripts/fakestream.php +++ b/plugins/TwitterBridge/scripts/fakestream.php @@ -48,6 +48,8 @@ if (have_option('n')) { $nickname = get_option_value('n'); } else if (have_option('nick')) { $nickname = get_option_value('nickname'); +} else if (have_option('all')) { + $nickname = null; } else { show_help($helptext); exit(0); @@ -82,25 +84,64 @@ function twitterAuthForUser(User $user) */ function dumpMessage($flink, $data) { - $msg->for_user = $flink->foreign_id; - $msg->message = $data; + $msg = prepMessage($flink, $data); print json_encode($msg) . "\r\n"; } +function prepMessage($flink, $data) +{ + $msg->for_user = $flink->foreign_id; + $msg->message = $data; + return $msg; +} + if (have_option('all')) { - throw new Exception('--all not yet implemented'); + $users = array(); + + $flink = new Foreign_link(); + $flink->service = TWITTER_SERVICE; + $flink->find(); + + while ($flink->fetch()) { + if (($flink->noticesync & FOREIGN_NOTICE_RECV) == + FOREIGN_NOTICE_RECV) { + $users[] = $flink->user_id; + } + } +} else { + $user = User::staticGet('nickname', $nickname); + $users = array($user->id); } -$user = User::staticGet('nickname', $nickname); -$auth = twitterAuthForUser($user); -$flink = Foreign_link::getByUserID($user->id, - TWITTER_SERVICE); +$output = array(); +foreach ($users as $id) { + $user = User::staticGet('id', $id); + if (!$user) { + throw new Exception("No user for id $id"); + } + $auth = twitterAuthForUser($user); + $flink = Foreign_link::getByUserID($user->id, + TWITTER_SERVICE); -$friends->friends = $auth->friendsIds(); -dumpMessage($flink, $friends); + $friends->friends = $auth->friendsIds(); + dumpMessage($flink, $friends); -$timeline = $auth->statusesHomeTimeline(); -foreach ($timeline as $status) { - dumpMessage($flink, $status); + $timeline = $auth->statusesHomeTimeline(); + foreach ($timeline as $status) { + $output[] = prepMessage($flink, $status); + } } +usort($output, function($a, $b) { + if ($a->message->id < $b->message->id) { + return -1; + } else if ($a->message->id == $b->message->id) { + return 0; + } else { + return 1; + } +}); + +foreach ($output as $msg) { + print json_encode($msg) . "\r\n"; +} From 9cbda32768f65fc53dbeef4e5fb4f53fa4701109 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 2 Nov 2010 16:51:07 -0700 Subject: [PATCH 18/18] Pull out the 'tweetctl' queue for now; these should go over control signals, and actual handling isn't implemented yet anyway. --- plugins/TwitterBridge/TwitterBridgePlugin.php | 3 --- plugins/TwitterBridge/twittersettings.php | 7 ++----- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/plugins/TwitterBridge/TwitterBridgePlugin.php b/plugins/TwitterBridge/TwitterBridgePlugin.php index 1078abc484..f5c3612506 100644 --- a/plugins/TwitterBridge/TwitterBridgePlugin.php +++ b/plugins/TwitterBridge/TwitterBridgePlugin.php @@ -279,9 +279,6 @@ class TwitterBridgePlugin extends Plugin // Incoming statuses <- twitter $manager->connect('tweetin', 'TweetInQueueHandler'); - - // Control messages from our web interface to the import daemon - $manager->connect('tweetctl', 'TweetCtlQueueHandler', 'twitter'); } return true; } diff --git a/plugins/TwitterBridge/twittersettings.php b/plugins/TwitterBridge/twittersettings.php index de1ba58b0d..c169172b00 100644 --- a/plugins/TwitterBridge/twittersettings.php +++ b/plugins/TwitterBridge/twittersettings.php @@ -285,7 +285,7 @@ class TwittersettingsAction extends ConnectSettingsAction } $original = clone($flink); - $wasReceiving = (bool)($original->notice_sync & FOREIGN_NOTICE_RECV); + $wasReceiving = (bool)($original->noticesync & FOREIGN_NOTICE_RECV); $flink->set_flags($noticesend, $noticerecv, $replysync, $friendsync); $result = $flink->update($original); @@ -307,10 +307,7 @@ class TwittersettingsAction extends ConnectSettingsAction */ function notifyDaemon($twitterUserId, $receiving) { - $data = array('for_user' => $twitterUserId, - 'action' => $receiving ? 'stop' : 'start'); - $qm = QueueManager::get(); - $qm->enqueue($data, 'twitterctl'); + // todo... should use control signals rather than queues } }