gnu-social/scripts/twitterstatusfetcher.php

678 lines
19 KiB
PHP
Raw Normal View History

#!/usr/bin/env php
<?php
/**
* Laconica - a distributed open-source microblogging tool
* Copyright (C) 2008, 2009, Control Yourself, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));
// Tune number of processes and how often to poll Twitter
// XXX: Should these things be in config.php?
define('MAXCHILDREN', 2);
define('POLL_INTERVAL', 60); // in seconds
$shortoptions = 'di::';
$longoptions = array('id::', 'debug');
$helptext = <<<END_OF_TRIM_HELP
Batch script for retrieving Twitter messages from foreign service.
-i --id Identity (default 'generic')
-d --debug Debug (lots of log output)
2009-07-18 02:09:03 +01:00
END_OF_TRIM_HELP;
require_once INSTALLDIR .'/scripts/commandline.inc';
require_once INSTALLDIR . '/lib/daemon.php';
/**
* Fetcher for statuses from Twitter
*
* Fetches statuses from Twitter and inserts them as notices in local
* system.
*
* @category Twitter
* @package Laconica
* @author Zach Copley <zach@controlyourself.ca>
* @author Evan Prodromou <evan@controlyourself.ca>
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
* @link http://laconi.ca/
*/
// NOTE: an Avatar path MUST be set in config.php for this
// script to work: e.g.: $config['avatar']['path'] = '/laconica/avatar';
class TwitterStatusFetcher extends Daemon
{
private $_children = array();
2009-07-18 02:09:03 +01:00
function __construct($id=null, $daemonize=true)
{
parent::__construct($daemonize);
if ($id) {
$this->set_id($id);
}
}
/**
* Name of this daemon
*
* @return string Name of the daemon.
*/
function name()
{
return ('twitterstatusfetcher.'.$this->_id);
}
/**
* Run the daemon
*
* @return void
*/
function run()
{
if (defined('SCRIPT_DEBUG')) {
common_debug($this->name() .
': debugging log output enabled.');
}
do {
$flinks = $this->refreshFlinks();
foreach ($flinks as $f) {
$pid = pcntl_fork();
if ($pid == -1) {
die ("Couldn't fork!");
}
if ($pid) {
// Parent
if (defined('SCRIPT_DEBUG')) {
common_debug("Parent: forked new status ".
" fetcher process " . $pid);
}
$this->_children[] = $pid;
} else {
// Child
// Each child ps needs its own DB connection
// Note: DataObject::getDatabaseConnection() creates
// a new connection if there isn't one already
global $_DB_DATAOBJECT;
$conn = &$f->getDatabaseConnection();
$this->getTimeline($f);
$conn->disconnect();
// XXX: Couldn't find a less brutal way to blow
// away a cached connection
unset($_DB_DATAOBJECT['CONNECTIONS']);
exit();
}
// Remove child from ps list as it finishes
while (($c = pcntl_wait($status, WNOHANG OR WUNTRACED)) > 0) {
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug("Child $c finished.");
}
$this->removePs($this->_children, $c);
}
// Wait! We have too many damn kids.
if (sizeof($this->_children) > MAXCHILDREN) {
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug('Too many children. Waiting...');
}
if (($c = pcntl_wait($status, WUNTRACED)) > 0) {
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug("Finished waiting for $c");
}
$this->removePs($this->_children, $c);
}
}
}
// Remove all children from the process list before restarting
while (($c = pcntl_wait($status, WUNTRACED)) > 0) {
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug("Child $c finished.");
}
$this->removePs($this->_children, $c);
}
// Rest for a bit before we fetch more statuses
2009-05-07 09:10:31 +01:00
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug('Waiting ' . POLL_INTERVAL .
' secs before hitting Twitter again.');
}
if (POLL_INTERVAL > 0) {
sleep(POLL_INTERVAL);
}
} while (true);
}
/**
* Refresh the foreign links for this user
*
* @return void
*/
function refreshFlinks()
{
global $_DB_DATAOBJECT;
$flink = new Foreign_link();
$conn = &$flink->getDatabaseConnection();
$flink->service = TWITTER_SERVICE;
$flink->orderBy('last_noticesync');
$cnt = $flink->find();
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug('Updating Twitter friends subscriptions' .
" for $cnt users.");
}
$flinks = array();
while ($flink->fetch()) {
2009-05-07 09:10:31 +01:00
if (($flink->noticesync & FOREIGN_NOTICE_RECV) ==
FOREIGN_NOTICE_RECV) {
$flinks[] = clone($flink);
}
}
$flink->free();
unset($flink);
$conn->disconnect();
unset($_DB_DATAOBJECT['CONNECTIONS']);
return $flinks;
}
/**
* Unknown
*
* @param array &$plist unknown.
* @param string $ps unknown.
*
* @return unknown
* @todo document
*/
function removePs(&$plist, $ps)
{
for ($i = 0; $i < sizeof($plist); $i++) {
if ($plist[$i] == $ps) {
unset($plist[$i]);
$plist = array_values($plist);
break;
}
}
}
function getTimeline($flink)
{
if (empty($flink)) {
2009-05-07 09:10:31 +01:00
common_log(LOG_WARNING,
"Can't retrieve Foreign_link for foreign ID $fid");
return;
}
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug('Trying to get timeline for Twitter user ' .
$flink->foreign_id);
}
2009-05-07 09:10:31 +01:00
// XXX: Biggest remaining issue - How do we know at which status
// to start importing? How many statuses? Right now I'm going
// with the default last 20.
$client = new TwitterOAuthClient($flink->token, $flink->credentials);
$timeline = null;
try {
$timeline = $client->statuses_friends_timeline();
} catch (OAuthClientCurlException $e) {
common_log(LOG_WARNING,
'OAuth client unable to get friends timeline for user ' .
$flink->user_id . ' - code: ' .
$e->getCode() . 'msg: ' . $e->getMessage());
}
if (empty($timeline)) {
common_log(LOG_WARNING, "Empty timeline.");
return;
}
2009-05-20 23:08:08 +01:00
// Reverse to preserve order
foreach (array_reverse($timeline) as $status) {
2009-05-07 09:10:31 +01:00
// Hacktastic: filter out stuff coming from this Laconica
$source = mb_strtolower(common_config('integration', 'source'));
if (preg_match("/$source/", mb_strtolower($status->source))) {
2009-05-07 09:10:31 +01:00
if (defined('SCRIPT_DEBUG')) {
common_debug('Skipping import of status ' . $status->id .
' with source ' . $source);
}
continue;
}
$this->saveStatus($status, $flink);
}
// Okay, record the time we synced with Twitter for posterity
$flink->last_noticesync = common_sql_now();
$flink->update();
}
function saveStatus($status, $flink)
{
$id = $this->ensureProfile($status->user);
$profile = Profile::staticGet($id);
if (empty($profile)) {
2009-05-07 09:10:31 +01:00
common_log(LOG_ERR,
'Problem saving notice. No associated Profile.');
return null;
}
// XXX: change of screen name?
$uri = 'http://twitter.com/' . $status->user->screen_name .
'/status/' . $status->id;
$notice = Notice::staticGet('uri', $uri);
// check to see if we've already imported the status
if (empty($notice)) {
$notice = new Notice();
$notice->profile_id = $id;
$notice->uri = $uri;
$notice->created = strftime('%Y-%m-%d %H:%M:%S',
strtotime($status->created_at));
$notice->content = common_shorten_links($status->text); // XXX
$notice->rendered = common_render_content($notice->content, $notice);
$notice->source = 'twitter';
$notice->reply_to = null; // XXX lookup reply
$notice->is_local = Notice::GATEWAY;
if (Event::handle('StartNoticeSave', array(&$notice))) {
$id = $notice->insert();
Event::handle('EndNoticeSave', array($notice));
}
}
if (!Notice_inbox::pkeyGet(array('notice_id' => $notice->id,
'user_id' => $flink->user_id))) {
// Add to inbox
$inbox = new Notice_inbox();
$inbox->user_id = $flink->user_id;
$inbox->notice_id = $notice->id;
$inbox->created = $notice->created;
$inbox->source = NOTICE_INBOX_SOURCE_GATEWAY; // From a private source
$inbox->insert();
}
}
function ensureProfile($user)
{
// check to see if there's already a profile for this user
$profileurl = 'http://twitter.com/' . $user->screen_name;
$profile = Profile::staticGet('profileurl', $profileurl);
if (!empty($profile)) {
2009-05-07 09:10:31 +01:00
if (defined('SCRIPT_DEBUG')) {
common_debug("Profile for $profile->nickname found.");
}
// Check to see if the user's Avatar has changed
$this->checkAvatar($user, $profile);
2009-05-07 09:10:31 +01:00
return $profile->id;
} else {
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug('Adding profile and remote profile ' .
"for Twitter user: $profileurl");
}
$profile = new Profile();
$profile->query("BEGIN");
$profile->nickname = $user->screen_name;
$profile->fullname = $user->name;
$profile->homepage = $user->url;
$profile->bio = $user->description;
$profile->location = $user->location;
$profile->profileurl = $profileurl;
$profile->created = common_sql_now();
$id = $profile->insert();
if (empty($id)) {
common_log_db_error($profile, 'INSERT', __FILE__);
$profile->query("ROLLBACK");
return false;
}
// check for remote profile
$remote_pro = Remote_profile::staticGet('uri', $profileurl);
if (empty($remote_pro)) {
$remote_pro = new Remote_profile();
$remote_pro->id = $id;
$remote_pro->uri = $profileurl;
$remote_pro->created = common_sql_now();
$rid = $remote_pro->insert();
if (empty($rid)) {
common_log_db_error($profile, 'INSERT', __FILE__);
$profile->query("ROLLBACK");
return false;
}
}
$profile->query("COMMIT");
$this->saveAvatars($user, $id);
return $id;
}
}
function checkAvatar($twitter_user, $profile)
{
global $config;
$path_parts = pathinfo($twitter_user->profile_image_url);
$newname = 'Twitter_' . $twitter_user->id . '_' .
$path_parts['basename'];
$oldname = $profile->getAvatar(48)->filename;
if ($newname != $oldname) {
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug('Avatar for Twitter user ' .
"$profile->nickname has changed.");
common_debug("old: $oldname new: $newname");
}
$this->updateAvatars($twitter_user, $profile);
}
if ($this->missingAvatarFile($profile)) {
if (defined('SCRIPT_DEBUG')) {
common_debug('Twitter user ' . $profile->nickname .
' is missing one or more local avatars.');
common_debug("old: $oldname new: $newname");
}
$this->updateAvatars($twitter_user, $profile);
}
}
function updateAvatars($twitter_user, $profile) {
global $config;
$path_parts = pathinfo($twitter_user->profile_image_url);
$img_root = substr($path_parts['basename'], 0, -11);
$ext = $path_parts['extension'];
$mediatype = $this->getMediatype($ext);
foreach (array('mini', 'normal', 'bigger') as $size) {
$url = $path_parts['dirname'] . '/' .
$img_root . '_' . $size . ".$ext";
$filename = 'Twitter_' . $twitter_user->id . '_' .
$img_root . "_$size.$ext";
$this->updateAvatar($profile->id, $size, $mediatype, $filename);
$this->fetchAvatar($url, $filename);
}
}
function missingAvatarFile($profile) {
foreach (array(24, 48, 73) as $size) {
$filename = $profile->getAvatar($size)->filename;
$avatarpath = Avatar::path($filename);
if (file_exists($avatarpath) == FALSE) {
return true;
}
}
return false;
}
function getMediatype($ext)
{
$mediatype = null;
switch (strtolower($ext)) {
case 'jpg':
$mediatype = 'image/jpg';
break;
case 'gif':
$mediatype = 'image/gif';
break;
default:
$mediatype = 'image/png';
}
return $mediatype;
}
function saveAvatars($user, $id)
{
global $config;
$path_parts = pathinfo($user->profile_image_url);
$ext = $path_parts['extension'];
$end = strlen('_normal' . $ext);
$img_root = substr($path_parts['basename'], 0, -($end+1));
$mediatype = $this->getMediatype($ext);
foreach (array('mini', 'normal', 'bigger') as $size) {
$url = $path_parts['dirname'] . '/' .
$img_root . '_' . $size . ".$ext";
$filename = 'Twitter_' . $user->id . '_' .
$img_root . "_$size.$ext";
if ($this->fetchAvatar($url, $filename)) {
$this->newAvatar($id, $size, $mediatype, $filename);
} else {
common_log(LOG_WARNING, "Problem fetching Avatar: $url", __FILE__);
}
}
}
function updateAvatar($profile_id, $size, $mediatype, $filename) {
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug("Updating avatar: $size");
}
$profile = Profile::staticGet($profile_id);
if (empty($profile)) {
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug("Couldn't get profile: $profile_id!");
}
return;
}
$sizes = array('mini' => 24, 'normal' => 48, 'bigger' => 73);
$avatar = $profile->getAvatar($sizes[$size]);
// Delete the avatar, if present
if ($avatar) {
$avatar->delete();
}
$this->newAvatar($profile->id, $size, $mediatype, $filename);
}
function newAvatar($profile_id, $size, $mediatype, $filename)
{
global $config;
$avatar = new Avatar();
$avatar->profile_id = $profile_id;
switch($size) {
case 'mini':
$avatar->width = 24;
$avatar->height = 24;
break;
case 'normal':
$avatar->width = 48;
$avatar->height = 48;
break;
default:
// Note: Twitter's big avatars are a different size than
// Laconica's (Laconica's = 96)
$avatar->width = 73;
$avatar->height = 73;
}
$avatar->original = 0; // we don't have the original
$avatar->mediatype = $mediatype;
$avatar->filename = $filename;
$avatar->url = Avatar::url($filename);
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug("new filename: $avatar->url");
}
$avatar->created = common_sql_now();
$id = $avatar->insert();
if (empty($id)) {
common_log_db_error($avatar, 'INSERT', __FILE__);
return null;
}
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug("Saved new $size avatar for $profile_id.");
}
return $id;
}
function fetchAvatar($url, $filename)
{
$avatar_dir = INSTALLDIR . '/avatar/';
$avatarfile = $avatar_dir . $filename;
$out = fopen($avatarfile, 'wb');
if (!$out) {
common_log(LOG_WARNING, "Couldn't open file $filename", __FILE__);
return false;
}
if (defined('SCRIPT_DEBUG')) {
2009-05-07 09:10:31 +01:00
common_debug("Fetching avatar: $url");
}
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_FILE, $out);
curl_setopt($ch, CURLOPT_BINARYTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 0);
$result = curl_exec($ch);
curl_close($ch);
fclose($out);
return $result;
}
}
declare(ticks = 1);
if (have_option('i')) {
$id = get_option_value('i');
} else if (have_option('--id')) {
$id = get_option_value('--id');
} else if (count($args) > 0) {
$id = $args[0];
} else {
$id = null;
}
if (have_option('d') || have_option('debug')) {
define('SCRIPT_DEBUG', true);
}
2009-07-18 02:09:03 +01:00
$fetcher = new TwitterStatusFetcher($id);
$fetcher->runOnce();