Merge branch 'testing' of git@gitorious.org:statusnet/mainline into 0.9.x

Conflicts:
	plugins/OStatus/extlib/hkit/hkit.class.php
	plugins/OStatus/lib/discoveryhints.php
This commit is contained in:
Brion Vibber 2010-03-19 15:51:22 -07:00
commit 7e2af3dcae
11 changed files with 516 additions and 747 deletions

View File

@ -161,8 +161,8 @@ class SiteadminpanelAction extends AdminPanelAction
// Validate text limit
if (!Validate::number($values['site']['textlimit'], array('min' => 140))) {
$this->clientError(_("Minimum text limit is 140 characters."));
if (!Validate::number($values['site']['textlimit'], array('min' => 0))) {
$this->clientError(_("Minimum text limit is 0 (unlimited)."));
}
// Validate dupe limit

View File

@ -347,6 +347,11 @@ class ActivityUtils
$els = $element->childNodes;
foreach ($els as $link) {
if (!($link instanceof DOMElement)) {
continue;
}
if ($link->localName == self::LINK && $link->namespaceURI == self::ATOM) {
$linkRel = $link->getAttribute(self::REL);
@ -646,38 +651,11 @@ class ActivityObject
);
if ($element->tagName == 'author') {
$this->type = self::PERSON; // XXX: is this fair?
$this->title = $this->_childContent($element, self::NAME);
$this->id = $this->_childContent($element, self::URI);
if (empty($this->id)) {
$email = $this->_childContent($element, self::EMAIL);
if (!empty($email)) {
// XXX: acct: ?
$this->id = 'mailto:'.$email;
}
}
$this->_fromAuthor($element);
} else if ($element->tagName == 'item') {
$this->_fromRssItem($element);
} else {
$this->type = $this->_childContent($element, Activity::OBJECTTYPE,
Activity::SPEC);
if (empty($this->type)) {
$this->type = ActivityObject::NOTE;
}
$this->id = $this->_childContent($element, self::ID);
$this->title = $this->_childContent($element, self::TITLE);
$this->summary = $this->_childContent($element, self::SUMMARY);
$this->source = $this->_getSource($element);
$this->content = ActivityUtils::getContent($element);
$this->link = ActivityUtils::getPermalink($element);
$this->_fromAtomEntry($element);
}
// Some per-type attributes...
@ -700,6 +678,149 @@ class ActivityObject
}
}
/**
 * Fill in this object from an Atom <author> element.
 *
 * The object is typed as a person, titled with the author's name and
 * identified by the author's URI; when no URI is given, a mailto: URI
 * built from the author's email is used instead (if there is one).
 *
 * @param DOMElement $element the <author> element
 *
 * @return void
 */
private function _fromAuthor($element)
{
    $this->type  = self::PERSON; // XXX: is this fair?
    $this->title = $this->_childContent($element, self::NAME);

    $uri = $this->_childContent($element, self::URI);
    $this->id = $uri;
    if (!empty($uri)) {
        return;
    }

    // No URI: fall back to the email address, if any.
    $email = $this->_childContent($element, self::EMAIL);
    if (empty($email)) {
        return;
    }
    // XXX: acct: ?
    $this->id = 'mailto:'.$email;
}
/**
 * Fill in this object from an Atom entry-like element.
 *
 * The object type is read from the activity object-type child and
 * defaults to ActivityObject::NOTE when that child is missing or empty.
 *
 * @param DOMElement $element element to read from
 *
 * @return void
 */
private function _fromAtomEntry($element)
{
    $declaredType = $this->_childContent($element, Activity::OBJECTTYPE,
                                         Activity::SPEC);
    $this->type = empty($declaredType) ? ActivityObject::NOTE : $declaredType;

    $this->id      = $this->_childContent($element, self::ID);
    $this->title   = $this->_childContent($element, self::TITLE);
    $this->summary = $this->_childContent($element, self::SUMMARY);

    $this->source  = $this->_getSource($element);

    $this->content = ActivityUtils::getContent($element);
    $this->link    = ActivityUtils::getPermalink($element);
}
// @fixme rationalize with Activity::_fromRssItem()
/**
 * Fill in this object from an RSS <item> element.
 *
 * Title and link come from the item's own children. Content prefers the
 * content-module element and falls back to <description>. The <guid>, if
 * present, becomes the id and, when flagged as a permalink, also
 * replaces the link.
 *
 * @param DOMElement $item the RSS <item> element
 *
 * @return void
 */
private function _fromRssItem($item)
{
    $this->title = ActivityUtils::childContent($item, ActivityObject::TITLE, Activity::RSS);

    $contentEl = ActivityUtils::child($item, ActivityUtils::CONTENT, Activity::CONTENTNS);
    if (!empty($contentEl)) {
        $this->content = htmlspecialchars_decode($contentEl->textContent, ENT_QUOTES);
    } else {
        $descriptionEl = ActivityUtils::child($item, Activity::DESCRIPTION, Activity::RSS);
        if (!empty($descriptionEl)) {
            $this->content = htmlspecialchars_decode($descriptionEl->textContent, ENT_QUOTES);
        }
    }

    $this->link = ActivityUtils::childContent($item, ActivityUtils::LINK, Activity::RSS);

    $guidEl = ActivityUtils::child($item, Activity::GUID, Activity::RSS);
    if (!empty($guidEl)) {
        $this->id = $guidEl->textContent;
        // isPermaLink="false" explicitly marks the guid as NOT being a URL,
        // so only treat it as the link when the attribute says otherwise.
        // Same test as Activity::_fromRssItem().
        if ($guidEl->hasAttribute('isPermaLink') && $guidEl->getAttribute('isPermaLink') != 'false') {
            // overwrites <link>
            $this->link = $this->id;
        }
    }
}
/**
 * Build a person object from the text of an RSS <author> element.
 *
 * Recognised shapes, tried in this order:
 *   "email (Name)", "Name <email>", anything containing "@" (taken as a
 *   bare email), anything else (taken as a bare name).
 *
 * @param DOMElement $el the <author> element
 *
 * @return ActivityObject person object; id is a mailto: URI when an
 *                        email was found, otherwise left unset
 */
public static function fromRssAuthor($el)
{
    $text  = $el->textContent;
    $name  = null;
    $email = null;

    if (preg_match('/^(.*?) \((.*)\)$/', $text, $match)) {
        $email = $match[1];
        $name  = $match[2];
    } else if (preg_match('/^(.*?) <(.*)>$/', $text, $match)) {
        $name  = $match[1];
        $email = $match[2];
    } else if (preg_match('/.*@.*/', $text)) {
        $email = $text;
    } else {
        $name = $text;
    }

    // Not really enough info
    $person = new ActivityObject();
    $person->element = $el;
    $person->type    = ActivityObject::PERSON;
    $person->title   = $name;
    if (!empty($email)) {
        $person->id = 'mailto:'.$email;
    }
    return $person;
}
/**
 * Build a person object from a Dublin Core <creator> element.
 *
 * Only the element's text is available, so it is used as the title;
 * no id is assigned.
 *
 * @param DOMElement $el the <creator> element
 *
 * @return ActivityObject
 */
public static function fromDcCreator($el)
{
    // Not really enough info
    $creator = new ActivityObject();
    $creator->element = $el;
    $creator->type    = ActivityObject::PERSON;
    $creator->title   = $el->textContent;
    return $creator;
}
/**
 * Build an object describing the feed itself from an RSS <channel>.
 *
 * Used as a last-resort actor when an item carries no author info.
 *
 * The RSS-related constants (RSS, SELF, DESCRIPTION, IMAGE, URL) are
 * declared on Activity, so they are referenced through Activity:: here,
 * the same way _fromRssItem() in this class does it.
 *
 * @param DOMElement $el the <channel> element
 *
 * @return ActivityObject
 */
public static function fromRssChannel($el)
{
    $obj = new ActivityObject();
    $obj->element = $el;
    $obj->type = ActivityObject::PERSON; // @fixme guess better

    $obj->title = ActivityUtils::childContent($el, ActivityObject::TITLE, Activity::RSS);
    $obj->link  = ActivityUtils::childContent($el, ActivityUtils::LINK, Activity::RSS);
    $obj->id    = ActivityUtils::getLink($el, Activity::SELF);

    $desc = ActivityUtils::childContent($el, Activity::DESCRIPTION, Activity::RSS);
    if (!empty($desc)) {
        $obj->content = htmlspecialchars_decode($desc, ENT_QUOTES);
    }

    $imageEl = ActivityUtils::child($el, Activity::IMAGE, Activity::RSS);
    if (!empty($imageEl)) {
        $obj->avatarLinks[] = ActivityUtils::childContent($imageEl, Activity::URL, Activity::RSS);
    }

    return $obj;
}
private function _childContent($element, $tag, $namespace=ActivityUtils::ATOM)
{
return ActivityUtils::childContent($element, $tag, $namespace);
@ -1054,6 +1175,21 @@ class Activity
const PUBLISHED = 'published';
const UPDATED = 'updated';
const RSS = null; // no namespace!
const PUBDATE = 'pubDate';
const DESCRIPTION = 'description';
const GUID = 'guid';
const SELF = 'self';
const IMAGE = 'image';
const URL = 'url';
const DC = 'http://purl.org/dc/elements/1.1/';
const CREATOR = 'creator';
const CONTENTNS = 'http://purl.org/rss/1.0/modules/content/';
public $actor; // an ActivityObject
public $verb; // a string (the URL)
public $object; // an ActivityObject
@ -1084,8 +1220,6 @@ class Activity
return;
}
$this->entry = $entry;
// Insist on a feed's root DOMElement; don't allow a DOMDocument
if ($feed instanceof DOMDocument) {
throw new ClientException(
@ -1093,8 +1227,22 @@ class Activity
);
}
$this->entry = $entry;
$this->feed = $feed;
if ($entry->namespaceURI == Activity::ATOM &&
$entry->localName == 'entry') {
$this->_fromAtomEntry($entry, $feed);
} else if ($entry->namespaceURI == Activity::RSS &&
$entry->localName == 'item') {
$this->_fromRssItem($entry, $feed);
} else {
throw new Exception("Unknown DOM element: {$entry->namespaceURI} {$entry->localName}");
}
}
function _fromAtomEntry($entry, $feed)
{
$pubEl = $this->_child($entry, self::PUBLISHED, self::ATOM);
if (!empty($pubEl)) {
@ -1180,6 +1328,69 @@ class Activity
}
}
/**
 * Fill in this activity from an RSS <item>.
 *
 * @param DOMElement $item the <item> element
 * @param DOMElement $rss  the enclosing channel, used as the actor of
 *                         last resort; may be empty
 *
 * @return void
 */
function _fromRssItem($item, $rss)
{
    // Verb: explicit element if present, otherwise an implied "post".
    $verbEl = $this->_child($item, self::VERB);
    if (empty($verbEl)) {
        $this->verb = ActivityVerb::POST;
        // XXX: do other implied stuff here
    } else {
        $this->verb = trim($verbEl->textContent);
    }

    $pubDateEl = $this->_child($item, self::PUBDATE, self::RSS);
    if (!empty($pubDateEl)) {
        $this->time = strtotime($pubDateEl->textContent);
    }

    // Actor: <author>, then dc:creator, then the channel itself.
    $authorEl = $this->_child($item, self::AUTHOR, self::RSS);
    if (!empty($authorEl)) {
        $this->actor = ActivityObject::fromRssAuthor($authorEl);
    } else {
        $creatorEl = $this->_child($item, self::CREATOR, self::DC);
        if (!empty($creatorEl)) {
            $this->actor = ActivityObject::fromDcCreator($creatorEl);
        } else if (!empty($rss)) {
            $this->actor = ActivityObject::fromRssChannel($rss);
        }
    }

    $this->title = ActivityUtils::childContent($item, ActivityObject::TITLE, self::RSS);

    // Content: content-module element first, <description> as fallback.
    $bodyEl = ActivityUtils::child($item, ActivityUtils::CONTENT, self::CONTENTNS);
    if (empty($bodyEl)) {
        $bodyEl = ActivityUtils::child($item, self::DESCRIPTION, self::RSS);
    }
    if (!empty($bodyEl)) {
        $this->content = htmlspecialchars_decode($bodyEl->textContent, ENT_QUOTES);
    }

    $this->link = ActivityUtils::childContent($item, ActivityUtils::LINK, self::RSS);

    // @fixme enclosures
    // @fixme thumbnails... maybe

    $guidEl = ActivityUtils::child($item, self::GUID, self::RSS);
    if (!empty($guidEl)) {
        $this->id = $guidEl->textContent;
        $flaggedPermalink = $guidEl->hasAttribute('isPermaLink')
            && $guidEl->getAttribute('isPermaLink') != 'false';
        if ($flaggedPermalink) {
            // overwrites <link>
            $this->link = $this->id;
        }
    }

    $this->object  = new ActivityObject($item);
    $this->context = new ActivityContext($item);
}
/**
* Returns an Atom <entry> based on this activity
*

View File

@ -61,7 +61,7 @@ class FeedSub extends Memcached_DataObject
public $__table = 'feedsub';
public $id;
public $feeduri;
public $uri;
// PuSH subscription data
public $huburi;
@ -238,7 +238,7 @@ class FeedSub extends Memcached_DataObject
public function subscribe($mode='subscribe')
{
if ($this->sub_state && $this->sub_state != 'inactive') {
throw new ServerException("Attempting to start PuSH subscription to feed in state $this->sub_state");
common_log(LOG_WARNING, "Attempting to (re)start PuSH subscription to $this->uri in unexpected state $this->sub_state");
}
if (empty($this->huburi)) {
if (common_config('feedsub', 'nohub')) {
@ -261,7 +261,7 @@ class FeedSub extends Memcached_DataObject
*/
public function unsubscribe() {
if ($this->sub_state != 'active') {
throw new ServerException("Attempting to end PuSH subscription to feed in state $this->sub_state");
common_log(LOG_WARNING, "Attempting to (re)end PuSH subscription to $this->uri in unexpected state $this->sub_state");
}
if (empty($this->huburi)) {
if (common_config('feedsub', 'nohub')) {

View File

@ -204,12 +204,13 @@ class Ostatus_profile extends Memcached_DataObject
public function subscribe()
{
$feedsub = FeedSub::ensureFeed($this->feeduri);
if ($feedsub->sub_state == 'active' || $feedsub->sub_state == 'subscribe') {
if ($feedsub->sub_state == 'active') {
// Active subscription, we don't need to do anything.
return true;
} else if ($feedsub->sub_state == '' || $feedsub->sub_state == 'inactive') {
} else {
// Inactive or we got left in an inconsistent state.
// Run a subscription request to make sure we're current!
return $feedsub->subscribe();
} else if ('unsubscribe') {
throw new FeedSubException("Unsub is pending, can't subscribe...");
}
}
@ -222,15 +223,13 @@ class Ostatus_profile extends Memcached_DataObject
*/
public function unsubscribe() {
$feedsub = FeedSub::staticGet('uri', $this->feeduri);
if (!$feedsub) {
if (!$feedsub || $feedsub->sub_state == '' || $feedsub->sub_state == 'inactive') {
// No active PuSH subscription, we can just leave it be.
return true;
}
if ($feedsub->sub_state == 'active') {
} else {
// PuSH subscription is either active or in an indeterminate state.
// Send an unsubscribe.
return $feedsub->unsubscribe();
} else if ($feedsub->sub_state == '' || $feedsub->sub_state == 'inactive' || $feedsub->sub_state == 'unsubscribe') {
return true;
} else if ($feedsub->sub_state == 'subscribe') {
throw new FeedSubException("Feed is awaiting subscription, can't unsub...");
}
}
@ -847,8 +846,8 @@ class Ostatus_profile extends Memcached_DataObject
}
/**
*
* Download and update given avatar image
*
* @param string $url
* @throws Exception in various failure cases
*/
@ -858,6 +857,9 @@ class Ostatus_profile extends Memcached_DataObject
// We've already got this one.
return;
}
if (!common_valid_http_url($url)) {
    // Substitute the URL into the translated message with sprintf();
    // previously $url was handed to the exception constructor as a
    // separate argument and the %s placeholder was left unfilled.
    throw new ServerException(sprintf(_m("Invalid avatar URL %s"), $url));
}
if ($this->isGroup()) {
$self = $this->localGroup();

View File

@ -1,105 +0,0 @@
<?php
// hcard profile for hkit
//
// This file assigns to $this, so it only works when included from inside
// an hKit method (hKit::loadProfile() pulls in "<profile>.profile.php").
// Class name that marks the root element of one card.
$this->root_class = 'vcard';
// Class names to look for. An array entry lists the sub-properties of the
// name directly before it (hKit::processNodes() inspects $classes[$i+1]).
$this->classes = array(
'fn', array('honorific-prefix', 'given-name', 'additional-name', 'family-name', 'honorific-suffix'),
'n', array('honorific-prefix', 'given-name', 'additional-name', 'family-name', 'honorific-suffix'),
'adr', array('post-office-box', 'extended-address', 'street-address', 'postal-code', 'country-name', 'type', 'region', 'locality'),
'label', 'bday', 'agent', 'nickname', 'photo', 'class',
'email', array('type', 'value'),
'category', 'key', 'logo', 'mailer', 'note',
'org', array('organization-name', 'organization-unit'),
'tel', array('type', 'value'),
'geo', array('latitude', 'longitude'),
'tz', 'uid', 'url', 'rev', 'role', 'sort-string', 'sound', 'title'
);
// classes that must only appear once per card
// (hKit::dedupeSingles() keeps element [0] when one of these is a list)
$this->singles = array(
'fn'
);
// classes that are required (not strictly enforced - give at least one!)
// (hKit::postProcess() uses the first entry to recognise a lone card)
$this->required = array(
'fn'
);
// Per class: 'TAG|attribute' pairs telling hKit::getNodeValue() which
// attribute holds the value when the class sits on that tag.
$this->att_map = array(
'fn' => array('IMG|alt'),
'url' => array('A|href', 'IMG|src', 'AREA|href'),
'photo' => array('IMG|src'),
'bday' => array('ABBR|title'),
'logo' => array('IMG|src'),
'email' => array('A|href'),
'geo' => array('ABBR|title')
);
// Per class: callback applied to the extracted value by
// hKit::getNodeValue() (via preg_replace_callback).
$this->callbacks = array(
'url' => array($this, 'resolvePath'),
'photo' => array($this, 'resolvePath'),
'logo' => array($this, 'resolvePath'),
'email' => array($this, 'resolveEmail')
);
/**
 * Post-processing hook for the hcard profile: derive a missing 'n'
 * (structured name) for every parsed card.
 *
 * @param array $a list of parsed vcards
 *
 * @return array the same list with implied-n rules applied to each card
 */
function hKit_hcard_post($a)
{
    foreach (array_keys($a) as $idx) {
        hKit_implied_n_optimization($a[$idx]);
        hKit_implied_n_from_fn($a[$idx]);
    }
    return $a;
}
/**
 * Implied-n optimization: when a card has a plain-string two-word 'fn',
 * no 'n', and 'fn' is not simply the organisation name, split 'fn' into
 * given and family names.
 *
 * @param array &$vcard parsed card, modified in place
 *
 * @return void
 */
function hKit_implied_n_optimization(&$vcard)
{
    if (!array_key_exists('fn', $vcard) || is_array($vcard['fn'])) {
        return;
    }
    if (array_key_exists('n', $vcard)) {
        return;
    }
    if (array_key_exists('org', $vcard) && $vcard['fn'] == $vcard['org']) {
        return;
    }
    if (count(explode(' ', $vcard['fn'])) != 2) {
        return;
    }

    // Each rule: regex, capture index of the given name, capture index of
    // the family name. First match wins.
    $rules = array(
        array('/^(\S+),\s*(\S{1})$/', 2, 1),    // Lastname, Initial
        array('/^(\S+)\s*(\S{1})\.*$/', 2, 1),  // Lastname Initial(.)
        array('/^(\S+),\s*(\S+)$/', 2, 1),      // Lastname, Firstname
        array('/^(\S+)\s*(\S+)$/', 1, 2),       // Firstname Lastname
    );

    foreach ($rules as $rule) {
        if (preg_match($rule[0], $vcard['fn'], $matches) !== 1) {
            continue;
        }
        $vcard['n'] = array(
            'given-name'  => $matches[$rule[1]],
            'family-name' => $matches[$rule[2]],
        );
        break;
    }
}
/**
 * When 'fn' was parsed as a structured array, reuse it as 'n' (unless an
 * 'n' already exists or 'fn' equals 'org'), then flatten 'fn' down to
 * its 'text' entry.
 *
 * @param array &$vcard parsed card, modified in place
 *
 * @return void
 */
function hKit_implied_n_from_fn(&$vcard)
{
    // Nothing to do unless fn is a structured value.
    if (!array_key_exists('fn', $vcard) || !is_array($vcard['fn'])) {
        return;
    }

    if (!array_key_exists('n', $vcard)
        && (!array_key_exists('org', $vcard) || $vcard['fn'] != $vcard['org'])) {
        $vcard['n'] = $vcard['fn'];
    }

    $vcard['fn'] = $vcard['fn']['text'];
}
?>

View File

@ -1,475 +0,0 @@
<?php
/*
hKit Library for PHP5 - a generic library for parsing Microformats
Copyright (C) 2006 Drew McLellan
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author
Drew McLellan - http://allinthehead.com/
Contributors:
Scott Reynen - http://www.randomchaos.com/
Version 0.5, 22-Jul-2006
fixed by-ref issue cropping up in PHP 5.0.5
fixed a bug with a@title
added support for new fn=n optimisation
added support for new a.include include-pattern
Version 0.4, 23-Jun-2006
prevented nested includes from causing infinite loops
returns false if URL can't be fetched
added pre-flight check for base support level
added deduping of once-only classnames
prevented accumulation of multiple 'value' values
tuned whitespace handling and treatment of DEL elements
Version 0.3, 21-Jun-2006
added post-processor callback method into profiles
fixed minor problems raised by hcard testsuite
added support for include-pattern
added support for td@headers pattern
added implied-n optimization into default hcard profile
Version 0.2, 20-Jun-2006
added class callback mechanism
added resolvePath & resolveEmail
added basic BASE support
Version 0.1.1, 19-Jun-2006 (different timezone, no time machine)
added external Tidy option
Version 0.1, 20-Jun-2006
initial release
*/
class hKit
{
public $tidy_mode = 'proxy'; // 'proxy', 'exec', 'php' or 'none'
public $tidy_proxy = 'http://cgi.w3.org/cgi-bin/tidy?forceXML=on&docAddr='; // required only for tidy_mode=proxy
public $tmp_dir = '/path/to/writable/dir/'; // required only for tidy_mode=exec
private $root_class = '';
private $classes = '';
private $singles = '';
private $required = '';
private $att_map = '';
private $callbacks = '';
private $processor = '';
private $url = '';
private $base = '';
private $doc = '';
/**
 * Pre-flight check: abort if any PHP function the parser depends on is
 * unavailable.
 *
 * Kept under its historical class-named form so existing explicit calls
 * keep working; see __construct() below.
 *
 * @return void
 */
public function hKit()
{
    // pre-flight checks
    $required = array('dom_import_simplexml', 'file_get_contents', 'simplexml_load_string');
    $missing = array();
    foreach ($required as $f) {
        if (!function_exists($f)) {
            $missing[] = $f . '()';
        }
    }
    if (!empty($missing)) {
        die('hKit error: these required functions are not available: <strong>' . implode(', ', $missing) . '</strong>');
    }
}

/**
 * Constructor. PHP 8 no longer treats a method named after the class as
 * the constructor, so the pre-flight check is wired up explicitly.
 */
public function __construct()
{
    $this->hKit();
}
/**
 * Fetch a URL and parse the microformat described by $profile out of it.
 *
 * If the URL carries a #fragment, only the element with that id is
 * searched.
 *
 * @param string $profile profile name (e.g. 'hcard')
 * @param string $url     document URL
 *
 * @return array|false parsed data, or false when arguments are empty or
 *                     the URL could not be fetched
 */
public function getByURL($profile='', $url='')
{
    if ($profile=='' || $url == '') return false;

    $this->loadProfile($profile);

    $source = $this->loadURL($url);
    if (!$source) {
        return false;
    }

    $tidy_xhtml = $this->tidyThis($source);

    // Text after the last '#'. Done with strrchr/substr rather than
    // array_pop(explode(...)), which passes a non-variable by reference.
    $fragment = false;
    $hashTail = strrchr($url, '#');
    if ($hashTail) {
        $fragment = substr($hashTail, 1);
    }

    $doc = $this->loadDoc($tidy_xhtml, $fragment);
    $s = $this->processNodes($doc, $this->classes);
    $s = $this->postProcess($profile, $s);
    return $s;
}
/**
 * Parse the microformat described by $profile out of an XML string.
 *
 * @param string $profile   profile name (e.g. 'hcard')
 * @param string $input_xml well-formed XML source
 *
 * @return array|false parsed data, or false when either argument is empty
 */
public function getByString($profile='', $input_xml='')
{
    if ($profile == '' || $input_xml == '') {
        return false;
    }

    $this->loadProfile($profile);

    $roots  = $this->loadDoc($input_xml);
    $parsed = $this->processNodes($roots, $this->classes);

    return $this->postProcess($profile, $parsed);
}
/**
* Extract the values for a list of class names from a set of nodes.
*
* For each item, every non-array entry of $classes is looked up among the
* item's descendants by class attribute. An array entry in $classes is the
* list of sub-properties of the entry before it and is handled by recursion.
*
* @param mixed $items          iterable of SimpleXML nodes to search
* @param array $classes        class names, optionally followed by sub-lists
* @param bool  $allow_includes whether to follow the include-pattern
*
* @return array list of per-item arrays when more than one item was
*               processed; the single item's array when exactly one was;
*               an empty array when there were none
*/
private function processNodes($items, $classes, $allow_includes=true){
$out = array();
foreach($items as $item){
$data = array();
for ($i=0; $i<sizeof($classes); $i++){
// array entries are sub-property lists, consumed via $classes[$i+1] below
if (!is_array($classes[$i])){
$xpath = ".//*[contains(concat(' ',normalize-space(@class),' '),' " . $classes[$i] . " ')]";
$results = $item->xpath($xpath);
if ($results){
foreach ($results as $result){
if (isset($classes[$i+1]) && is_array($classes[$i+1])){
// NOTE(review): recurses over the whole $results list, not just the
// current $result -- confirm this is intended
$nodes = $this->processNodes($results, $classes[$i+1]);
if (sizeof($nodes) > 0){
// keep the node's own text next to its sub-properties under 'text'
$nodes = array_merge(array('text'=>$this->getNodeValue($result, $classes[$i])), $nodes);
$data[$classes[$i]] = $nodes;
}else{
$data[$classes[$i]] = $this->getNodeValue($result, $classes[$i]);
}
}else{
if (isset($data[$classes[$i]])){
if (is_array($data[$classes[$i]])){
// is already an array - append
$data[$classes[$i]][] = $this->getNodeValue($result, $classes[$i]);
}else{
// make it an array
if ($classes[$i] == 'value'){ // unless it's the 'value' of a type/value pattern
$data[$classes[$i]] .= $this->getNodeValue($result, $classes[$i]);
}else{
$old_val = $data[$classes[$i]];
$data[$classes[$i]] = array($old_val, $this->getNodeValue($result, $classes[$i]));
$old_val = false;
}
}
}else{
// set as normal value
$data[$classes[$i]] = $this->getNodeValue($result, $classes[$i]);
}
}
// td@headers pattern
// each id listed in headers= is looked up in the full document and the
// parent of the matching element is parsed with the full class list
if (strtoupper(dom_import_simplexml($result)->tagName)== "TD" && $result['headers']){
$include_ids = explode(' ', $result['headers']);
$doc = $this->doc;
foreach ($include_ids as $id){
$xpath = "//*[@id='$id']/..";
$includes = $doc->xpath($xpath);
foreach ($includes as $include){
$tmp = $this->processNodes($include, $this->classes);
if (is_array($tmp)) $data = array_merge($data, $tmp);
}
}
}
}
}
}
$result = false;
}
// include-pattern
// <object data="#id"> or <a href="#id"> with class "include" pulls in the
// element with that id; recursion passes $allow_includes=false so an
// included fragment cannot itself trigger further includes
if ($allow_includes){
$xpath = ".//*[contains(concat(' ',normalize-space(@class),' '),' include ')]";
$results = $item->xpath($xpath);
if ($results){
foreach ($results as $result){
$tagName = strtoupper(dom_import_simplexml($result)->tagName);
if ((($tagName == "OBJECT" && $result['data']) || ($tagName == "A" && $result['href']))
&& preg_match('/\binclude\b/', $result['class'])){
$att = ($tagName == "OBJECT" ? 'data' : 'href');
$id = str_replace('#', '', $result[$att]);
$doc = $this->doc;
$xpath = "//*[@id='$id']";
$includes = $doc->xpath($xpath);
foreach ($includes as $include){
$include = simplexml_load_string('<root1><root2>'.$include->asXML().'</root2></root1>'); // don't ask.
$tmp = $this->processNodes($include, $this->classes, false);
if (is_array($tmp)) $data = array_merge($data, $tmp);
}
}
}
}
}
$out[] = $data;
}
// more than one item: list of results; exactly one: that item's data
if (sizeof($out) > 1){
return $out;
}else if (isset($data)){
return $data;
}else{
return array();
}
}
/**
 * Work out the value a node contributes for a given class name.
 *
 * Order of preference: attribute named by the profile's att_map for this
 * tag, then tag-specific fallbacks (OBJECT data, IMG/AREA alt, title on
 * anything but A), then the node's non-blank child text. The profile's
 * callback for the class, if any, is applied afterwards, and whitespace
 * is normalised except inside PRE.
 *
 * @param SimpleXMLElement $node      node carrying the class
 * @param string           $className class being extracted
 *
 * @return string|false value, or false for DEL elements
 */
private function getNodeValue($node, $className)
{
    $tag_name = strtoupper(dom_import_simplexml($node)->tagName);
    $s = false;

    // ignore DEL tags
    if ($tag_name == 'DEL') return $s;

    // look up att map values
    if (array_key_exists($className, $this->att_map)) {
        foreach ($this->att_map[$className] as $map) {
            // Entries look like 'TAG|attribute'. Compare the whole tag
            // prefix so that 'A' does not also match 'AREA|...'.
            if (strpos($map, $tag_name . '|') === 0) {
                $mapParts = explode('|', $map);
                $s = '' . $node[end($mapParts)];
            }
        }
    }

    // if nothing and OBJ, try data.
    if (!$s && $tag_name == 'OBJECT' && $node['data']) $s = '' . $node['data'];

    // if nothing and IMG, try alt.
    if (!$s && $tag_name == 'IMG' && $node['alt']) $s = '' . $node['alt'];

    // if nothing and AREA, try alt.
    if (!$s && $tag_name == 'AREA' && $node['alt']) $s = '' . $node['alt'];

    // if nothing and not A, try title.
    if (!$s && $tag_name != 'A' && $node['title']) $s = '' . $node['title'];

    // if nothing found, go with node text
    if (!$s) {
        $children = array_filter($node->xpath('child::node()'), array($this, 'filterBlankValues'));
        // glue first: the (array, glue) argument order is not accepted by PHP 8
        $s = implode(' ', $children);
    }

    // callbacks
    if (array_key_exists($className, $this->callbacks)) {
        $s = preg_replace_callback('/.*/', $this->callbacks[$className], $s, 1);
    }

    // trim and remove line breaks
    if ($tag_name != 'PRE') {
        $s = trim(preg_replace('/[\r\n\t]+/', '', $s));
        $s = trim(preg_replace('/(\s{2})+/', ' ', $s));
    }

    return $s;
}
/**
 * array_filter() callback: keep only values containing at least one
 * word character.
 *
 * @param mixed $s value to test (used as a string)
 *
 * @return int|false result of preg_match()
 */
private function filterBlankValues($s)
{
    $hasWordChars = preg_match("/\w+/", $s);
    return $hasWordChars;
}
/**
 * Clean up markup according to $this->tidy_mode.
 *
 *  - 'exec': write the source to a temp file in $this->tmp_dir and run the
 *            external tidy binary on it
 *  - 'php':  use the tidy extension
 *  - other:  return the source untouched
 *
 * @param string $source markup to clean
 *
 * @return string cleaned markup
 */
private function tidyThis($source)
{
    switch ($this->tidy_mode) {
        case 'exec':
            $tmp_file = $this->tmp_dir . md5($source) . '.txt';
            file_put_contents($tmp_file, $source);
            $tidy = array();
            // tmp_dir is configurable, so quote the path for the shell
            exec('tidy -utf8 -indent -asxhtml -numeric -bare -quiet ' . escapeshellarg($tmp_file), $tidy);
            unlink($tmp_file);
            return implode("\n", $tidy);

        case 'php':
            $tidy = tidy_parse_string($source);
            // tidy_clean_repair() only reports success; the repaired
            // markup has to be read back separately.
            tidy_clean_repair($tidy);
            return tidy_get_output($tidy);

        default:
            return $source;
    }
}
/**
 * Include the profile definition file "<profile>.profile.php".
 *
 * The profile file assigns to $this, which is why it is included from
 * inside this method.
 *
 * NOTE(review): require_once includes a given file only once per request,
 * so a second hKit instance loading the same profile would not get its
 * properties populated -- confirm whether that can happen in practice.
 *
 * @param string $profile profile name
 *
 * @return void
 */
private function loadProfile($profile)
{
    $profileFile = $profile . '.profile.php';
    require_once($profileFile);
}
/**
 * Parse XML source and return the root nodes of the current profile.
 *
 * Also records the parsed document in $this->doc and the document's base
 * URL (from <base href> or an xml:base attribute) in $this->base.
 *
 * @param string       $input_xml XML source
 * @param string|false $fragment  id of the element to restrict parsing to
 *
 * @return array nodes whose class contains $this->root_class; an empty
 *               array if the source cannot be parsed or the fragment id
 *               is not present
 */
private function loadDoc($input_xml, $fragment=false)
{
    $xml = simplexml_load_string($input_xml);
    $this->doc = $xml;
    if ($xml === false) {
        // unparsable source: nothing to extract
        return array();
    }

    if ($fragment) {
        $doc = $xml->xpath("//*[@id='$fragment']");
        if (empty($doc)) {
            // no element with that id
            return array();
        }
        $xml = simplexml_load_string($doc[0]->asXML());
        $doc = null;
        if ($xml === false) {
            return array();
        }
    }

    // base tag
    if ($xml->head->base['href']) $this->base = (string) $xml->head->base['href'];

    // xml:base attribute - PITA with SimpleXML
    // [^"]* stops at the closing quote; .* would swallow later attributes
    // on the same line.
    preg_match('/xml:base="([^"]*)"/', $xml->asXML(), $matches);
    if (is_array($matches) && sizeof($matches) > 1) $this->base = $matches[1];

    return $xml->xpath("//*[contains(concat(' ',normalize-space(@class),' '),' $this->root_class ')]");
}
/**
 * Fetch the document at $url, going through the tidy proxy when
 * tidy_mode is 'proxy' and a proxy URL is configured.
 *
 * The original URL is remembered in $this->url.
 *
 * @param string $url document URL
 *
 * @return string|false document source, or false if the fetch failed
 */
private function loadURL($url)
{
    $this->url = $url;

    $viaProxy = ($this->tidy_mode == 'proxy' && $this->tidy_proxy != '');
    $fetchUrl = $viaProxy ? $this->tidy_proxy . $url : $url;

    return @file_get_contents($fetchUrl);
}
/**
 * Normalise the parse result: wrap a lone card in a list, collapse
 * single-valued classes, run the profile's hKit_<profile>_post() hook if
 * one is defined, and strip the internal 'text' entries.
 *
 * @param string $profile profile name
 * @param mixed  $s       result of processNodes()
 *
 * @return array
 */
private function postProcess($profile, $s)
{
    // A lone card comes back as a flat array keyed by class name;
    // recognise it by the first required class and wrap it.
    if (is_array($s) && array_key_exists($this->required[0], $s)) {
        $s = array($s);
    }

    $s = $this->dedupeSingles($s);

    $hook = 'hKit_'.$profile.'_post';
    if (function_exists($hook)) {
        $s = call_user_func($hook, $s);
    }

    return $this->removeTextVals($s);
}
/**
* preg_replace_callback() callback: turn a possibly-relative path into an
* absolute URL, relative to $this->base (when it looks like a full URL)
* or else $this->url.
*
* @param array $filepath match array; element 0 is the path to resolve
*
* @return string the path unchanged if it contains '://' or 'data:',
*                otherwise scheme://host plus the resolved path
*/
private function resolvePath($filepath)
{ // ugly code ahoy: needs a serious tidy up
$filepath = $filepath[0];
$base = $this->base;
$url = $this->url;
// a document base wins over the fetch URL, but only if it is a full URL
if ($base != '' && strpos($base, '://') !== false)
$url = $base;
// NOTE(review): scheme/host are read without isset(); $this->url is only
// set by loadURL(), so this presumably misbehaves after getByString() --
// confirm
$r = parse_url($url);
$domain = $r['scheme'] . '://' . $r['host'];
if (!isset($r['path'])) $r['path'] = '/';
$path = explode('/', $r['path']);
$file = explode('/', $filepath);
// leading '' makes the final implode() start with a '/'
$new = array('');
if (strpos($filepath, '://') !== false || strpos($filepath, 'data:') !== false){
return $filepath;
}
if ($file[0] == ''){
// absolute path
return ''.$domain . implode('/', $file);
}else{
// relative path
// drop a trailing empty segment, then a trailing segment containing a
// '.' (treated as a file name)
if ($path[sizeof($path)-1] == '') array_pop($path);
if (strpos($path[sizeof($path)-1], '.') !== false) array_pop($path);
foreach ($file as $segment){
if ($segment == '..'){
array_pop($path);
}else{
$new[] = $segment;
}
}
return ''.$domain . implode('/', $path) . implode('/', $new);
}
}
/**
 * preg_replace_callback() callback: reduce an email href such as
 * "mailto:user@example.org?subject=x" to the bare address.
 *
 * @param array $v match array; element 0 is the raw value
 *
 * @return string the path component of the value, or '' when the value
 *                cannot be parsed or has no path
 */
private function resolveEmail($v)
{
    $parts = parse_url($v[0]);
    if ($parts === false || !isset($parts['path'])) {
        // nothing usable; the callback result replaces the value, so
        // return an explicit empty string rather than an undefined index
        return '';
    }
    return $parts['path'];
}
/**
 * For every class the profile lists in $this->singles, collapse a list
 * value down to its first element.
 *
 * @param array $s list of parsed items
 *
 * @return array the list with single-valued classes collapsed
 */
private function dedupeSingles($s)
{
    $singles = $this->singles;

    foreach ($s as &$card) {
        foreach ($singles as $single) {
            if (!array_key_exists($single, $card)) {
                continue;
            }
            if (is_array($card[$single]) && isset($card[$single][0])) {
                $card[$single] = $card[$single][0];
            }
        }
    }
    unset($card);

    return $s;
}
/**
 * Recursively blank every scalar stored under the key 'text', then drop
 * all empty entries at each level.
 *
 * @param array $s parsed data
 *
 * @return array data without 'text' entries or other empty values
 */
private function removeTextVals($s)
{
    foreach ($s as $key => &$val) {
        if (is_array($val)) {
            $val = $this->removeTextVals($val);
            continue;
        }
        // strict check: numeric keys must never be taken for 'text'
        if ($key === 'text') {
            $val = '';
        }
    }
    unset($val);

    return array_filter($s);
}
}
?>

View File

@ -63,54 +63,16 @@ class DiscoveryHints {
static function hcardHints($body, $url)
{
common_debug("starting tidy");
$hcard = self::_hcard($body, $url);
$body = self::_tidy($body, $url);
common_debug("done with tidy");
set_include_path(get_include_path() . PATH_SEPARATOR . INSTALLDIR . '/plugins/OStatus/extlib/hkit/');
require_once('hkit.class.php');
// hKit code is not clean for notices and warnings
$old = error_reporting();
error_reporting($old & ~E_NOTICE & ~E_WARNING);
$h = new hKit;
$hcards = $h->getByString('hcard', $body);
error_reporting($old);
if (empty($hcards)) {
if (empty($hcard)) {
return array();
}
if (count($hcards) == 1) {
$hcard = $hcards[0];
} else {
foreach ($hcards as $try) {
if (array_key_exists('url', $try)) {
if (is_string($try['url']) && $try['url'] == $url) {
$hcard = $try;
break;
} else if (is_array($try['url'])) {
foreach ($try['url'] as $tryurl) {
if ($tryurl == $url) {
$hcard = $try;
break 2;
}
}
}
}
}
// last chance; grab the first one
if (empty($hcard)) {
$hcard = $hcards[0];
}
}
$hints = array();
// XXX: don't copy stuff into an array and then copy it again
if (array_key_exists('nickname', $hcard)) {
$hints['nickname'] = $hcard['nickname'];
}
@ -122,7 +84,7 @@ class DiscoveryHints {
}
if (array_key_exists('photo', $hcard)) {
$hints['avatar'] = $hcard['photo'];
$hints['avatar'] = $hcard['photo'][0];
}
if (array_key_exists('note', $hcard)) {
@ -149,80 +111,142 @@ class DiscoveryHints {
return $hints;
}
/**
* hKit needs well-formed XML for its parsing.
* We'll take the HTML body here and normalize it to XML.
*
* @param string $body HTML document source, possibly not-well-formed
* @param string $url source URL
* @return string well-formed XML document source
* @throws Exception if HTML parsing failed.
*/
private static function _tidy($body, $url)
static function _hcard($body, $url)
{
if (empty($body)) {
throw new Exception("Empty HTML could not be parsed.");
}
$dom = new DOMDocument();
// DOMDocument::loadHTML may throw warnings on unrecognized elements.
// Some HTML errors will trigger warnings, but still work.
$old = error_reporting();
error_reporting($old & ~E_WARNING);
$old = error_reporting(error_reporting() & ~E_WARNING);
$ok = $dom->loadHTML($body);
$doc = new DOMDocument();
$doc->loadHTML($body);
error_reporting($old);
if ($ok) {
// If the original had xmlns or xml:lang attributes on the
// <html>, we seen to end up with duplicates, which causes
// parse errors. Remove em!
//
// For some reason we have to iterate and remove them twice,
// *plus* they don't show up on hasAttribute() or removeAttribute().
// This might be some weird bug in PHP or libxml2, uncertain if
// it affects other folks consistently.
$root = $dom->documentElement;
foreach ($root->attributes as $i => $x) {
if ($i == 'xmlns' || $i == 'xml:lang') {
$root->removeAttributeNode($x);
$xp = new DOMXPath($doc);
$hcardNodes = self::_getChildrenByClass($doc->documentElement, 'vcard', $xp);
$hcards = array();
for ($i = 0; $i < $hcardNodes->length; $i++) {
$hcardNode = $hcardNodes->item($i);
$hcard = self::_hcardFromNode($hcardNode, $xp, $url);
$hcards[] = $hcard;
}
}
foreach ($root->attributes as $i => $x) {
if ($i == 'xmlns' || $i == 'xml:lang') {
$root->removeAttributeNode($x);
$repr = null;
foreach ($hcards as $hcard) {
if (in_array($url, $hcard['url'])) {
$repr = $hcard;
break;
}
}
// hKit doesn't give us a chance to pass the source URL for
// resolving relative links, such as the avatar photo on a
// Google profile. We'll slip it into a <base> tag if there's
// not already one present.
$bases = $dom->getElementsByTagName('base');
if ($bases && $bases->length >= 1) {
$base = $bases->item(0);
if ($base->hasAttribute('href')) {
$base->setAttribute('href', $url);
}
if (!is_null($repr)) {
return $repr;
} else if (count($hcards) > 0) {
return $hcards[0];
} else {
$base = $dom->createElement('base');
$base->setAttribute('href', $url);
$heads = $dom->getElementsByTagName('head');
if ($heads || $heads->length) {
$head = $heads->item(0);
return null;
}
}
/**
 * Find all descendants of an element whose class attribute contains the
 * given class name as a whole word.
 *
 * Declared static because it is invoked as self::_getChildrenByClass()
 * from static methods; calling a non-static method that way is an error
 * on PHP 8.
 *
 * @param DOMNode  $el  context element
 * @param string   $cls class name to look for
 * @param DOMXPath $xp  XPath evaluator bound to the element's document
 *
 * @return DOMNodeList matching nodes
 */
static function _getChildrenByClass($el, $cls, $xp)
{
    // borrowed from hkit. Thanks dudes!
    $qry = ".//*[contains(concat(' ',normalize-space(@class),' '),' $cls ')]";
    $nodes = $xp->query($qry, $el);
    return $nodes;
}
function _hcardFromNode($hcardNode, $xp, $base)
{
$hcard = array();
$hcard['url'] = array();
$urlNodes = self::_getChildrenByClass($hcardNode, 'url', $xp);
for ($j = 0; $j < $urlNodes->length; $j++) {
$urlNode = $urlNodes->item($j);
if ($urlNode->hasAttribute('href')) {
$url = $urlNode->getAttribute('href');
} else {
$head = $dom->createElement('head');
if ($root->firstChild) {
$root->insertBefore($head, $root->firstChild);
$url = $urlNode->textContent;
}
$hcard['url'][] = self::_rel2abs($url, $base);
}
$hcard['photo'] = array();
$photoNodes = self::_getChildrenByClass($hcardNode, 'photo', $xp);
for ($j = 0; $j < $photoNodes->length; $j++) {
$photoNode = $photoNodes->item($j);
if ($photoNode->hasAttribute('src')) {
$url = $photoNode->getAttribute('src');
} else if ($photoNode->hasAttribute('href')) {
$url = $photoNode->getAttribute('href');
} else {
$root->appendChild($head);
$url = $photoNode->textContent;
}
$hcard['photo'][] = self::_rel2abs($url, $base);
}
$singles = array('nickname', 'note', 'fn', 'n', 'adr');
foreach ($singles as $single) {
$nodes = self::_getChildrenByClass($hcardNode, $single, $xp);
if ($nodes->length > 0) {
$node = $nodes->item(0);
$hcard[$single] = $node->textContent;
}
}
$head->appendChild($base);
}
return $dom->saveXML();
} else {
throw new Exception("Invalid HTML could not be parsed.");
return $hcard;
}
// XXX: this is a first pass; we probably need
// to handle things like ../ and ./ and so on
/**
 * Resolve a possibly-relative URL against a base URL.
 *
 * @param string $rel URL to resolve
 * @param string $wrt absolute URL it is relative to
 *
 * @return string|false absolute URL, or false if either URL cannot be
 *                      parsed or $wrt has no scheme/host
 */
static function _rel2abs($rel, $wrt)
{
    $parts = parse_url($rel);
    if ($parts === false) {
        return false;
    }

    // If it's got a scheme, use it
    if (isset($parts['scheme']) && $parts['scheme'] != '') {
        return $rel;
    }

    $w = parse_url($wrt);
    if ($w === false || empty($w['scheme']) || empty($w['host'])) {
        return false;
    }

    // Protocol-relative ("//host/path"): only the scheme is inherited.
    if (isset($parts['host'])) {
        return $w['scheme'].':'.$rel;
    }

    $base = $w['scheme'].'://'.$w['host'];
    if (isset($w['port'])) {
        // keep a non-default port, otherwise links point at the wrong server
        $base .= ':'.$w['port'];
    }

    // strncmp() is safe on an empty $rel, unlike $rel[0]
    if (strncmp($rel, '/', 1) === 0) {
        return $base.$rel;
    }

    // Relative to the "directory" of the base path; a base with no path
    // resolves against the root.
    $wp = explode('/', isset($w['path']) ? $w['path'] : '');
    array_pop($wp);

    return $base.implode('/', $wp).'/'.$rel;
}
}

View File

@ -73,6 +73,7 @@ class FeedDiscovery
public $uri;
public $type;
public $feed;
public $root;
/** Post-initialize query helper... */
public function getLink($rel, $type=null)
@ -83,7 +84,7 @@ class FeedDiscovery
public function getAtomLink($rel, $type=null)
{
return ActivityUtils::getLink($this->feed->documentElement, $rel, $type);
return ActivityUtils::getLink($this->root, $rel, $type);
}
/**
@ -154,9 +155,27 @@ class FeedDiscovery
$this->uri = $sourceurl;
$this->type = $type;
$this->feed = $feed;
$el = $this->feed->documentElement;
// Looking for the "root" element: RSS channel or Atom feed
if ($el->tagName == 'rss') {
$channels = $el->getElementsByTagName('channel');
if ($channels->length > 0) {
$this->root = $channels->item(0);
} else {
throw new FeedSubBadXmlException($sourceurl);
}
} else if ($el->tagName == 'feed') {
$this->root = $el;
} else {
throw new FeedSubBadXmlException($sourceurl);
}
return $this->uri;
} else {
throw new FeedSubBadXmlException($url);
throw new FeedSubBadXmlException($sourceurl);
}
}

View File

@ -50,7 +50,7 @@ $encGroup = str_replace($marker, '%', $encGroup);
$sql = "SELECT * FROM ostatus_profile WHERE uri LIKE '%s' OR uri LIKE '%s'";
$oprofile->query(sprintf($sql, $encProfile, $encGroup));
echo "Found $oprofile->N bogus ostatus_profile entries:\n";
echo "Found $oprofile->N bogus ostatus_profile entries for local users and groups:\n";
while ($oprofile->fetch()) {
echo "$oprofile->uri";
@ -58,7 +58,7 @@ while ($oprofile->fetch()) {
if ($dry) {
echo " (unchanged)\n";
} else {
echo " deleting...";
echo " removing bogus ostatus_profile entry...";
$evil = clone($oprofile);
$evil->delete();
echo " ok\n";

View File

@ -59,8 +59,8 @@ unless you configure it with a consumer key and secret.)
secret. The Twitter bridge will fall back on the global key pair if
it can't find a local pair, e.g.:
$config['twitter']['global_consumer_key'] = 'YOUR_CONSUMER_KEY'
$config['twitter']['global_consumer_secret'] = 'YOUR_CONSUMER_SECRET'
$config['twitter']['global_consumer_key'] = 'YOUR_CONSUMER_KEY';
$config['twitter']['global_consumer_secret'] = 'YOUR_CONSUMER_SECRET';
Administration panel
--------------------

View File

@ -138,9 +138,38 @@ class ActivityParseTests extends PHPUnit_Framework_TestCase
$this->assertEquals($poco->urls[0]->value, 'http://example.com/blog.html');
$this->assertEquals($poco->urls[0]->primary, 'true');
$this->assertEquals($act->actor->geopoint, '37.7749295 -122.4194155');
}
/**
 * An RSS 2.0 <item> (with its <channel> as feed context) must parse
 * into a "post" Activity carrying the item's guid, link, title,
 * publication time and an actor named after dc:creator.
 */
public function testExample6()
{
    global $_example6;

    // loadXML() is an instance method: calling it statically is deprecated
    // in PHP 5 and a fatal error in PHP 8. Also make sure the fixture parsed.
    $dom = new DOMDocument();
    $this->assertTrue($dom->loadXML($_example6));

    $channels = $dom->getElementsByTagName('channel');
    $channel = $channels->item(0);

    $items = $channel->getElementsByTagName('item');
    $item = $items->item(0);

    $act = new Activity($item, $channel);

    $this->assertEquals($act->verb, ActivityVerb::POST);
    $this->assertEquals($act->id, 'http://en.blog.wordpress.com/?p=3857');
    $this->assertEquals($act->link, 'http://en.blog.wordpress.com/2010/03/03/rub-a-dub-dub-in-the-pubsubhubbub/');
    $this->assertEquals($act->title, 'Rub-a-Dub-Dub in the PubSubHubbub');
    $this->assertEquals($act->time, 1267634892);

    $actor = $act->actor;

    $this->assertFalse(empty($actor));
    $this->assertEquals($actor->title, "Joseph Scott");
}
}
$_example1 = <<<EXAMPLE1
@ -330,3 +359,67 @@ $_example5 = <<<EXAMPLE5
</entry>
</feed>
EXAMPLE5;
$_example6 = <<<EXAMPLE6
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
>
<channel>
<title>WordPress.com News</title>
<atom:link href="http://en.blog.wordpress.com/feed/" rel="self" type="application/rss+xml" />
<link>http://en.blog.wordpress.com</link>
<description>The latest news on WordPress.com and the WordPress community.</description>
<lastBuildDate>Thu, 18 Mar 2010 23:25:35 +0000</lastBuildDate>
<generator>http://wordpress.com/</generator>
<language>en</language>
<sy:updatePeriod>hourly</sy:updatePeriod>
<sy:updateFrequency>1</sy:updateFrequency>
<cloud domain='en.blog.wordpress.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
<url>http://www.gravatar.com/blavatar/e6392390e3bcfadff3671c5a5653d95b?s=96&#038;d=http://s2.wp.com/i/buttonw-com.png</url>
<title>WordPress.com News</title>
<link>http://en.blog.wordpress.com</link>
</image>
<atom:link rel="search" type="application/opensearchdescription+xml" href="http://en.blog.wordpress.com/osd.xml" title="WordPress.com News" />
<atom:link rel='hub' href='http://en.blog.wordpress.com/?pushpress=hub'/>
<item>
<title>Rub-a-Dub-Dub in the PubSubHubbub</title>
<link>http://en.blog.wordpress.com/2010/03/03/rub-a-dub-dub-in-the-pubsubhubbub/</link>
<comments>http://en.blog.wordpress.com/2010/03/03/rub-a-dub-dub-in-the-pubsubhubbub/#comments</comments>
<pubDate>Wed, 03 Mar 2010 16:48:12 +0000</pubDate>
<dc:creator>Joseph Scott</dc:creator>
<category><![CDATA[Feeds]]></category>
<category><![CDATA[atom]]></category>
<category><![CDATA[pubsubhubbub]]></category>
<category><![CDATA[rss]]></category>
<guid isPermaLink="false">http://en.blog.wordpress.com/?p=3857</guid>
<description><![CDATA[From the tongue twisting name department we welcome PubSubHubbub, or as some people have shortened it to: PuSH. Like rssCloud, PuSH is a way for services that subscribe to updates from your blog (think Google Reader, Bloglines or Netvibes) to get updates even faster. In a nutshell, instead of having to periodically ask [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=en.blog.wordpress.com&blog=3584907&post=3857&subd=en.blog&ref=&feed=1" />]]></description>
<content:encoded><![CDATA[<p>From the tongue twisting name department we welcome <a href="http://code.google.com/p/pubsubhubbub/">PubSubHubbub</a>, or as some people have shortened it to: PuSH. Like <a href="http://en.blog.wordpress.com/2009/09/07/rss-in-the-clouds/">rssCloud</a>, PuSH is a way for services that subscribe to updates from your blog (think Google Reader, Bloglines or Netvibes) to get updates even faster. In a nutshell, instead of having to periodically ask your blog if there are any updates they can now register to automatically receive updates each time you publish new content. In most cases these updates are sent out within a second or two of when you hit the publish button.</p>
<p>Today we&#8217;ve turned on PuSH support for the more than 10.5 million blogs on WordPress.com. There&#8217;s nothing to configure, it&#8217;s working right now behind the scenes to help others keep up to date with your posts.</p>
<p>For those using the WordPress.org software we are releasing a new PuSH plugin: <a href="http://wordpress.org/extend/plugins/pushpress/">PuSHPress</a>. This plugin differs from the current PuSH related plugins by including a built-in hub.</p>
<p>For more PuSH related reading check out the <a href="http://code.google.com/p/pubsubhubbub/">PubSubHubbub project site</a> and <a href="http://groups.google.com/group/pubsubhubbub?pli=1">Google Group</a>. And if you really want to geek out there&#8217;s always the <a href="http://pubsubhubbub.googlecode.com/svn/trunk/pubsubhubbub-core-0.3.html">PubSubHubbub Spec</a> <img src='http://s.wordpress.com/wp-includes/images/smilies/icon_smile.gif' alt=':-)' class='wp-smiley' /> </p>
<br /> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/en.blog.wordpress.com/3857/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/en.blog.wordpress.com/3857/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/en.blog.wordpress.com/3857/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/en.blog.wordpress.com/3857/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/en.blog.wordpress.com/3857/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/en.blog.wordpress.com/3857/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/en.blog.wordpress.com/3857/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/en.blog.wordpress.com/3857/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/en.blog.wordpress.com/3857/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/en.blog.wordpress.com/3857/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=en.blog.wordpress.com&blog=3584907&post=3857&subd=en.blog&ref=&feed=1" />]]></content:encoded>
<wfw:commentRss>http://en.blog.wordpress.com/2010/03/03/rub-a-dub-dub-in-the-pubsubhubbub/feed/</wfw:commentRss>
<slash:comments>96</slash:comments>
<media:content url="http://1.gravatar.com/avatar/582b66ad5ae1b69c7601a990cb9a661a?s=96&#38;d=identicon" medium="image">
<media:title type="html">josephscott</media:title>
</media:content>
</item>
</channel>
</rss>
EXAMPLE6;