| 
									
										
										
										
											2010-03-16 11:25:18 -05:00
										 |  |  | <?php | 
					
						
							|  |  |  | /* | 
					
						
							|  |  |  |  * StatusNet - the distributed open-source microblogging tool | 
					
						
							|  |  |  |  * Copyright (C) 2010, StatusNet, Inc. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Some utilities for generating hint data | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This program is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  |  * it under the terms of the GNU Affero General Public License as published by | 
					
						
							|  |  |  |  * the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  |  * (at your option) any later version. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This program is distributed in the hope that it will be useful, | 
					
						
							|  |  |  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  |  * GNU Affero General Public License for more details. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * You should have received a copy of the GNU Affero General Public License | 
					
						
							|  |  |  |  * along with this program.  If not, see <http://www.gnu.org/licenses/>. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class DiscoveryHints { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     static function fromXRD($xrd) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $hints = array(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         foreach ($xrd->links as $link) { | 
					
						
							|  |  |  |             switch ($link['rel']) { | 
					
						
							|  |  |  |             case Discovery::PROFILEPAGE: | 
					
						
							|  |  |  |                 $hints['profileurl'] = $link['href']; | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |             case Salmon::NS_REPLIES: | 
					
						
							|  |  |  |                 $hints['salmon'] = $link['href']; | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |             case Discovery::UPDATESFROM: | 
					
						
							|  |  |  |                 $hints['feedurl'] = $link['href']; | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |             case Discovery::HCARD: | 
					
						
							|  |  |  |                 $hints['hcardurl'] = $link['href']; | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |             default: | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $hints; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     static function fromHcardUrl($url) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $client = new HTTPClient(); | 
					
						
							|  |  |  |         $client->setHeader('Accept', 'text/html,application/xhtml+xml'); | 
					
						
							|  |  |  |         $response = $client->get($url); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (!$response->isOk()) { | 
					
						
							|  |  |  |             return null; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return self::hcardHints($response->getBody(), | 
					
						
							|  |  |  |                                 $response->getUrl()); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     static function hcardHints($body, $url) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         common_debug("starting tidy"); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-03-18 17:08:19 -07:00
										 |  |  |         $body = self::_tidy($body, $url); | 
					
						
							| 
									
										
										
										
											2010-03-16 11:25:18 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  |         common_debug("done with tidy"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         set_include_path(get_include_path() . PATH_SEPARATOR . INSTALLDIR . '/plugins/OStatus/extlib/hkit/'); | 
					
						
							|  |  |  |         require_once('hkit.class.php'); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-03-18 17:08:19 -07:00
										 |  |  |         // hKit code is not clean for notices and warnings
 | 
					
						
							|  |  |  |         $old = error_reporting(); | 
					
						
							|  |  |  |         error_reporting($old & ~E_NOTICE & ~E_WARNING); | 
					
						
							| 
									
										
										
										
											2010-03-16 11:25:18 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-03-18 17:08:19 -07:00
										 |  |  |         $h	= new hKit; | 
					
						
							| 
									
										
										
										
											2010-03-16 11:25:18 -05:00
										 |  |  |         $hcards = $h->getByString('hcard', $body); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-03-18 17:08:19 -07:00
										 |  |  |         error_reporting($old); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-03-16 11:25:18 -05:00
										 |  |  |         if (empty($hcards)) { | 
					
						
							|  |  |  |             return array(); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (count($hcards) == 1) { | 
					
						
							|  |  |  |             $hcard = $hcards[0]; | 
					
						
							|  |  |  |         } else { | 
					
						
							|  |  |  |             foreach ($hcards as $try) { | 
					
						
							|  |  |  |                 if (array_key_exists('url', $try)) { | 
					
						
							|  |  |  |                     if (is_string($try['url']) && $try['url'] == $url) { | 
					
						
							|  |  |  |                         $hcard = $try; | 
					
						
							|  |  |  |                         break; | 
					
						
							|  |  |  |                     } else if (is_array($try['url'])) { | 
					
						
							|  |  |  |                         foreach ($try['url'] as $tryurl) { | 
					
						
							|  |  |  |                             if ($tryurl == $url) { | 
					
						
							|  |  |  |                                 $hcard = $try; | 
					
						
							|  |  |  |                                 break 2; | 
					
						
							|  |  |  |                             } | 
					
						
							|  |  |  |                         } | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             // last chance; grab the first one
 | 
					
						
							|  |  |  |             if (empty($hcard)) { | 
					
						
							|  |  |  |                 $hcard = $hcards[0]; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $hints = array(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (array_key_exists('nickname', $hcard)) { | 
					
						
							|  |  |  |             $hints['nickname'] = $hcard['nickname']; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (array_key_exists('fn', $hcard)) { | 
					
						
							|  |  |  |             $hints['fullname'] = $hcard['fn']; | 
					
						
							|  |  |  |         } else if (array_key_exists('n', $hcard)) { | 
					
						
							|  |  |  |             $hints['fullname'] = implode(' ', $hcard['n']); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (array_key_exists('photo', $hcard)) { | 
					
						
							|  |  |  |             $hints['avatar'] = $hcard['photo']; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (array_key_exists('note', $hcard)) { | 
					
						
							|  |  |  |             $hints['bio'] = $hcard['note']; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (array_key_exists('adr', $hcard)) { | 
					
						
							|  |  |  |             if (is_string($hcard['adr'])) { | 
					
						
							|  |  |  |                 $hints['location'] = $hcard['adr']; | 
					
						
							|  |  |  |             } else if (is_array($hcard['adr'])) { | 
					
						
							|  |  |  |                 $hints['location'] = implode(' ', $hcard['adr']); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (array_key_exists('url', $hcard)) { | 
					
						
							|  |  |  |             if (is_string($hcard['url'])) { | 
					
						
							|  |  |  |                 $hints['homepage'] = $hcard['url']; | 
					
						
							|  |  |  |             } else if (is_array($hcard['url'])) { | 
					
						
							|  |  |  |                 // HACK get the last one; that's how our hcards look
 | 
					
						
							|  |  |  |                 $hints['homepage'] = $hcard['url'][count($hcard['url'])-1]; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $hints; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-03-18 17:08:19 -07:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * hKit needs well-formed XML for its parsing. | 
					
						
							|  |  |  |      * We'll take the HTML body here and normalize it to XML. | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * @param string $body HTML document source, possibly not-well-formed | 
					
						
							|  |  |  |      * @param string $url source URL | 
					
						
							|  |  |  |      * @return string well-formed XML document source | 
					
						
							|  |  |  |      * @throws Exception if HTML parsing failed. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     private static function _tidy($body, $url) | 
					
						
							| 
									
										
										
										
											2010-03-16 11:25:18 -05:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2010-03-18 17:08:19 -07:00
										 |  |  |         if (empty($body)) { | 
					
						
							|  |  |  |             throw new Exception("Empty HTML could not be parsed."); | 
					
						
							| 
									
										
										
										
											2010-03-16 11:25:18 -05:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2010-03-18 17:08:19 -07:00
										 |  |  |         $dom = new DOMDocument(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Some HTML errors will trigger warnings, but still work.
 | 
					
						
							|  |  |  |         $old = error_reporting(); | 
					
						
							|  |  |  |         error_reporting($old & ~E_WARNING); | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         $ok = $dom->loadHTML($body); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         error_reporting($old); | 
					
						
							|  |  |  |          | 
					
						
							|  |  |  |         if ($ok) { | 
					
						
							|  |  |  |             // hKit doesn't give us a chance to pass the source URL for
 | 
					
						
							|  |  |  |             // resolving relative links, such as the avatar photo on a
 | 
					
						
							|  |  |  |             // Google profile. We'll slip it into a <base> tag if there's
 | 
					
						
							|  |  |  |             // not already one present.
 | 
					
						
							|  |  |  |             $bases = $dom->getElementsByTagName('base'); | 
					
						
							|  |  |  |             if ($bases && $bases->length >= 1) { | 
					
						
							|  |  |  |                 $base = $bases->item(0); | 
					
						
							|  |  |  |                 if ($base->hasAttribute('href')) { | 
					
						
							|  |  |  |                     $base->setAttribute('href', $url); | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } else { | 
					
						
							|  |  |  |                 $base = $dom->createElement('base'); | 
					
						
							|  |  |  |                 $base->setAttribute('href', $url); | 
					
						
							|  |  |  |                 $heads = $dom->getElementsByTagName('head'); | 
					
						
							|  |  |  |                 if ($heads || $heads->length) { | 
					
						
							|  |  |  |                     $head = $heads->item(0); | 
					
						
							|  |  |  |                 } else { | 
					
						
							|  |  |  |                     $head = $dom->createElement('head'); | 
					
						
							|  |  |  |                     $root = $dom->documentRoot; | 
					
						
							|  |  |  |                     if ($root->firstChild) { | 
					
						
							|  |  |  |                         $root->insertBefore($head, $root->firstChild); | 
					
						
							|  |  |  |                     } else { | 
					
						
							|  |  |  |                         $root->appendChild($head); | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 $head->appendChild($base); | 
					
						
							| 
									
										
										
										
											2010-03-16 11:25:18 -05:00
										 |  |  |             } | 
					
						
							| 
									
										
										
										
											2010-03-18 17:08:19 -07:00
										 |  |  |             return $dom->saveXML(); | 
					
						
							|  |  |  |         } else { | 
					
						
							|  |  |  |             throw new Exception("Invalid HTML could not be parsed."); | 
					
						
							| 
									
										
										
										
											2010-03-16 11:25:18 -05:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } |