/**
 * Discovery method based on the HTML <link> element
 *
 * Discovers XRD file for a user by fetching the URL and reading any
 * <link> elements in the HTML response.
 *
 * @category  Discovery
 * @package   StatusNet
 * @author    James Walker
 * @copyright 2010 StatusNet, Inc.
 * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
 * @link      http://status.net/
 */
class LRDDMethod_LinkHTML extends LRDDMethod
{
    /**
     * For HTTP IDs, fetch the URL and look for <link> elements
     * in the HTML response.
     *
     * @todo fail out of WebFinger URIs faster
     *
     * @param string $uri URI to fetch
     *
     * @return array array of XML_XRD_Element_Link objects, as built by parse()
     */
    public function discover($uri)
    {
        $response = self::fetchUrl($uri);

        return self::parse($response->getBody());
    }

    /**
     * Parse HTML and return <link> elements
     *
     * Given an HTML string, scans the <head> section for <link> elements
     * and builds one XML_XRD_Element_Link per element from its rel, href
     * and type attributes. An attribute that is absent is passed as null.
     *
     * @param string $html HTML to scan
     *
     * @return array array of XML_XRD_Element_Link objects; empty when the
     *               document has no <head>...</head> section
     */
    public function parse($html)
    {
        $links = array();

        // Only the document head is scanned. Group 1 holds the optional
        // attributes of the <head> tag, group 2 the head's content.
        if (preg_match('/<head(\s[^>]*)?>(.*?)<\/head>/is', $html, $head_matches) !== 1) {
            return $links;
        }
        $head_html = $head_matches[2];

        preg_match_all('/<link\s[^>]*>/i', $head_html, $link_matches);

        foreach ($link_matches[0] as $link_html) {
            $link_rel  = self::extractAttribute($link_html, 'rel');
            $link_uri  = self::extractAttribute($link_html, 'href');
            $link_type = self::extractAttribute($link_html, 'type');

            $links[] = new XML_XRD_Element_Link($link_rel, $link_uri, $link_type);
        }

        return $links;
    }

    /**
     * Extract the value of one attribute from the markup of a single tag
     *
     * Handles double-quoted, single-quoted and unquoted values. The
     * attribute name is matched case-insensitively.
     *
     * @param string $tag  markup of one element, e.g. '<link rel="x">'
     * @param string $name attribute name to look for
     *
     * @return string|null the attribute value, or null when not present
     */
    private static function extractAttribute($tag, $name)
    {
        // Alternative 1: a quote (group 2), a lazy value (group 3), then the
        // same quote again via the \2 backreference.
        // Alternative 2: an unquoted run that stops at whitespace, a quote,
        // or the '>' that closes the tag.
        $pattern = '/\s' . preg_quote($name, '/') . '=(("|\')(.*?)\\2|[^"\'\s>]+)/is';

        if (preg_match($pattern, $tag, $matches) !== 1) {
            return null;
        }

        // Group 3 is only present when the quoted alternative matched;
        // it may legitimately be an empty string (e.g. rel="").
        return isset($matches[3]) ? $matches[3] : $matches[1];
    }
}