| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  | <?php | 
					
						
							| 
									
										
										
										
											2020-07-22 01:30:04 +03:00
										 |  |  | // This file is part of GNU social - https://www.gnu.org/software/social
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // GNU social is free software: you can redistribute it and/or modify
 | 
					
						
							|  |  |  | // it under the terms of the GNU Affero General Public License as published by
 | 
					
						
							|  |  |  | // the Free Software Foundation, either version 3 of the License, or
 | 
					
						
							|  |  |  | // (at your option) any later version.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // GNU social is distributed in the hope that it will be useful,
 | 
					
						
							|  |  |  | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
					
						
							|  |  |  | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
					
						
							|  |  |  | // GNU Affero General Public License for more details.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // You should have received a copy of the GNU Affero General Public License
 | 
					
						
							|  |  |  | // along with GNU social.  If not, see <http://www.gnu.org/licenses/>.
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  | /** | 
					
						
							|  |  |  |  * Implementation of discovery using HTML <link> element | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Discovers XRD file for a user by fetching the URL and reading any | 
					
						
							|  |  |  |  * <link> elements in the HTML response. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * @category  Discovery | 
					
						
							| 
									
										
										
										
											2020-07-22 01:30:04 +03:00
										 |  |  |  * @package   GNUsocial | 
					
						
							| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  |  * @author    James Walker <james@status.net> | 
					
						
							|  |  |  |  * @copyright 2010 StatusNet, Inc. | 
					
						
							| 
									
										
										
										
											2020-07-22 01:30:04 +03:00
										 |  |  |  * @license   https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later | 
					
						
							| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  |  */ | 
					
						
							|  |  |  | class LRDDMethod_LinkHTML extends LRDDMethod | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * For HTTP IDs, fetch the URL and look for <link> elements | 
					
						
							|  |  |  |      * in the HTML response. | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * @todo fail out of WebFinger URIs faster | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function discover($uri) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $response = self::fetchUrl($uri); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return self::parse($response->getBody()); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Parse HTML and return <link> elements | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * Given an HTML string, scans the string for <link> elements | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * @param string $html HTML to scan | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * @return array array of associative arrays in JRD-ish array format | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function parse($html) | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2020-07-22 01:30:04 +03:00
										 |  |  |         $links = []; | 
					
						
							| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         preg_match('/<head(\s[^>]*)?>(.*?)<\/head>/is', $html, $head_matches); | 
					
						
							| 
									
										
										
										
											2020-07-22 01:30:04 +03:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if (count($head_matches) != 3) { | 
					
						
							|  |  |  |             return []; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         [,, $head_html] = $head_matches; | 
					
						
							| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         preg_match_all('/<link\s[^>]*>/i', $head_html, $link_matches); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         foreach ($link_matches[0] as $link_html) { | 
					
						
							|  |  |  |             $link_url  = null; | 
					
						
							|  |  |  |             $link_rel  = null; | 
					
						
							|  |  |  |             $link_type = null; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             preg_match('/\srel=(("|\')([^\\2]*?)\\2|[^"\'\s]+)/i', $link_html, $rel_matches); | 
					
						
							| 
									
										
										
										
											2020-07-22 01:30:04 +03:00
										 |  |  |             if (count($rel_matches) > 3) { | 
					
						
							| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  |                 $link_rel = $rel_matches[3]; | 
					
						
							| 
									
										
										
										
											2020-07-22 01:30:04 +03:00
										 |  |  |             } elseif (count($rel_matches) > 1) { | 
					
						
							| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  |                 $link_rel = $rel_matches[1]; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             preg_match('/\shref=(("|\')([^\\2]*?)\\2|[^"\'\s]+)/i', $link_html, $href_matches); | 
					
						
							| 
									
										
										
										
											2020-07-22 01:30:04 +03:00
										 |  |  |             if (count($href_matches) > 3) { | 
					
						
							| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  |                 $link_uri = $href_matches[3]; | 
					
						
							| 
									
										
										
										
											2020-07-22 01:30:04 +03:00
										 |  |  |             } elseif (count($href_matches) > 1) { | 
					
						
							| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  |                 $link_uri = $href_matches[1]; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             preg_match('/\stype=(("|\')([^\\2]*?)\\2|[^"\'\s]+)/i', $link_html, $type_matches); | 
					
						
							| 
									
										
										
										
											2020-07-22 01:30:04 +03:00
										 |  |  |             if (count($type_matches) > 3) { | 
					
						
							| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  |                 $link_type = $type_matches[3]; | 
					
						
							| 
									
										
										
										
											2020-07-22 01:30:04 +03:00
										 |  |  |             } elseif (count($type_matches) > 1) { | 
					
						
							| 
									
										
										
										
											2013-09-30 17:13:03 +02:00
										 |  |  |                 $link_type = $type_matches[1]; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             $links[] = new XML_XRD_Element_Link($link_rel, $link_uri, $link_type); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $links; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } |