| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | <?php | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | namespace Mf2; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | use DOMDocument; | 
					
						
							|  |  |  |  | use DOMElement; | 
					
						
							|  |  |  |  | use DOMXPath; | 
					
						
							|  |  |  |  | use DOMNode; | 
					
						
							|  |  |  |  | use DOMNodeList; | 
					
						
							|  |  |  |  | use Exception; | 
					
						
							|  |  |  |  | use SplObjectStorage; | 
					
						
							|  |  |  |  | use stdClass; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | /** | 
					
						
							|  |  |  |  |  * Parse Microformats2 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * Functional shortcut for the commonest cases of parsing microformats2 from HTML. | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * Example usage: | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  *     use Mf2; | 
					
						
							|  |  |  |  |  *     $output = Mf2\parse('<span class="h-card">Barnaby Walters</span>'); | 
					
						
							|  |  |  |  |  *     echo json_encode($output, JSON_PRETTY_PRINT); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * Produces: | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  *     { | 
					
						
							|  |  |  |  |  *      "items": [ | 
					
						
							|  |  |  |  |  *       { | 
					
						
							|  |  |  |  |  *        "type": ["h-card"], | 
					
						
							|  |  |  |  |  *        "properties": { | 
					
						
							|  |  |  |  |  *         "name": ["Barnaby Walters"] | 
					
						
							|  |  |  |  |  *        } | 
					
						
							|  |  |  |  |  *       } | 
					
						
							|  |  |  |  |  *      ], | 
					
						
							|  |  |  |  |  *      "rels": {} | 
					
						
							|  |  |  |  |  *     } | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * @param string|DOMDocument $input The HTML string or DOMDocument object to parse | 
					
						
							|  |  |  |  |  * @param string $url The URL the input document was found at, for relative URL resolution | 
					
						
							|  |  |  |  |  * @param bool $convertClassic whether or not to convert classic microformats | 
					
						
							|  |  |  |  |  * @return array Canonical MF2 array structure | 
					
						
							|  |  |  |  |  */ | 
					
						
							|  |  |  |  | function parse($input, $url = null, $convertClassic = true) { | 
					
						
							|  |  |  |  | 	$parser = new Parser($input, $url); | 
					
						
							|  |  |  |  | 	return $parser->parse($convertClassic); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | /** | 
					
						
							|  |  |  |  |  * Fetch microformats2 | 
					
						
							|  |  |  |  |  * | 
					
						
							|  |  |  |  |  * Given a URL, fetches it (following up to 5 redirects) and, if the content-type appears to be HTML, returns the parsed | 
					
						
							|  |  |  |  |  * microformats2 array structure. | 
					
						
							|  |  |  |  |  * | 
					
						
							|  |  |  |  |  * Not that even if the response code was a 4XX or 5XX error, if the content-type is HTML-like then it will be parsed | 
					
						
							|  |  |  |  |  * all the same, as there are legitimate cases where error pages might contain useful microformats (for example a deleted | 
					
						
							|  |  |  |  |  * h-entry resulting in a 410 Gone page with a stub h-entry explaining the reason for deletion). Look in $curlInfo['http_code'] | 
					
						
							|  |  |  |  |  * for the actual value. | 
					
						
							|  |  |  |  |  * | 
					
						
							|  |  |  |  |  * @param string $url The URL to fetch | 
					
						
							|  |  |  |  |  * @param bool $convertClassic (optional, default true) whether or not to convert classic microformats | 
					
						
							|  |  |  |  |  * @param &array $curlInfo (optional) the results of curl_getinfo will be placed in this variable for debugging | 
					
						
							|  |  |  |  |  * @return array|null canonical microformats2 array structure on success, null on failure | 
					
						
							|  |  |  |  |  */ | 
					
						
							|  |  |  |  | function fetch($url, $convertClassic = true, &$curlInfo=null) { | 
					
						
							|  |  |  |  | 	$ch = curl_init(); | 
					
						
							|  |  |  |  | 	curl_setopt($ch, CURLOPT_URL, $url); | 
					
						
							|  |  |  |  | 	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); | 
					
						
							|  |  |  |  | 	curl_setopt($ch, CURLOPT_HEADER, 0); | 
					
						
							|  |  |  |  | 	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); | 
					
						
							|  |  |  |  | 	curl_setopt($ch, CURLOPT_MAXREDIRS, 5); | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 	$html = curl_exec($ch); | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	$info = $curlInfo = curl_getinfo($ch); | 
					
						
							|  |  |  |  | 	curl_close($ch); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	if (strpos(strtolower($info['content_type']), 'html') === false) { | 
					
						
							|  |  |  |  | 		// The content was not delivered as HTML, do not attempt to parse it.
 | 
					
						
							|  |  |  |  | 		return null; | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	return parse($html, $url, $convertClassic); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | /** | 
					
						
							|  |  |  |  |  * Unicode to HTML Entities | 
					
						
							|  |  |  |  |  * @param string $input String containing characters to convert into HTML entities | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * @return string | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  */ | 
					
						
							|  |  |  |  | function unicodeToHtmlEntities($input) { | 
					
						
							|  |  |  |  | 	return mb_convert_encoding($input, 'HTML-ENTITIES', mb_detect_encoding($input)); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | /** | 
					
						
							|  |  |  |  |  * Collapse Whitespace | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * Collapses any sequences of whitespace within a string into a single space | 
					
						
							|  |  |  |  |  * character. | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * @deprecated since v0.2.3 | 
					
						
							|  |  |  |  |  * @param string $str | 
					
						
							|  |  |  |  |  * @return string | 
					
						
							|  |  |  |  |  */ | 
					
						
							|  |  |  |  | function collapseWhitespace($str) { | 
					
						
							|  |  |  |  | 	return preg_replace('/[\s|\n]+/', ' ', $str); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | function unicodeTrim($str) { | 
					
						
							|  |  |  |  | 	// this is cheating. TODO: find a better way if this causes any problems
 | 
					
						
							|  |  |  |  | 	$str = str_replace(mb_convert_encoding(' ', 'UTF-8', 'HTML-ENTITIES'), ' ', $str); | 
					
						
							|  |  |  |  | 	$str = preg_replace('/^\s+/', '', $str); | 
					
						
							|  |  |  |  | 	return preg_replace('/\s+$/', '', $str); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | /** | 
					
						
							|  |  |  |  |  * Microformat Name From Class string | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							|  |  |  |  |  * Given the value of @class, get the relevant mf classnames (e.g. h-card, | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * p-name). | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * @param string $class A space delimited list of classnames | 
					
						
							|  |  |  |  |  * @param string $prefix The prefix to look for | 
					
						
							|  |  |  |  |  * @return string|array The prefixed name of the first microfomats class found or false | 
					
						
							|  |  |  |  |  */ | 
					
						
							|  |  |  |  | function mfNamesFromClass($class, $prefix='h-') { | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 	$class = str_replace(array(' ', '	', "\n"), ' ', $class); | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	$classes = explode(' ', $class); | 
					
						
							|  |  |  |  | 	$matches = array(); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	foreach ($classes as $classname) { | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 		$compare_classname = ' ' . $classname; | 
					
						
							|  |  |  |  | 		$compare_prefix = ' ' . $prefix; | 
					
						
							|  |  |  |  | 		if (strstr($compare_classname, $compare_prefix) !== false && ($compare_classname != $compare_prefix)) { | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			$matches[] = ($prefix === 'h-') ? $classname : substr($classname, strlen($prefix)); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	return $matches; | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | /** | 
					
						
							|  |  |  |  |  * Get Nested µf Property Name From Class | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							|  |  |  |  |  * Returns all the p-, u-, dt- or e- prefixed classnames it finds in a | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * space-separated string. | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * @param string $class | 
					
						
							|  |  |  |  |  * @return array | 
					
						
							|  |  |  |  |  */ | 
					
						
							|  |  |  |  | function nestedMfPropertyNamesFromClass($class) { | 
					
						
							|  |  |  |  | 	$prefixes = array('p-', 'u-', 'dt-', 'e-'); | 
					
						
							|  |  |  |  | 	$propertyNames = array(); | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 	$class = str_replace(array(' ', '	', "\n"), ' ', $class); | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	foreach (explode(' ', $class) as $classname) { | 
					
						
							|  |  |  |  | 		foreach ($prefixes as $prefix) { | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 			// Check if $classname is a valid property classname for $prefix.
 | 
					
						
							|  |  |  |  | 			if (mb_substr($classname, 0, mb_strlen($prefix)) == $prefix && $classname != $prefix) { | 
					
						
							|  |  |  |  | 				$propertyName = mb_substr($classname, mb_strlen($prefix)); | 
					
						
							|  |  |  |  | 				$propertyNames[$propertyName][] = $prefix; | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			} | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 | 
					
						
							|  |  |  |  | 	foreach ($propertyNames as $property => $prefixes) { | 
					
						
							|  |  |  |  | 		$propertyNames[$property] = array_unique($prefixes); | 
					
						
							|  |  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 	return $propertyNames; | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | /** | 
					
						
							|  |  |  |  |  * Wraps mfNamesFromClass to handle an element as input (common) | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * @param DOMElement $e The element to get the classname for | 
					
						
							|  |  |  |  |  * @param string $prefix The prefix to look for | 
					
						
							|  |  |  |  |  * @return mixed See return value of mf2\Parser::mfNameFromClass() | 
					
						
							|  |  |  |  |  */ | 
					
						
							|  |  |  |  | function mfNamesFromElement(\DOMElement $e, $prefix = 'h-') { | 
					
						
							|  |  |  |  | 	$class = $e->getAttribute('class'); | 
					
						
							|  |  |  |  | 	return mfNamesFromClass($class, $prefix); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | /** | 
					
						
							|  |  |  |  |  * Wraps nestedMfPropertyNamesFromClass to handle an element as input | 
					
						
							|  |  |  |  |  */ | 
					
						
							|  |  |  |  | function nestedMfPropertyNamesFromElement(\DOMElement $e) { | 
					
						
							|  |  |  |  | 	$class = $e->getAttribute('class'); | 
					
						
							|  |  |  |  | 	return nestedMfPropertyNamesFromClass($class); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | /** | 
					
						
							|  |  |  |  |  * Converts various time formats to HH:MM | 
					
						
							|  |  |  |  |  * @param string $time The time to convert | 
					
						
							|  |  |  |  |  * @return string | 
					
						
							|  |  |  |  |  */ | 
					
						
							|  |  |  |  | function convertTimeFormat($time) { | 
					
						
							|  |  |  |  | 	$hh = $mm = $ss = ''; | 
					
						
							|  |  |  |  | 	preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches); | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	// If no am/pm is specified:
 | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 	if (empty($matches[4])) { | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		return $time; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	} else { | 
					
						
							|  |  |  |  | 		// Otherwise, am/pm is specified.
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		$meridiem = strtolower(str_replace('.', '', $matches[4])); | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 		// Hours.
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		$hh = $matches[1]; | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 		// Add 12 to hours if pm applies.
 | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 		if ($meridiem == 'pm' && ($hh < 12)) { | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			$hh += 12; | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		$hh = str_pad($hh, 2, '0', STR_PAD_LEFT); | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 		// Minutes.
 | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 		$mm = (empty($matches[2]) ) ? '00' : $matches[2]; | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 		// Seconds, only if supplied.
 | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 		if (!empty($matches[3])) { | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			$ss = $matches[3]; | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 		if (empty($ss)) { | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			return sprintf('%s:%s', $hh, $mm); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 		else { | 
					
						
							|  |  |  |  | 			return sprintf('%s:%s:%s', $hh, $mm, $ss); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | /** | 
					
						
							|  |  |  |  |  * Microformats2 Parser | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * A class which holds state for parsing microformats2 from HTML. | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  * Example usage: | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  |  * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  |  *     use Mf2; | 
					
						
							|  |  |  |  |  *     $parser = new Mf2\Parser('<p class="h-card">Barnaby Walters</p>'); | 
					
						
							|  |  |  |  |  *     $output = $parser->parse(); | 
					
						
							|  |  |  |  |  */ | 
					
						
							|  |  |  |  | class Parser { | 
					
						
							|  |  |  |  | 	/** @var string The baseurl (if any) to use for this parse */ | 
					
						
							|  |  |  |  | 	public $baseurl; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	/** @var DOMXPath object which can be used to query over any fragment*/ | 
					
						
							|  |  |  |  | 	public $xpath; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	/** @var DOMDocument */ | 
					
						
							|  |  |  |  | 	public $doc; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	/** @var SplObjectStorage */ | 
					
						
							|  |  |  |  | 	protected $parsed; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	public $jsonMode; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	/** | 
					
						
							|  |  |  |  | 	 * Constructor | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	 * @param DOMDocument|string $input The data to parse. A string of HTML or a DOMDocument | 
					
						
							|  |  |  |  | 	 * @param string $url The URL of the parsed document, for relative URL resolution | 
					
						
							|  |  |  |  | 	 * @param boolean $jsonMode Whether or not to use a stdClass instance for an empty `rels` dictionary. This breaks PHP looping over rels, but allows the output to be correctly serialized as JSON. | 
					
						
							|  |  |  |  | 	 */ | 
					
						
							|  |  |  |  | 	public function __construct($input, $url = null, $jsonMode = false) { | 
					
						
							|  |  |  |  | 		libxml_use_internal_errors(true); | 
					
						
							|  |  |  |  | 		if (is_string($input)) { | 
					
						
							|  |  |  |  | 			$doc = new DOMDocument(); | 
					
						
							|  |  |  |  | 			@$doc->loadHTML(unicodeToHtmlEntities($input)); | 
					
						
							|  |  |  |  | 		} elseif (is_a($input, 'DOMDocument')) { | 
					
						
							|  |  |  |  | 			$doc = $input; | 
					
						
							|  |  |  |  | 		} else { | 
					
						
							|  |  |  |  | 			$doc = new DOMDocument(); | 
					
						
							|  |  |  |  | 			@$doc->loadHTML(''); | 
					
						
							|  |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		$this->xpath = new DOMXPath($doc); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		$baseurl = $url; | 
					
						
							|  |  |  |  | 		foreach ($this->xpath->query('//base[@href]') as $base) { | 
					
						
							|  |  |  |  | 			$baseElementUrl = $base->getAttribute('href'); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			if (parse_url($baseElementUrl, PHP_URL_SCHEME) === null) { | 
					
						
							|  |  |  |  | 				/* The base element URL is relative to the document URL. | 
					
						
							|  |  |  |  | 				 * | 
					
						
							|  |  |  |  | 				 * :/ | 
					
						
							|  |  |  |  | 				 * | 
					
						
							|  |  |  |  | 				 * Perhaps the author was high? */ | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 				$baseurl = resolveUrl($url, $baseElementUrl); | 
					
						
							|  |  |  |  | 			} else { | 
					
						
							|  |  |  |  | 				$baseurl = $baseElementUrl; | 
					
						
							|  |  |  |  | 			} | 
					
						
							|  |  |  |  | 			break; | 
					
						
							|  |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 		// Ignore <template> elements as per the HTML5 spec
 | 
					
						
							|  |  |  |  | 		foreach ($this->xpath->query('//template') as $templateEl) { | 
					
						
							|  |  |  |  | 			$templateEl->parentNode->removeChild($templateEl); | 
					
						
							|  |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		$this->baseurl = $baseurl; | 
					
						
							|  |  |  |  | 		$this->doc = $doc; | 
					
						
							|  |  |  |  | 		$this->parsed = new SplObjectStorage(); | 
					
						
							|  |  |  |  | 		$this->jsonMode = $jsonMode; | 
					
						
							|  |  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	private function elementPrefixParsed(\DOMElement $e, $prefix) { | 
					
						
							|  |  |  |  | 		if (!$this->parsed->contains($e)) | 
					
						
							|  |  |  |  | 			$this->parsed->attach($e, array()); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		$prefixes = $this->parsed[$e]; | 
					
						
							|  |  |  |  | 		$prefixes[] = $prefix; | 
					
						
							|  |  |  |  | 		$this->parsed[$e] = $prefixes; | 
					
						
							|  |  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	private function isElementParsed(\DOMElement $e, $prefix) { | 
					
						
							|  |  |  |  | 		if (!$this->parsed->contains($e)) | 
					
						
							|  |  |  |  | 			return false; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		$prefixes = $this->parsed[$e]; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		if (!in_array($prefix, $prefixes)) | 
					
						
							|  |  |  |  | 			return false; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		return true; | 
					
						
							|  |  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 	private function resolveChildUrls(DOMElement $el) { | 
					
						
							|  |  |  |  | 		$hyperlinkChildren = $this->xpath->query('.//*[@src or @href or @data]', $el); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		foreach ($hyperlinkChildren as $child) { | 
					
						
							|  |  |  |  | 			if ($child->hasAttribute('href')) | 
					
						
							|  |  |  |  | 				$child->setAttribute('href', $this->resolveUrl($child->getAttribute('href'))); | 
					
						
							|  |  |  |  | 			if ($child->hasAttribute('src')) | 
					
						
							|  |  |  |  | 				$child->setAttribute('src', $this->resolveUrl($child->getAttribute('src'))); | 
					
						
							|  |  |  |  | 			if ($child->hasAttribute('data')) | 
					
						
							|  |  |  |  | 				$child->setAttribute('data', $this->resolveUrl($child->getAttribute('data'))); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	public function textContent(DOMElement $el) { | 
					
						
							|  |  |  |  | 		$this->resolveChildUrls($el); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		$clonedEl = $el->cloneNode(true); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		foreach ($this->xpath->query('.//img', $clonedEl) as $imgEl) { | 
					
						
							|  |  |  |  | 			$newNode = $this->doc->createTextNode($imgEl->getAttribute($imgEl->hasAttribute('alt') ? 'alt' : 'src')); | 
					
						
							|  |  |  |  | 			$imgEl->parentNode->replaceChild($newNode, $imgEl); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		return $clonedEl->textContent; | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	// TODO: figure out if this has problems with sms: and geo: URLs
 | 
					
						
							|  |  |  |  | 	public function resolveUrl($url) { | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 		// If the URL is seriously malformed it’s probably beyond the scope of this
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		// parser to try to do anything with it.
 | 
					
						
							|  |  |  |  | 		if (parse_url($url) === false) | 
					
						
							|  |  |  |  | 			return $url; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		$scheme = parse_url($url, PHP_URL_SCHEME); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		if (empty($scheme) and !empty($this->baseurl)) { | 
					
						
							|  |  |  |  | 			return resolveUrl($this->baseurl, $url); | 
					
						
							|  |  |  |  | 		} else { | 
					
						
							|  |  |  |  | 			return $url; | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	// Parsing Functions
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	/** | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 * Parse value-class/value-title on an element, joining with $separator if | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	 * there are multiple. | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	 * @param \DOMElement $e | 
					
						
							|  |  |  |  | 	 * @param string $separator = '' if multiple value-title elements, join with this string | 
					
						
							|  |  |  |  | 	 * @return string|null the parsed value or null if value-class or -title aren’t in use | 
					
						
							|  |  |  |  | 	 */ | 
					
						
							|  |  |  |  | 	public function parseValueClassTitle(\DOMElement $e, $separator = '') { | 
					
						
							|  |  |  |  | 		$valueClassElements = $this->xpath->query('./*[contains(concat(" ", @class, " "), " value ")]', $e); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		if ($valueClassElements->length !== 0) { | 
					
						
							|  |  |  |  | 			// Process value-class stuff
 | 
					
						
							|  |  |  |  | 			$val = ''; | 
					
						
							|  |  |  |  | 			foreach ($valueClassElements as $el) { | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 				$val .= $this->textContent($el); | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			return unicodeTrim($val); | 
					
						
							|  |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		$valueTitleElements = $this->xpath->query('./*[contains(concat(" ", @class, " "), " value-title ")]', $e); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		if ($valueTitleElements->length !== 0) { | 
					
						
							|  |  |  |  | 			// Process value-title stuff
 | 
					
						
							|  |  |  |  | 			$val = ''; | 
					
						
							|  |  |  |  | 			foreach ($valueTitleElements as $el) { | 
					
						
							|  |  |  |  | 				$val .= $el->getAttribute('title'); | 
					
						
							|  |  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			return unicodeTrim($val); | 
					
						
							|  |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		// No value-title or -class in this element
 | 
					
						
							|  |  |  |  | 		return null; | 
					
						
							|  |  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	/** | 
					
						
							|  |  |  |  | 	 * Given an element with class="p-*", get it’s value | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	 * @param DOMElement $p The element to parse | 
					
						
							|  |  |  |  | 	 * @return string The plaintext value of $p, dependant on type | 
					
						
							|  |  |  |  | 	 * @todo Make this adhere to value-class | 
					
						
							|  |  |  |  | 	 */ | 
					
						
							|  |  |  |  | 	public function parseP(\DOMElement $p) { | 
					
						
							|  |  |  |  | 		$classTitle = $this->parseValueClassTitle($p, ' '); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		if ($classTitle !== null) | 
					
						
							|  |  |  |  | 			return $classTitle; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		if ($p->tagName == 'img' and $p->getAttribute('alt') !== '') { | 
					
						
							|  |  |  |  | 			$pValue = $p->getAttribute('alt'); | 
					
						
							|  |  |  |  | 		} elseif ($p->tagName == 'area' and $p->getAttribute('alt') !== '') { | 
					
						
							|  |  |  |  | 			$pValue = $p->getAttribute('alt'); | 
					
						
							|  |  |  |  | 		} elseif ($p->tagName == 'abbr' and $p->getAttribute('title') !== '') { | 
					
						
							|  |  |  |  | 			$pValue = $p->getAttribute('title'); | 
					
						
							|  |  |  |  | 		} elseif (in_array($p->tagName, array('data', 'input')) and $p->getAttribute('value') !== '') { | 
					
						
							|  |  |  |  | 			$pValue = $p->getAttribute('value'); | 
					
						
							|  |  |  |  | 		} else { | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 			$pValue = unicodeTrim($this->textContent($p)); | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		return $pValue; | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	/** | 
					
						
							|  |  |  |  | 	 * Given an element with class="u-*", get the value of the URL | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	 * @param DOMElement $u The element to parse | 
					
						
							|  |  |  |  | 	 * @return string The plaintext value of $u, dependant on type | 
					
						
							|  |  |  |  | 	 * @todo make this adhere to value-class | 
					
						
							|  |  |  |  | 	 */ | 
					
						
							|  |  |  |  | 	public function parseU(\DOMElement $u) { | 
					
						
							|  |  |  |  | 		if (($u->tagName == 'a' or $u->tagName == 'area') and $u->getAttribute('href') !== null) { | 
					
						
							|  |  |  |  | 			$uValue = $u->getAttribute('href'); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 		} elseif (in_array($u->tagName, array('img', 'audio', 'video', 'source')) and $u->getAttribute('src') !== null) { | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			$uValue = $u->getAttribute('src'); | 
					
						
							|  |  |  |  | 		} elseif ($u->tagName == 'object' and $u->getAttribute('data') !== null) { | 
					
						
							|  |  |  |  | 			$uValue = $u->getAttribute('data'); | 
					
						
							|  |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		if (isset($uValue)) { | 
					
						
							|  |  |  |  | 			return $this->resolveUrl($uValue); | 
					
						
							|  |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		$classTitle = $this->parseValueClassTitle($u); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		if ($classTitle !== null) { | 
					
						
							|  |  |  |  | 			return $classTitle; | 
					
						
							|  |  |  |  | 		} elseif ($u->tagName == 'abbr' and $u->getAttribute('title') !== null) { | 
					
						
							|  |  |  |  | 			return $u->getAttribute('title'); | 
					
						
							|  |  |  |  | 		} elseif (in_array($u->tagName, array('data', 'input')) and $u->getAttribute('value') !== null) { | 
					
						
							|  |  |  |  | 			return $u->getAttribute('value'); | 
					
						
							|  |  |  |  | 		} else { | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 			return unicodeTrim($this->textContent($u)); | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		} | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 
 | 
					
						
	/**
	 * Given an element with class="dt-*", get its datetime value as a string.
	 *
	 * Two modes:
	 *  - value-class: when $dt has direct children with class "value" or
	 *    "value-title", a part is collected from each child and the parts are
	 *    combined into a date, a time, or a "<date>T<time>" string;
	 *  - otherwise the value is read from $dt itself (alt, value, title or
	 *    datetime attribute depending on the tag, else the node value).
	 *
	 * If the resulting value starts with a time and $dates is non-empty, the
	 * last entry of $dates is prepended to form a full datetime.
	 *
	 * @param DOMElement $dt The element to parse
	 * @param array $dates Array of dates processed so far; passed by reference, date strings found here are appended to it
	 * @return string|false The datetime string found; false when no value was found
	 *                      (e.g. an img/area with an empty alt, or value-class children yielding no parts)
	 */
	public function parseDT(\DOMElement $dt, &$dates = array()) {
		// Check for value-class pattern: direct children only.
		$valueClassChildren = $this->xpath->query('./*[contains(concat(" ", @class, " "), " value ") or contains(concat(" ", @class, " "), " value-title ")]', $dt);
		$dtValue = false;

		if ($valueClassChildren->length > 0) {
			// They’re using value-class: collect one candidate part per child.
			$dateParts = array();

			foreach ($valueClassChildren as $e) {
				if (strstr(' ' . $e->getAttribute('class') . ' ', ' value-title ')) {
					// value-title takes priority over the tag-specific rules below: use @title
					$title = $e->getAttribute('title');
					if (!empty($title))
						$dateParts[] = $title;
				}
				elseif ($e->tagName == 'img' or $e->tagName == 'area') {
					// Use @alt
					$alt = $e->getAttribute('alt');
					if (!empty($alt))
						$dateParts[] = $alt;
				}
				elseif ($e->tagName == 'data') {
					// Use @value, otherwise innertext
					$value = $e->hasAttribute('value') ? $e->getAttribute('value') : unicodeTrim($e->nodeValue);
					if (!empty($value))
						$dateParts[] = $value;
				}
				elseif ($e->tagName == 'abbr') {
					// Use @title, otherwise innertext
					$title = $e->hasAttribute('title') ? $e->getAttribute('title') : unicodeTrim($e->nodeValue);
					if (!empty($title))
						$dateParts[] = $title;
				}
				elseif ($e->tagName == 'del' or $e->tagName == 'ins' or $e->tagName == 'time') {
					// Use @datetime if available, otherwise innertext
					$dtAttr = ($e->hasAttribute('datetime')) ? $e->getAttribute('datetime') : unicodeTrim($e->nodeValue);
					if (!empty($dtAttr))
						$dateParts[] = $dtAttr;
				}
				else {
					// Any other element: use its node value
					if (!empty($e->nodeValue))
						$dateParts[] = unicodeTrim($e->nodeValue);
				}
			}

			// Look through dateParts; only the first time part and the first date part are kept.
			$datePart = '';
			$timePart = '';
			foreach ($dateParts as $part) {
				// Is this part a full ISO8601 datetime?
				// NOTE(review): the character class [+|-] also accepts a literal "|" as the offset sign — confirm whether [+-] was intended.
				if (preg_match('/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}(?::\d{2})?(?:Z?[+|-]\d{2}:?\d{2})?$/', $part)) {
					// Break completely, we’ve got our value.
					$dtValue = $part;
					break;
				} else {
					// Is the current part a valid time(+TZ?) AND no other time representation has been found?
					// NOTE(review): [a|p] likewise also matches "|" before "m".
					if ((preg_match('/\d{1,2}:\d{1,2}(Z?[+|-]\d{2}:?\d{2})?/', $part) or preg_match('/\d{1,2}[a|p]m/', $part)) and empty($timePart)) {
						$timePart = $part;
					} elseif (preg_match('/\d{4}-\d{2}-\d{2}/', $part) and empty($datePart)) {
						// Is the current part a valid date AND no other date representation has been found?
						$datePart = $part;
					}

					// Record the date for the caller so later time-only dt-* values can reuse it.
					if ( !empty($datePart) && !in_array($datePart, $dates) ) {
						$dates[] = $datePart;
					}

					// The value is rebuilt from scratch on every iteration from the parts found so far.
					$dtValue = '';

					if ( empty($datePart) && !empty($timePart) ) {
						// Time only
						$timePart = convertTimeFormat($timePart);
						$dtValue = unicodeTrim($timePart, 'T');
					}
					else if ( !empty($datePart) && empty($timePart) ) {
						// Date only
						$dtValue = rtrim($datePart, 'T');
					}
					else {
						// Both parts present; this branch is also reached when neither part has been found yet.
						$timePart = convertTimeFormat($timePart);
						$dtValue = rtrim($datePart, 'T') . 'T' . unicodeTrim($timePart, 'T');
					}
				}
			}
		} else {
			// Not using value-class (phew): read the value from $dt itself.
			if ($dt->tagName == 'img' or $dt->tagName == 'area') {
				// Use @alt; when it is empty $dtValue stays false
				// Is it an entire dt?
				$alt = $dt->getAttribute('alt');
				if (!empty($alt))
					$dtValue = $alt;
			} elseif (in_array($dt->tagName, array('data'))) {
				// Use @value, otherwise innertext
				// Is it an entire dt?
				$value = $dt->getAttribute('value');
				if (!empty($value))
					$dtValue = $value;
				else
					$dtValue = $dt->nodeValue;
			} elseif ($dt->tagName == 'abbr') {
				// Use @title, otherwise innertext
				// Is it an entire dt?
				$title = $dt->getAttribute('title');
				if (!empty($title))
					$dtValue = $title;
				else
					$dtValue = $dt->nodeValue;
			} elseif ($dt->tagName == 'del' or $dt->tagName == 'ins' or $dt->tagName == 'time') {
				// Use @datetime if available, otherwise innertext
				// Is it an entire dt?
				$dtAttr = $dt->getAttribute('datetime');
				if (!empty($dtAttr))
					$dtValue = $dtAttr;
				else
					$dtValue = $dt->nodeValue;
			} else {
				$dtValue = $dt->nodeValue;
			}

			// Remember any YYYY-MM-DD date contained in the value for later time-only dt-* values.
			if (preg_match('/(\d{4}-\d{2}-\d{2})/', $dtValue, $matches)) {
				$dates[] = $matches[0];
			}
		}

		/**
		 * if $dtValue is only a time and there are recently parsed dates,
		 * form the full date-time using the most recently parsed dt- value
		 */
		if ((preg_match('/^\d{1,2}:\d{1,2}(Z?[+|-]\d{2}:?\d{2})?/', $dtValue) or preg_match('/^\d{1,2}[a|p]m/', $dtValue)) && !empty($dates)) {
			$dtValue = convertTimeFormat($dtValue);
			$dtValue = end($dates) . 'T' . unicodeTrim($dtValue, 'T');
		}

		return $dtValue;
	}
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	/** | 
					
						
							|  |  |  |  | 	 * 	Given the root element of some embedded markup, return a string representing that markup | 
					
						
							|  |  |  |  | 	 * | 
					
						
							|  |  |  |  | 	 * 	@param DOMElement $e The element to parse | 
					
						
							|  |  |  |  | 	 * 	@return string $e’s innerHTML | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	 * @todo need to mark this element as e- parsed so it doesn’t get parsed as it’s parent’s e-* too | 
					
						
							|  |  |  |  | 	 */ | 
					
						
							|  |  |  |  | 	public function parseE(\DOMElement $e) { | 
					
						
							|  |  |  |  | 		$classTitle = $this->parseValueClassTitle($e); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		if ($classTitle !== null) | 
					
						
							|  |  |  |  | 			return $classTitle; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		// Expand relative URLs within children of this element
 | 
					
						
							|  |  |  |  | 		// TODO: as it is this is not relative to only children, make this .// and rerun tests
 | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 		$this->resolveChildUrls($e); | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		$html = ''; | 
					
						
							|  |  |  |  | 		foreach ($e->childNodes as $node) { | 
					
						
							|  |  |  |  | 			$html .= $node->C14N(); | 
					
						
							|  |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		return array( | 
					
						
							|  |  |  |  | 			'html' => $html, | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 			'value' => unicodeTrim($this->textContent($e)) | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		); | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	/** | 
					
						
							|  |  |  |  | 	 * Recursively parse microformats | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	 * @param DOMElement $e The element to parse | 
					
						
							|  |  |  |  | 	 * @return array A representation of the values contained within microformat $e | 
					
						
							|  |  |  |  | 	 */ | 
					
						
							|  |  |  |  | 	public function parseH(\DOMElement $e) { | 
					
						
							|  |  |  |  | 		// If it’s already been parsed (e.g. is a child mf), skip
 | 
					
						
							|  |  |  |  | 		if ($this->parsed->contains($e)) | 
					
						
							|  |  |  |  | 			return null; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		// Get current µf name
 | 
					
						
							|  |  |  |  | 		$mfTypes = mfNamesFromElement($e, 'h-'); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		// Initalise var to store the representation in
 | 
					
						
							|  |  |  |  | 		$return = array(); | 
					
						
							|  |  |  |  | 		$children = array(); | 
					
						
							|  |  |  |  | 		$dates = array(); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		// Handle nested microformats (h-*)
 | 
					
						
							|  |  |  |  | 		foreach ($this->xpath->query('.//*[contains(concat(" ", @class)," h-")]', $e) as $subMF) { | 
					
						
							|  |  |  |  | 			// Parse
 | 
					
						
							|  |  |  |  | 			$result = $this->parseH($subMF); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			// If result was already parsed, skip it
 | 
					
						
							|  |  |  |  | 			if (null === $result) | 
					
						
							|  |  |  |  | 				continue; | 
					
						
							|  |  |  |  | 			 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 			// In most cases, the value attribute of the nested microformat should be the p- parsed value of the elemnt.
 | 
					
						
							|  |  |  |  | 			// The only times this is different is when the microformat is nested under certain prefixes, which are handled below.
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			$result['value'] = $this->parseP($subMF); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 			// Does this µf have any property names other than h-*?
 | 
					
						
							|  |  |  |  | 			$properties = nestedMfPropertyNamesFromElement($subMF); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			if (!empty($properties)) { | 
					
						
							|  |  |  |  | 				// Yes! It’s a nested property µf
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 				foreach ($properties as $property => $prefixes) { | 
					
						
							|  |  |  |  | 					// Note: handling microformat nesting under multiple conflicting prefixes is not currently specified by the mf2 parsing spec.
 | 
					
						
							|  |  |  |  | 					$prefixSpecificResult = $result; | 
					
						
							|  |  |  |  | 					if (in_array('p-', $prefixes)) { | 
					
						
							|  |  |  |  | 						$prefixSpecificResult['value'] = $prefixSpecificResult['properties']['name'][0]; | 
					
						
							|  |  |  |  | 					} elseif (in_array('e-', $prefixes)) { | 
					
						
							|  |  |  |  | 						$eParsedResult = $this->parseE($subMF); | 
					
						
							|  |  |  |  | 						$prefixSpecificResult['html'] = $eParsedResult['html']; | 
					
						
							|  |  |  |  | 						$prefixSpecificResult['value'] = $eParsedResult['value']; | 
					
						
							|  |  |  |  | 					} elseif (in_array('u-', $prefixes)) { | 
					
						
							|  |  |  |  | 						$prefixSpecificResult['value'] = $this->parseU($subMF); | 
					
						
							|  |  |  |  | 					} | 
					
						
							|  |  |  |  | 					$return[$property][] = $prefixSpecificResult; | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 				} | 
					
						
							|  |  |  |  | 			} else { | 
					
						
							|  |  |  |  | 				// No, it’s a child µf
 | 
					
						
							|  |  |  |  | 				$children[] = $result; | 
					
						
							|  |  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			// Make sure this sub-mf won’t get parsed as a µf or property
 | 
					
						
							|  |  |  |  | 			// TODO: Determine if clearing this is required?
 | 
					
						
							|  |  |  |  | 			$this->elementPrefixParsed($subMF, 'h'); | 
					
						
							|  |  |  |  | 			$this->elementPrefixParsed($subMF, 'p'); | 
					
						
							|  |  |  |  | 			$this->elementPrefixParsed($subMF, 'u'); | 
					
						
							|  |  |  |  | 			$this->elementPrefixParsed($subMF, 'dt'); | 
					
						
							|  |  |  |  | 			$this->elementPrefixParsed($subMF, 'e'); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 		if($e->tagName == 'area') { | 
					
						
							|  |  |  |  | 			$coords = $e->getAttribute('coords'); | 
					
						
							|  |  |  |  | 			$shape = $e->getAttribute('shape'); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		// Handle p-*
 | 
					
						
							|  |  |  |  | 		foreach ($this->xpath->query('.//*[contains(concat(" ", @class) ," p-")]', $e) as $p) { | 
					
						
							|  |  |  |  | 			if ($this->isElementParsed($p, 'p')) | 
					
						
							|  |  |  |  | 				continue; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 			$pValue = $this->parseP($p); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			// Add the value to the array for it’s p- properties
 | 
					
						
							|  |  |  |  | 			foreach (mfNamesFromElement($p, 'p-') as $propName) { | 
					
						
							|  |  |  |  | 				if (!empty($propName)) | 
					
						
							|  |  |  |  | 					$return[$propName][] = $pValue; | 
					
						
							|  |  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			// Make sure this sub-mf won’t get parsed as a top level mf
 | 
					
						
							|  |  |  |  | 			$this->elementPrefixParsed($p, 'p'); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		// Handle u-*
 | 
					
						
							|  |  |  |  | 		foreach ($this->xpath->query('.//*[contains(concat(" ",  @class)," u-")]', $e) as $u) { | 
					
						
							|  |  |  |  | 			if ($this->isElementParsed($u, 'u')) | 
					
						
							|  |  |  |  | 				continue; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			$uValue = $this->parseU($u); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			// Add the value to the array for it’s property types
 | 
					
						
							|  |  |  |  | 			foreach (mfNamesFromElement($u, 'u-') as $propName) { | 
					
						
							|  |  |  |  | 				$return[$propName][] = $uValue; | 
					
						
							|  |  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			// Make sure this sub-mf won’t get parsed as a top level mf
 | 
					
						
							|  |  |  |  | 			$this->elementPrefixParsed($u, 'u'); | 
					
						
							|  |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		// Handle dt-*
 | 
					
						
							|  |  |  |  | 		foreach ($this->xpath->query('.//*[contains(concat(" ", @class), " dt-")]', $e) as $dt) { | 
					
						
							|  |  |  |  | 			if ($this->isElementParsed($dt, 'dt')) | 
					
						
							|  |  |  |  | 				continue; | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			$dtValue = $this->parseDT($dt, $dates); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			if ($dtValue) { | 
					
						
							|  |  |  |  | 				// Add the value to the array for dt- properties
 | 
					
						
							|  |  |  |  | 				foreach (mfNamesFromElement($dt, 'dt-') as $propName) { | 
					
						
							|  |  |  |  | 					$return[$propName][] = $dtValue; | 
					
						
							|  |  |  |  | 				} | 
					
						
							|  |  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			// Make sure this sub-mf won’t get parsed as a top level mf
 | 
					
						
							|  |  |  |  | 			$this->elementPrefixParsed($dt, 'dt'); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		// Handle e-*
 | 
					
						
							|  |  |  |  | 		foreach ($this->xpath->query('.//*[contains(concat(" ", @class)," e-")]', $e) as $em) { | 
					
						
							|  |  |  |  | 			if ($this->isElementParsed($em, 'e')) | 
					
						
							|  |  |  |  | 				continue; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 			$eValue = $this->parseE($em); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 			if ($eValue) { | 
					
						
							|  |  |  |  | 				// Add the value to the array for e- properties
 | 
					
						
							|  |  |  |  | 				foreach (mfNamesFromElement($em, 'e-') as $propName) { | 
					
						
							|  |  |  |  | 					$return[$propName][] = $eValue; | 
					
						
							|  |  |  |  | 				} | 
					
						
							|  |  |  |  | 			} | 
					
						
							|  |  |  |  | 			// Make sure this sub-mf won’t get parsed as a top level mf
 | 
					
						
							|  |  |  |  | 			$this->elementPrefixParsed($em, 'e'); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		// Implied Properties
 | 
					
						
							|  |  |  |  | 		// Check for p-name
 | 
					
						
							|  |  |  |  | 		if (!array_key_exists('name', $return)) { | 
					
						
							|  |  |  |  | 			try { | 
					
						
							|  |  |  |  | 				// Look for img @alt
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 				if (($e->tagName == 'img' or $e->tagName == 'area') and $e->getAttribute('alt') != '') | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 					throw new Exception($e->getAttribute('alt')); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 				if ($e->tagName == 'abbr' and $e->hasAttribute('title')) | 
					
						
							|  |  |  |  | 					throw new Exception($e->getAttribute('title')); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 				// Look for nested img @alt
 | 
					
						
							|  |  |  |  | 				foreach ($this->xpath->query('./img[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) { | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 					$emNames = mfNamesFromElement($em, 'h-'); | 
					
						
							|  |  |  |  | 					if (empty($emNames) && $em->getAttribute('alt') != '') { | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 						throw new Exception($em->getAttribute('alt')); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 					} | 
					
						
							|  |  |  |  | 				} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 				// Look for nested area @alt
 | 
					
						
							|  |  |  |  | 				foreach ($this->xpath->query('./area[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) { | 
					
						
							|  |  |  |  | 					$emNames = mfNamesFromElement($em, 'h-'); | 
					
						
							|  |  |  |  | 					if (empty($emNames) && $em->getAttribute('alt') != '') { | 
					
						
							|  |  |  |  | 						throw new Exception($em->getAttribute('alt')); | 
					
						
							|  |  |  |  | 					} | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 				} | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 				// Look for double nested img @alt
 | 
					
						
							|  |  |  |  | 				foreach ($this->xpath->query('./*[count(preceding-sibling::*)+count(following-sibling::*)=0]/img[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) { | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 					$emNames = mfNamesFromElement($em, 'h-'); | 
					
						
							|  |  |  |  | 					if (empty($emNames) && $em->getAttribute('alt') != '') { | 
					
						
							|  |  |  |  | 						throw new Exception($em->getAttribute('alt')); | 
					
						
							|  |  |  |  | 					} | 
					
						
							|  |  |  |  | 				} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 				// Look for double nested img @alt
 | 
					
						
							|  |  |  |  | 				foreach ($this->xpath->query('./*[count(preceding-sibling::*)+count(following-sibling::*)=0]/area[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) { | 
					
						
							|  |  |  |  | 					$emNames = mfNamesFromElement($em, 'h-'); | 
					
						
							|  |  |  |  | 					if (empty($emNames) && $em->getAttribute('alt') != '') { | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 						throw new Exception($em->getAttribute('alt')); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 					} | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 				} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 				throw new Exception($e->nodeValue); | 
					
						
							|  |  |  |  | 			} catch (Exception $exc) { | 
					
						
							|  |  |  |  | 				$return['name'][] = unicodeTrim($exc->getMessage()); | 
					
						
							|  |  |  |  | 			} | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		// Check for u-photo
 | 
					
						
							|  |  |  |  | 		if (!array_key_exists('photo', $return)) { | 
					
						
							|  |  |  |  | 			// Look for img @src
 | 
					
						
							|  |  |  |  | 			try { | 
					
						
							|  |  |  |  | 				if ($e->tagName == 'img') | 
					
						
							|  |  |  |  | 					throw new Exception($e->getAttribute('src')); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 				// Look for nested img @src
 | 
					
						
							|  |  |  |  | 				foreach ($this->xpath->query('./img[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) { | 
					
						
							|  |  |  |  | 					if ($em->getAttribute('src') != '') | 
					
						
							|  |  |  |  | 						throw new Exception($em->getAttribute('src')); | 
					
						
							|  |  |  |  | 				} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 				// Look for double nested img @src
 | 
					
						
							|  |  |  |  | 				foreach ($this->xpath->query('./*[count(preceding-sibling::*)+count(following-sibling::*)=0]/img[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) { | 
					
						
							|  |  |  |  | 					if ($em->getAttribute('src') != '') | 
					
						
							|  |  |  |  | 						throw new Exception($em->getAttribute('src')); | 
					
						
							|  |  |  |  | 				} | 
					
						
							|  |  |  |  | 			} catch (Exception $exc) { | 
					
						
							|  |  |  |  | 				$return['photo'][] = $this->resolveUrl($exc->getMessage()); | 
					
						
							|  |  |  |  | 			} | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		// Check for u-url
 | 
					
						
							|  |  |  |  | 		if (!array_key_exists('url', $return)) { | 
					
						
							|  |  |  |  | 			// Look for img @src
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 			if ($e->tagName == 'a' or $e->tagName == 'area') | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 				$url = $e->getAttribute('href'); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 			// Look for nested a @href
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			foreach ($this->xpath->query('./a[count(preceding-sibling::a)+count(following-sibling::a)=0]', $e) as $em) { | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 				$emNames = mfNamesFromElement($em, 'h-'); | 
					
						
							|  |  |  |  | 				if (empty($emNames)) { | 
					
						
							|  |  |  |  | 					$url = $em->getAttribute('href'); | 
					
						
							|  |  |  |  | 					break; | 
					
						
							|  |  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 			// Look for nested area @src
 | 
					
						
							|  |  |  |  | 			foreach ($this->xpath->query('./area[count(preceding-sibling::area)+count(following-sibling::area)=0]', $e) as $em) { | 
					
						
							|  |  |  |  | 				$emNames = mfNamesFromElement($em, 'h-'); | 
					
						
							|  |  |  |  | 				if (empty($emNames)) { | 
					
						
							|  |  |  |  | 					$url = $em->getAttribute('href'); | 
					
						
							|  |  |  |  | 					break; | 
					
						
							|  |  |  |  | 				} | 
					
						
							|  |  |  |  | 			} | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			if (!empty($url)) | 
					
						
							|  |  |  |  | 				$return['url'][] = $this->resolveUrl($url); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		// Make sure things are in alphabetical order
 | 
					
						
							|  |  |  |  | 		sort($mfTypes); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		// Phew. Return the final result.
 | 
					
						
							|  |  |  |  | 		$parsed = array( | 
					
						
							|  |  |  |  | 			'type' => $mfTypes, | 
					
						
							|  |  |  |  | 			'properties' => $return | 
					
						
							|  |  |  |  | 		); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 		if (!empty($shape)) { | 
					
						
							|  |  |  |  | 			$parsed['shape'] = $shape; | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		if (!empty($coords)) { | 
					
						
							|  |  |  |  | 			$parsed['coords'] = $coords; | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		if (!empty($children)) { | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			$parsed['children'] = array_values(array_filter($children)); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		return $parsed; | 
					
						
							|  |  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 /**
	 * Parse Rels and Alternates
	 *
	 * Walks every a, area and link element that has both a rel and an href attribute,
	 * resolves its href with $this->resolveUrl() and sorts it into one of two structures:
	 *
	 *  - hyperlinks whose rel contains `alternate` produce one entry in $alternates, holding
	 *    `url`, `rel` (the remaining rel values, space-joined) and, when present on the
	 *    element, `media`, `hreflang`, `title`, `type` and `text`;
	 *  - every other hyperlink has its URL appended to $rels under each of its rel values.
	 *
	 * Returns [$rels, $alternates]. If the $rels value is to be empty, i.e. there are no links on the page
	 * with a rel value *not* containing `alternate`, then the type of $rels depends on $this->jsonMode. If set
	 * to true, it will be a stdClass instance, optimising for JSON serialisation. Otherwise (the default case),
	 * it will be an empty array.
	 *
	 * @return array two-element array: [$rels, $alternates]
	 */
	public function parseRelsAndAlternates() {
		$rels = array();
		$alternates = array();

		// Only a, area and link elements are hyperlinks; a stray rel/href pair on any
		// other element must not leak into the result.
		$hyperlinks = $this->xpath->query('//a[@rel and @href] | //area[@rel and @href] | //link[@rel and @href]');

		foreach ($hyperlinks as $hyperlink) {
			// rel is a set of tokens separated by any HTML whitespace (space, tab, LF, FF, CR),
			// so splitting on U+0020 alone would fuse e.g. "me\nauthor" into a single bogus value.
			$linkRels = array_filter(preg_split('/[ \t\n\f\r]+/', $hyperlink->getAttribute('rel')));

			// Empty or whitespace-only rel: nothing to record
			if (empty($linkRels))
				continue;

			// Resolve the href
			$href = $this->resolveUrl($hyperlink->getAttribute('href'));

			if (!in_array('alternate', $linkRels)) {
				// Plain rel: index the URL under each rel value
				foreach ($linkRels as $rel) {
					$rels[$rel][] = $href;
				}
				continue;
			}

			// If alternate in rels, create alternate structure, append
			$alt = array(
				'url' => $href,
				'rel' => implode(' ', array_diff($linkRels, array('alternate')))
			);

			// Copy the optional descriptive attributes only when the element declares them
			foreach (array('media', 'hreflang', 'title', 'type') as $attribute) {
				if ($hyperlink->hasAttribute($attribute))
					$alt[$attribute] = $hyperlink->getAttribute($attribute);
			}

			if ($hyperlink->nodeValue)
				$alt['text'] = $hyperlink->nodeValue;

			$alternates[] = $alt;
		}

		if (empty($rels) and $this->jsonMode) {
			$rels = new stdClass();
		}

		return array($rels, $alternates);
	}
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 /**
	 * Kicks off the parsing routine
	 *
	 * If `$convertClassic` is set, convertLegacy() is run first, before any
	 * microformats are looked up.
	 *
	 * If a DOMElement is set as the $context, only descendants of that element will
	 * be parsed for microformats. Rels and alternates are always collected through
	 * parseRelsAndAlternates(), which is called without the context.
	 *
	 * @param bool $convertClassic whether or not to call convertLegacy() before parsing. Defaults to true
	 * @param DOMElement $context optionally an element from which to parse microformats
	 * @return array An array with `items` (the µfs found), `rels`, and, only when at least
	 *               one was found, `alternates`
	 */
	public function parse($convertClassic = true, DOMElement $context = null) {
		if ($convertClassic) {
			$this->convertLegacy();
		}

		// Select every element carrying an h-* class, document-wide or below $context
		if ($context === null) {
			$roots = $this->xpath->query('//*[contains(concat(" ",	@class), " h-")]');
		} else {
			$roots = $this->xpath->query('.//*[contains(concat(" ",	@class), " h-")]', $context);
		}

		// Parse each microformat root in turn
		$items = array();
		foreach ($roots as $root) {
			$items[] = $this->parseH($root);
		}

		// Parse rels
		list($rels, $alternates) = $this->parseRelsAndAlternates();

		// Falsy parseH() results are dropped and the list is re-indexed from zero
		$output = array(
			'items' => array_values(array_filter($items)),
			'rels' => $rels
		);

		if (count($alternates)) {
			$output['alternates'] = $alternates;
		}

		return $output;
	}
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 /**
	 * Parse From ID
	 *
	 * Given an ID, parse all microformats which are children of the element with
	 * that ID.
	 *
	 * Note that rel values are still document-wide.
	 *
	 * If an element with the ID is not found, an empty skeleton mf2 array structure
	 * will be returned.
	 *
	 * @param string $id
	 * @param bool $convertClassic = true passed through unchanged to parse()
	 * @return array
	 */
	public function parseFromId($id, $convertClassic=true) {
		$id = (string) $id;

		// XPath 1.0 string literals have no escape syntax, so pick a delimiter the ID
		// does not contain, or stitch the pieces together with concat() when it
		// contains both kinds of quote.
		if (strpos($id, "'") === false) {
			$idLiteral = "'" . $id . "'";
		} elseif (strpos($id, '"') === false) {
			$idLiteral = '"' . $id . '"';
		} else {
			$idLiteral = "concat('" . str_replace("'", "', \"'\", '", $id) . "')";
		}

		$matches = $this->xpath->query('//*[@id=' . $idLiteral . ']');

		// query() yields a DOMNodeList object (or false on failure). empty() is never
		// true for an object, so the node count has to be checked explicitly; otherwise
		// a missing ID would hand a null context to parse() and the whole document
		// would be parsed.
		if ($matches === false || $matches->length === 0)
			return array('items' => array(), 'rels' => array(), 'alternates' => array());

		return $this->parse($convertClassic, $matches->item(0));
	}
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	/** | 
					
						
							|  |  |  |  | 	 * Convert Legacy Classnames | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	 * Adds microformats2 classnames into a document containing only legacy | 
					
						
							|  |  |  |  | 	 * semantic classnames. | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	 * @return Parser $this | 
					
						
							|  |  |  |  | 	 */ | 
					
						
							|  |  |  |  | 	public function convertLegacy() { | 
					
						
							|  |  |  |  | 		$doc = $this->doc; | 
					
						
							|  |  |  |  | 		$xp = new DOMXPath($doc); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		// replace all roots
 | 
					
						
							|  |  |  |  | 		foreach ($this->classicRootMap as $old => $new) { | 
					
						
							|  |  |  |  | 			foreach ($xp->query('//*[contains(concat(" ", @class, " "), " ' . $old . ' ") and not(contains(concat(" ", @class, " "), " ' . $new . ' "))]') as $el) { | 
					
						
							|  |  |  |  | 				$el->setAttribute('class', $el->getAttribute('class') . ' ' . $new); | 
					
						
							|  |  |  |  | 			} | 
					
						
							|  |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		foreach ($this->classicPropertyMap as $oldRoot => $properties) { | 
					
						
							|  |  |  |  | 			$newRoot = $this->classicRootMap[$oldRoot]; | 
					
						
							|  |  |  |  | 			foreach ($properties as $old => $new) { | 
					
						
							|  |  |  |  | 				foreach ($xp->query('//*[contains(concat(" ", @class, " "), " ' . $oldRoot . ' ")]//*[contains(concat(" ", @class, " "), " ' . $old . ' ") and not(contains(concat(" ", @class, " "), " ' . $new . ' "))]') as $el) { | 
					
						
							|  |  |  |  | 					$el->setAttribute('class', $el->getAttribute('class') . ' ' . $new); | 
					
						
							|  |  |  |  | 				} | 
					
						
							|  |  |  |  | 			} | 
					
						
							|  |  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		return $this; | 
					
						
							|  |  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	/** | 
					
						
							|  |  |  |  | 	 * XPath Query | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	 * Runs an XPath query over the current document. Works in exactly the same | 
					
						
							|  |  |  |  | 	 * way as DOMXPath::query. | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	 * | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	 * @param string $expression | 
					
						
							|  |  |  |  | 	 * @param DOMNode $context | 
					
						
							|  |  |  |  | 	 * @return DOMNodeList | 
					
						
							|  |  |  |  | 	 */ | 
					
						
							|  |  |  |  | 	public function query($expression, $context = null) { | 
					
						
							|  |  |  |  | 		return $this->xpath->query($expression, $context); | 
					
						
							|  |  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	/** | 
					
						
							|  |  |  |  | 	 * Classic Root Classname map | 
					
						
							|  |  |  |  | 	 */ | 
					
						
							|  |  |  |  | 	public $classicRootMap = array( | 
					
						
							|  |  |  |  | 		'vcard' => 'h-card', | 
					
						
							|  |  |  |  | 		'hfeed' => 'h-feed', | 
					
						
							|  |  |  |  | 		'hentry' => 'h-entry', | 
					
						
							|  |  |  |  | 		'hrecipe' => 'h-recipe', | 
					
						
							|  |  |  |  | 		'hresume' => 'h-resume', | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 		'vevent' => 'h-event', | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 		'hreview' => 'h-review', | 
					
						
							|  |  |  |  | 		'hproduct' => 'h-product' | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	); | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	public $classicPropertyMap = array( | 
					
						
							|  |  |  |  | 		'vcard' => array( | 
					
						
							|  |  |  |  | 			'fn' => 'p-name', | 
					
						
							|  |  |  |  | 			'url' => 'u-url', | 
					
						
							|  |  |  |  | 			'honorific-prefix' => 'p-honorific-prefix', | 
					
						
							|  |  |  |  | 			'given-name' => 'p-given-name', | 
					
						
							|  |  |  |  | 			'additional-name' => 'p-additional-name', | 
					
						
							|  |  |  |  | 			'family-name' => 'p-family-name', | 
					
						
							|  |  |  |  | 			'honorific-suffix' => 'p-honorific-suffix', | 
					
						
							|  |  |  |  | 			'nickname' => 'p-nickname', | 
					
						
							|  |  |  |  | 			'email' => 'u-email', | 
					
						
							|  |  |  |  | 			'logo' => 'u-logo', | 
					
						
							|  |  |  |  | 			'photo' => 'u-photo', | 
					
						
							|  |  |  |  | 			'url' => 'u-url', | 
					
						
							|  |  |  |  | 			'uid' => 'u-uid', | 
					
						
							|  |  |  |  | 			'category' => 'p-category', | 
					
						
							|  |  |  |  | 			'adr' => 'p-adr h-adr', | 
					
						
							|  |  |  |  | 			'extended-address' => 'p-extended-address', | 
					
						
							|  |  |  |  | 			'street-address' => 'p-street-address', | 
					
						
							|  |  |  |  | 			'locality' => 'p-locality', | 
					
						
							|  |  |  |  | 			'region' => 'p-region', | 
					
						
							|  |  |  |  | 			'postal-code' => 'p-postal-code', | 
					
						
							|  |  |  |  | 			'country-name' => 'p-country-name', | 
					
						
							|  |  |  |  | 			'label' => 'p-label', | 
					
						
							|  |  |  |  | 			'geo' => 'p-geo h-geo', | 
					
						
							|  |  |  |  | 			'latitude' => 'p-latitude', | 
					
						
							|  |  |  |  | 			'longitude' => 'p-longitude', | 
					
						
							|  |  |  |  | 			'tel' => 'p-tel', | 
					
						
							|  |  |  |  | 			'note' => 'p-note', | 
					
						
							|  |  |  |  | 			'bday' => 'dt-bday', | 
					
						
							|  |  |  |  | 			'key' => 'u-key', | 
					
						
							|  |  |  |  | 			'org' => 'p-org', | 
					
						
							|  |  |  |  | 			'organization-name' => 'p-organization-name', | 
					
						
							|  |  |  |  | 			'organization-unit' => 'p-organization-unit', | 
					
						
							|  |  |  |  | 		), | 
					
						
							|  |  |  |  | 		'hentry' => array( | 
					
						
							|  |  |  |  | 			'entry-title' => 'p-name', | 
					
						
							|  |  |  |  | 			'entry-summary' => 'p-summary', | 
					
						
							|  |  |  |  | 			'entry-content' => 'e-content', | 
					
						
							|  |  |  |  | 			'published' => 'dt-published', | 
					
						
							|  |  |  |  | 			'updated' => 'dt-updated', | 
					
						
							|  |  |  |  | 			'author' => 'p-author h-card', | 
					
						
							|  |  |  |  | 			'category' => 'p-category', | 
					
						
							|  |  |  |  | 			'geo' => 'p-geo h-geo', | 
					
						
							|  |  |  |  | 			'latitude' => 'p-latitude', | 
					
						
							|  |  |  |  | 			'longitude' => 'p-longitude', | 
					
						
							|  |  |  |  | 		), | 
					
						
							|  |  |  |  | 		'hrecipe' => array( | 
					
						
							|  |  |  |  | 			'fn' => 'p-name', | 
					
						
							|  |  |  |  | 			'ingredient' => 'p-ingredient', | 
					
						
							|  |  |  |  | 			'yield' => 'p-yield', | 
					
						
							|  |  |  |  | 			'instructions' => 'e-instructions', | 
					
						
							|  |  |  |  | 			'duration' => 'dt-duration', | 
					
						
							|  |  |  |  | 			'nutrition' => 'p-nutrition', | 
					
						
							|  |  |  |  | 			'photo' => 'u-photo', | 
					
						
							|  |  |  |  | 			'summary' => 'p-summary', | 
					
						
							|  |  |  |  | 			'author' => 'p-author h-card' | 
					
						
							|  |  |  |  | 		), | 
					
						
							|  |  |  |  | 		'hresume' => array( | 
					
						
							|  |  |  |  | 			'summary' => 'p-summary', | 
					
						
							|  |  |  |  | 			'contact' => 'h-card p-contact', | 
					
						
							|  |  |  |  | 			'education' => 'h-event p-education', | 
					
						
							|  |  |  |  | 			'experience' => 'h-event p-experience', | 
					
						
							|  |  |  |  | 			'skill' => 'p-skill', | 
					
						
							|  |  |  |  | 			'affiliation' => 'p-affiliation h-card', | 
					
						
							|  |  |  |  | 		), | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 		'vevent' => array( | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 			'dtstart' => 'dt-start', | 
					
						
							|  |  |  |  | 			'dtend' => 'dt-end', | 
					
						
							|  |  |  |  | 			'duration' => 'dt-duration', | 
					
						
							|  |  |  |  | 			'description' => 'p-description', | 
					
						
							|  |  |  |  | 			'summary' => 'p-summary', | 
					
						
							|  |  |  |  | 			'description' => 'p-description', | 
					
						
							|  |  |  |  | 			'url' => 'u-url', | 
					
						
							|  |  |  |  | 			'category' => 'p-category', | 
					
						
							|  |  |  |  | 			'location' => 'h-card', | 
					
						
							|  |  |  |  | 			'geo' => 'p-location h-geo' | 
					
						
							|  |  |  |  | 		), | 
					
						
							|  |  |  |  | 		'hreview' => array( | 
					
						
							|  |  |  |  | 			'summary' => 'p-name', | 
					
						
							|  |  |  |  | 			'fn' => 'p-item h-item p-name', // doesn’t work properly, see spec
 | 
					
						
							|  |  |  |  | 			'photo' => 'u-photo', // of the item being reviewed (p-item h-item u-photo)
 | 
					
						
							|  |  |  |  | 			'url' => 'u-url', // of the item being reviewed (p-item h-item u-url)
 | 
					
						
							|  |  |  |  | 			'reviewer' => 'p-reviewer p-author h-card', | 
					
						
							|  |  |  |  | 			'dtreviewed' => 'dt-reviewed', | 
					
						
							|  |  |  |  | 			'rating' => 'p-rating', | 
					
						
							|  |  |  |  | 			'best' => 'p-best', | 
					
						
							|  |  |  |  | 			'worst' => 'p-worst', | 
					
						
							|  |  |  |  | 			'description' => 'p-description' | 
					
						
							| 
									
										
										
										
											2014-09-25 08:39:05 +02:00
										 |  |  |  | 		), | 
					
						
							|  |  |  |  | 		'hproduct' => array( | 
					
						
							|  |  |  |  | 			'fn' => 'p-name', | 
					
						
							|  |  |  |  | 			'photo' => 'u-photo', | 
					
						
							|  |  |  |  | 			'brand' => 'p-brand', | 
					
						
							|  |  |  |  | 			'category' => 'p-category', | 
					
						
							|  |  |  |  | 			'description' => 'p-description', | 
					
						
							|  |  |  |  | 			'identifier' => 'u-identifier', | 
					
						
							|  |  |  |  | 			'url' => 'u-url', | 
					
						
							|  |  |  |  | 			'review' => 'p-review h-review', | 
					
						
							|  |  |  |  | 			'price' => 'p-price' | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 		) | 
					
						
							|  |  |  |  | 	); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | function parseUriToComponents($uri) { | 
					
						
							|  |  |  |  | 	$result = array( | 
					
						
							|  |  |  |  | 		'scheme' => null, | 
					
						
							|  |  |  |  | 		'authority' => null, | 
					
						
							|  |  |  |  | 		'path' => null, | 
					
						
							|  |  |  |  | 		'query' => null, | 
					
						
							|  |  |  |  | 		'fragment' => null | 
					
						
							|  |  |  |  | 	); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	$u = @parse_url($uri); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	if(array_key_exists('scheme', $u)) | 
					
						
							|  |  |  |  | 		$result['scheme'] = $u['scheme']; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	if(array_key_exists('host', $u)) { | 
					
						
							|  |  |  |  | 		if(array_key_exists('user', $u)) | 
					
						
							|  |  |  |  | 			$result['authority'] = $u['user']; | 
					
						
							|  |  |  |  | 		if(array_key_exists('pass', $u)) | 
					
						
							|  |  |  |  | 			$result['authority'] .= ':' . $u['pass']; | 
					
						
							|  |  |  |  | 		if(array_key_exists('user', $u) || array_key_exists('pass', $u)) | 
					
						
							|  |  |  |  | 			$result['authority'] .= '@'; | 
					
						
							|  |  |  |  | 		$result['authority'] .= $u['host']; | 
					
						
							|  |  |  |  | 		if(array_key_exists('port', $u)) | 
					
						
							|  |  |  |  | 			$result['authority'] .= ':' . $u['port']; | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	if(array_key_exists('path', $u)) | 
					
						
							|  |  |  |  | 		$result['path'] = $u['path']; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	if(array_key_exists('query', $u)) | 
					
						
							|  |  |  |  | 		$result['query'] = $u['query']; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	if(array_key_exists('fragment', $u)) | 
					
						
							|  |  |  |  | 		$result['fragment'] = $u['fragment']; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	return $result; | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | function resolveUrl($baseURI, $referenceURI) { | 
					
						
							|  |  |  |  | 	$target = array( | 
					
						
							|  |  |  |  | 		'scheme' => null, | 
					
						
							|  |  |  |  | 		'authority' => null, | 
					
						
							|  |  |  |  | 		'path' => null, | 
					
						
							|  |  |  |  | 		'query' => null, | 
					
						
							|  |  |  |  | 		'fragment' => null | 
					
						
							|  |  |  |  | 	); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	# 5.2.1 Pre-parse the Base URI
 | 
					
						
							|  |  |  |  | 	# The base URI (Base) is established according to the procedure of
 | 
					
						
							|  |  |  |  |   # Section 5.1 and parsed into the five main components described in
 | 
					
						
							|  |  |  |  |   # Section 3
 | 
					
						
							|  |  |  |  | 	$base = parseUriToComponents($baseURI); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	# If base path is blank (http://example.com) then set it to /
 | 
					
						
							|  |  |  |  | 	# (I can't tell if this is actually in the RFC or not, but seems like it makes sense)
 | 
					
						
							|  |  |  |  | 	if($base['path'] == null) | 
					
						
							|  |  |  |  | 		$base['path'] = '/'; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	# 5.2.2. Transform References
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	# The URI reference is parsed into the five URI components
 | 
					
						
							|  |  |  |  | 	# (R.scheme, R.authority, R.path, R.query, R.fragment) = parse(R);
 | 
					
						
							|  |  |  |  | 	$reference = parseUriToComponents($referenceURI); | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	# A non-strict parser may ignore a scheme in the reference
 | 
					
						
							|  |  |  |  | 	# if it is identical to the base URI's scheme.
 | 
					
						
							|  |  |  |  | 	# TODO
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	if($reference['scheme']) { | 
					
						
							|  |  |  |  | 		$target['scheme'] = $reference['scheme']; | 
					
						
							|  |  |  |  | 		$target['authority'] = $reference['authority']; | 
					
						
							|  |  |  |  | 		$target['path'] = removeDotSegments($reference['path']); | 
					
						
							|  |  |  |  | 		$target['query'] = $reference['query']; | 
					
						
							|  |  |  |  | 	} else { | 
					
						
							|  |  |  |  | 		if($reference['authority']) { | 
					
						
							|  |  |  |  | 			$target['authority'] = $reference['authority']; | 
					
						
							|  |  |  |  | 			$target['path'] = removeDotSegments($reference['path']); | 
					
						
							|  |  |  |  | 			$target['query'] = $reference['query']; | 
					
						
							|  |  |  |  | 		} else { | 
					
						
							|  |  |  |  | 			if($reference['path'] == '') { | 
					
						
							|  |  |  |  | 				$target['path'] = $base['path']; | 
					
						
							|  |  |  |  | 				if($reference['query']) { | 
					
						
							|  |  |  |  | 					$target['query'] = $reference['query']; | 
					
						
							|  |  |  |  | 				} else { | 
					
						
							|  |  |  |  | 					$target['query'] = $base['query']; | 
					
						
							|  |  |  |  | 				} | 
					
						
							|  |  |  |  | 			} else { | 
					
						
							|  |  |  |  | 				if(substr($reference['path'], 0, 1) == '/') { | 
					
						
							|  |  |  |  | 					$target['path'] = removeDotSegments($reference['path']); | 
					
						
							|  |  |  |  | 				} else { | 
					
						
							|  |  |  |  | 					$target['path'] = mergePaths($base, $reference); | 
					
						
							|  |  |  |  | 					$target['path'] = removeDotSegments($target['path']); | 
					
						
							|  |  |  |  | 				} | 
					
						
							|  |  |  |  | 				$target['query'] = $reference['query']; | 
					
						
							|  |  |  |  | 			} | 
					
						
							|  |  |  |  | 			$target['authority'] = $base['authority']; | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 		$target['scheme'] = $base['scheme']; | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 	$target['fragment'] = $reference['fragment']; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	# 5.3 Component Recomposition
 | 
					
						
							|  |  |  |  | 	$result = ''; | 
					
						
							|  |  |  |  | 	if($target['scheme']) { | 
					
						
							|  |  |  |  | 		$result .= $target['scheme'] . ':'; | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 	if($target['authority']) { | 
					
						
							|  |  |  |  | 		$result .= '//' . $target['authority']; | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 	$result .= $target['path']; | 
					
						
							|  |  |  |  | 	if($target['query']) { | 
					
						
							|  |  |  |  | 		$result .= '?' . $target['query']; | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 	if($target['fragment']) { | 
					
						
							|  |  |  |  | 		$result .= '#' . $target['fragment']; | 
					
						
							|  |  |  |  | 	} elseif($referenceURI == '#') { | 
					
						
							|  |  |  |  | 		$result .= '#'; | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 	return $result; | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | # 5.2.3 Merge Paths
 | 
					
						
							|  |  |  |  | function mergePaths($base, $reference) { | 
					
						
							|  |  |  |  | 	# If the base URI has a defined authority component and an empty
 | 
					
						
							| 
									
										
										
										
											2015-08-15 11:48:39 +02:00
										 |  |  |  | 	#    path,
 | 
					
						
							| 
									
										
										
										
											2014-06-23 20:52:01 +02:00
										 |  |  |  | 	if($base['authority'] && $base['path'] == null) { | 
					
						
							|  |  |  |  | 		# then return a string consisting of "/" concatenated with the
 | 
					
						
							|  |  |  |  | 		# reference's path; otherwise,
 | 
					
						
							|  |  |  |  | 		$merged = '/' . $reference['path']; | 
					
						
							|  |  |  |  | 	} else { | 
					
						
							|  |  |  |  | 		if(($pos=strrpos($base['path'], '/')) !== false) { | 
					
						
							|  |  |  |  | 			# return a string consisting of the reference's path component
 | 
					
						
							|  |  |  |  | 			#    appended to all but the last segment of the base URI's path (i.e.,
 | 
					
						
							|  |  |  |  | 			#    excluding any characters after the right-most "/" in the base URI
 | 
					
						
							|  |  |  |  | 			#    path,
 | 
					
						
							|  |  |  |  | 			$merged = substr($base['path'], 0, $pos + 1) . $reference['path']; | 
					
						
							|  |  |  |  | 		} else { | 
					
						
							|  |  |  |  | 			#    or excluding the entire base URI path if it does not contain
 | 
					
						
							|  |  |  |  | 			#    any "/" characters).
 | 
					
						
							|  |  |  |  | 			$merged = $base['path']; | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 	return $merged; | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | # 5.2.4.A Remove leading ../ or ./
 | 
					
						
							|  |  |  |  | function removeLeadingDotSlash(&$input) { | 
					
						
							|  |  |  |  | 	if(substr($input, 0, 3) == '../') { | 
					
						
							|  |  |  |  | 		$input = substr($input, 3); | 
					
						
							|  |  |  |  | 	} elseif(substr($input, 0, 2) == './') { | 
					
						
							|  |  |  |  | 		$input = substr($input, 2); | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | # 5.2.4.B Replace leading /. with /
 | 
					
						
							|  |  |  |  | function removeLeadingSlashDot(&$input) { | 
					
						
							|  |  |  |  | 	if(substr($input, 0, 3) == '/./') { | 
					
						
							|  |  |  |  | 		$input = '/' . substr($input, 3); | 
					
						
							|  |  |  |  | 	} else { | 
					
						
							|  |  |  |  | 		$input = '/' . substr($input, 2); | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | # 5.2.4.C Given leading /../ remove component from output buffer
 | 
					
						
							|  |  |  |  | function removeOneDirLevel(&$input, &$output) { | 
					
						
							|  |  |  |  | 	if(substr($input, 0, 4) == '/../') { | 
					
						
							|  |  |  |  | 		$input = '/' . substr($input, 4); | 
					
						
							|  |  |  |  | 	} else { | 
					
						
							|  |  |  |  | 		$input = '/' . substr($input, 3); | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 	$output = substr($output, 0, strrpos($output, '/')); | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | # 5.2.4.D Remove . and .. if it's the only thing in the input
 | 
					
						
							|  |  |  |  | function removeLoneDotDot(&$input) { | 
					
						
							|  |  |  |  | 	if($input == '.') { | 
					
						
							|  |  |  |  | 		$input = substr($input, 1); | 
					
						
							|  |  |  |  | 	} else { | 
					
						
							|  |  |  |  | 		$input = substr($input, 2); | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | # 5.2.4.E Move one segment from input to output
 | 
					
						
							|  |  |  |  | function moveOneSegmentFromInput(&$input, &$output) { | 
					
						
							|  |  |  |  | 	if(substr($input, 0, 1) != '/') { | 
					
						
							|  |  |  |  | 		$pos = strpos($input, '/'); | 
					
						
							|  |  |  |  | 	} else { | 
					
						
							|  |  |  |  | 		$pos = strpos($input, '/', 1); | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	if($pos === false) { | 
					
						
							|  |  |  |  | 		$output .= $input; | 
					
						
							|  |  |  |  | 		$input = ''; | 
					
						
							|  |  |  |  | 	} else { | 
					
						
							|  |  |  |  | 		$output .= substr($input, 0, $pos); | 
					
						
							|  |  |  |  | 		$input = substr($input, $pos); | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | # 5.2.4 Remove Dot Segments
 | 
					
						
							|  |  |  |  | function removeDotSegments($path) { | 
					
						
							|  |  |  |  | 	# 1.  The input buffer is initialized with the now-appended path
 | 
					
						
							|  |  |  |  | 	#     components and the output buffer is initialized to the empty
 | 
					
						
							|  |  |  |  | 	#     string.
 | 
					
						
							|  |  |  |  | 	$input = $path; | 
					
						
							|  |  |  |  | 	$output = ''; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	$step = 0; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	# 2.  While the input buffer is not empty, loop as follows:
 | 
					
						
							|  |  |  |  | 	while($input) { | 
					
						
							|  |  |  |  | 		$step++; | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 		if(substr($input, 0, 3) == '../' || substr($input, 0, 2) == './') { | 
					
						
							|  |  |  |  | 			#     A.  If the input buffer begins with a prefix of "../" or "./",
 | 
					
						
							|  |  |  |  | 			#         then remove that prefix from the input buffer; otherwise,
 | 
					
						
							|  |  |  |  | 			removeLeadingDotSlash($input); | 
					
						
							|  |  |  |  | 		} elseif(substr($input, 0, 3) == '/./' || $input == '/.') { | 
					
						
							|  |  |  |  | 			#     B.  if the input buffer begins with a prefix of "/./" or "/.",
 | 
					
						
							|  |  |  |  | 			#         where "." is a complete path segment, then replace that
 | 
					
						
							|  |  |  |  | 			#         prefix with "/" in the input buffer; otherwise,
 | 
					
						
							|  |  |  |  | 			removeLeadingSlashDot($input); | 
					
						
							|  |  |  |  | 		} elseif(substr($input, 0, 4) == '/../' || $input == '/..') { | 
					
						
							|  |  |  |  | 			#     C.  if the input buffer begins with a prefix of "/../" or "/..",
 | 
					
						
							|  |  |  |  | 			#          where ".." is a complete path segment, then replace that
 | 
					
						
							|  |  |  |  | 			#          prefix with "/" in the input buffer and remove the last
 | 
					
						
							|  |  |  |  | 			#          segment and its preceding "/" (if any) from the output
 | 
					
						
							|  |  |  |  | 			#          buffer; otherwise,
 | 
					
						
							|  |  |  |  | 			removeOneDirLevel($input, $output); | 
					
						
							|  |  |  |  | 		} elseif($input == '.' || $input == '..') { | 
					
						
							|  |  |  |  | 			#     D.  if the input buffer consists only of "." or "..", then remove
 | 
					
						
							|  |  |  |  | 			#         that from the input buffer; otherwise,
 | 
					
						
							|  |  |  |  | 			removeLoneDotDot($input); | 
					
						
							|  |  |  |  | 		} else { | 
					
						
							|  |  |  |  | 			#     E.  move the first path segment in the input buffer to the end of
 | 
					
						
							|  |  |  |  | 			#         the output buffer and any subsequent characters up to, but not including,
 | 
					
						
							|  |  |  |  | 			#         the next "/" character or the end of the input buffer
 | 
					
						
							|  |  |  |  | 			moveOneSegmentFromInput($input, $output); | 
					
						
							|  |  |  |  | 		} | 
					
						
							|  |  |  |  | 	} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 	return $output; | 
					
						
							|  |  |  |  | } |