forked from GNUsocial/gnu-social
Mf2 extlib update from https://github.com/indieweb/php-mf2/
This commit is contained in:
parent
b434243416
commit
c77bce12e5
@ -13,17 +13,17 @@ use stdClass;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse Microformats2
|
* Parse Microformats2
|
||||||
*
|
*
|
||||||
* Functional shortcut for the commonest cases of parsing microformats2 from HTML.
|
* Functional shortcut for the commonest cases of parsing microformats2 from HTML.
|
||||||
*
|
*
|
||||||
* Example usage:
|
* Example usage:
|
||||||
*
|
*
|
||||||
* use Mf2;
|
* use Mf2;
|
||||||
* $output = Mf2\parse('<span class="h-card">Barnaby Walters</span>');
|
* $output = Mf2\parse('<span class="h-card">Barnaby Walters</span>');
|
||||||
* echo json_encode($output, JSON_PRETTY_PRINT);
|
* echo json_encode($output, JSON_PRETTY_PRINT);
|
||||||
*
|
*
|
||||||
* Produces:
|
* Produces:
|
||||||
*
|
*
|
||||||
* {
|
* {
|
||||||
* "items": [
|
* "items": [
|
||||||
* {
|
* {
|
||||||
@ -35,7 +35,7 @@ use stdClass;
|
|||||||
* ],
|
* ],
|
||||||
* "rels": {}
|
* "rels": {}
|
||||||
* }
|
* }
|
||||||
*
|
*
|
||||||
* @param string|DOMDocument $input The HTML string or DOMDocument object to parse
|
* @param string|DOMDocument $input The HTML string or DOMDocument object to parse
|
||||||
* @param string $url The URL the input document was found at, for relative URL resolution
|
* @param string $url The URL the input document was found at, for relative URL resolution
|
||||||
* @param bool $convertClassic whether or not to convert classic microformats
|
* @param bool $convertClassic whether or not to convert classic microformats
|
||||||
@ -84,7 +84,7 @@ function fetch($url, $convertClassic = true, &$curlInfo=null) {
|
|||||||
/**
|
/**
|
||||||
* Unicode to HTML Entities
|
* Unicode to HTML Entities
|
||||||
* @param string $input String containing characters to convert into HTML entities
|
* @param string $input String containing characters to convert into HTML entities
|
||||||
* @return string
|
* @return string
|
||||||
*/
|
*/
|
||||||
function unicodeToHtmlEntities($input) {
|
function unicodeToHtmlEntities($input) {
|
||||||
return mb_convert_encoding($input, 'HTML-ENTITIES', mb_detect_encoding($input));
|
return mb_convert_encoding($input, 'HTML-ENTITIES', mb_detect_encoding($input));
|
||||||
@ -92,10 +92,10 @@ function unicodeToHtmlEntities($input) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Collapse Whitespace
|
* Collapse Whitespace
|
||||||
*
|
*
|
||||||
* Collapses any sequences of whitespace within a string into a single space
|
* Collapses any sequences of whitespace within a string into a single space
|
||||||
* character.
|
* character.
|
||||||
*
|
*
|
||||||
* @deprecated since v0.2.3
|
* @deprecated since v0.2.3
|
||||||
* @param string $str
|
* @param string $str
|
||||||
* @return string
|
* @return string
|
||||||
@ -113,10 +113,10 @@ function unicodeTrim($str) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Microformat Name From Class string
|
* Microformat Name From Class string
|
||||||
*
|
*
|
||||||
* Given the value of @class, get the relevant mf classnames (e.g. h-card,
|
* Given the value of @class, get the relevant mf classnames (e.g. h-card,
|
||||||
* p-name).
|
* p-name).
|
||||||
*
|
*
|
||||||
* @param string $class A space delimited list of classnames
|
* @param string $class A space delimited list of classnames
|
||||||
* @param string $prefix The prefix to look for
|
* @param string $prefix The prefix to look for
|
||||||
* @return string|array The prefixed name of the first microfomats class found or false
|
* @return string|array The prefixed name of the first microfomats class found or false
|
||||||
@ -127,9 +127,9 @@ function mfNamesFromClass($class, $prefix='h-') {
|
|||||||
$matches = array();
|
$matches = array();
|
||||||
|
|
||||||
foreach ($classes as $classname) {
|
foreach ($classes as $classname) {
|
||||||
$compare_classname = strtolower(' ' . $classname);
|
$compare_classname = ' ' . $classname;
|
||||||
$compare_prefix = strtolower(' ' . $prefix);
|
$compare_prefix = ' ' . $prefix;
|
||||||
if (stristr($compare_classname, $compare_prefix) !== false && ($compare_classname != $compare_prefix)) {
|
if (strstr($compare_classname, $compare_prefix) !== false && ($compare_classname != $compare_prefix)) {
|
||||||
$matches[] = ($prefix === 'h-') ? $classname : substr($classname, strlen($prefix));
|
$matches[] = ($prefix === 'h-') ? $classname : substr($classname, strlen($prefix));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -139,10 +139,10 @@ function mfNamesFromClass($class, $prefix='h-') {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Get Nested µf Property Name From Class
|
* Get Nested µf Property Name From Class
|
||||||
*
|
*
|
||||||
* Returns all the p-, u-, dt- or e- prefixed classnames it finds in a
|
* Returns all the p-, u-, dt- or e- prefixed classnames it finds in a
|
||||||
* space-separated string.
|
* space-separated string.
|
||||||
*
|
*
|
||||||
* @param string $class
|
* @param string $class
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
@ -153,19 +153,24 @@ function nestedMfPropertyNamesFromClass($class) {
|
|||||||
$class = str_replace(array(' ', ' ', "\n"), ' ', $class);
|
$class = str_replace(array(' ', ' ', "\n"), ' ', $class);
|
||||||
foreach (explode(' ', $class) as $classname) {
|
foreach (explode(' ', $class) as $classname) {
|
||||||
foreach ($prefixes as $prefix) {
|
foreach ($prefixes as $prefix) {
|
||||||
$compare_classname = strtolower(' ' . $classname);
|
// Check if $classname is a valid property classname for $prefix.
|
||||||
if (stristr($compare_classname, $prefix) && ($compare_classname != $prefix)) {
|
if (mb_substr($classname, 0, mb_strlen($prefix)) == $prefix && $classname != $prefix) {
|
||||||
$propertyNames = array_merge($propertyNames, mfNamesFromClass($classname, ltrim($prefix)));
|
$propertyName = mb_substr($classname, mb_strlen($prefix));
|
||||||
|
$propertyNames[$propertyName][] = $prefix;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach ($propertyNames as $property => $prefixes) {
|
||||||
|
$propertyNames[$property] = array_unique($prefixes);
|
||||||
|
}
|
||||||
|
|
||||||
return $propertyNames;
|
return $propertyNames;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wraps mfNamesFromClass to handle an element as input (common)
|
* Wraps mfNamesFromClass to handle an element as input (common)
|
||||||
*
|
*
|
||||||
* @param DOMElement $e The element to get the classname for
|
* @param DOMElement $e The element to get the classname for
|
||||||
* @param string $prefix The prefix to look for
|
* @param string $prefix The prefix to look for
|
||||||
* @return mixed See return value of mf2\Parser::mfNameFromClass()
|
* @return mixed See return value of mf2\Parser::mfNameFromClass()
|
||||||
@ -192,28 +197,27 @@ function convertTimeFormat($time) {
|
|||||||
$hh = $mm = $ss = '';
|
$hh = $mm = $ss = '';
|
||||||
preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches);
|
preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches);
|
||||||
|
|
||||||
// if no am/pm specified
|
// If no am/pm is specified:
|
||||||
if (empty($matches[4])) {
|
if (empty($matches[4])) {
|
||||||
return $time;
|
return $time;
|
||||||
}
|
} else {
|
||||||
// else am/pm specified
|
// Otherwise, am/pm is specified.
|
||||||
else {
|
|
||||||
$meridiem = strtolower(str_replace('.', '', $matches[4]));
|
$meridiem = strtolower(str_replace('.', '', $matches[4]));
|
||||||
|
|
||||||
// hours
|
// Hours.
|
||||||
$hh = $matches[1];
|
$hh = $matches[1];
|
||||||
|
|
||||||
// add 12 to the pm hours
|
// Add 12 to hours if pm applies.
|
||||||
if ($meridiem == 'pm' && ($hh < 12)) {
|
if ($meridiem == 'pm' && ($hh < 12)) {
|
||||||
$hh += 12;
|
$hh += 12;
|
||||||
}
|
}
|
||||||
|
|
||||||
$hh = str_pad($hh, 2, '0', STR_PAD_LEFT);
|
$hh = str_pad($hh, 2, '0', STR_PAD_LEFT);
|
||||||
|
|
||||||
// minutes
|
// Minutes.
|
||||||
$mm = (empty($matches[2]) ) ? '00' : $matches[2];
|
$mm = (empty($matches[2]) ) ? '00' : $matches[2];
|
||||||
|
|
||||||
// seconds, only if supplied
|
// Seconds, only if supplied.
|
||||||
if (!empty($matches[3])) {
|
if (!empty($matches[3])) {
|
||||||
$ss = $matches[3];
|
$ss = $matches[3];
|
||||||
}
|
}
|
||||||
@ -229,11 +233,11 @@ function convertTimeFormat($time) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Microformats2 Parser
|
* Microformats2 Parser
|
||||||
*
|
*
|
||||||
* A class which holds state for parsing microformats2 from HTML.
|
* A class which holds state for parsing microformats2 from HTML.
|
||||||
*
|
*
|
||||||
* Example usage:
|
* Example usage:
|
||||||
*
|
*
|
||||||
* use Mf2;
|
* use Mf2;
|
||||||
* $parser = new Mf2\Parser('<p class="h-card">Barnaby Walters</p>');
|
* $parser = new Mf2\Parser('<p class="h-card">Barnaby Walters</p>');
|
||||||
* $output = $parser->parse();
|
* $output = $parser->parse();
|
||||||
@ -244,18 +248,18 @@ class Parser {
|
|||||||
|
|
||||||
/** @var DOMXPath object which can be used to query over any fragment*/
|
/** @var DOMXPath object which can be used to query over any fragment*/
|
||||||
public $xpath;
|
public $xpath;
|
||||||
|
|
||||||
/** @var DOMDocument */
|
/** @var DOMDocument */
|
||||||
public $doc;
|
public $doc;
|
||||||
|
|
||||||
/** @var SplObjectStorage */
|
/** @var SplObjectStorage */
|
||||||
protected $parsed;
|
protected $parsed;
|
||||||
|
|
||||||
public $jsonMode;
|
public $jsonMode;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor
|
* Constructor
|
||||||
*
|
*
|
||||||
* @param DOMDocument|string $input The data to parse. A string of HTML or a DOMDocument
|
* @param DOMDocument|string $input The data to parse. A string of HTML or a DOMDocument
|
||||||
* @param string $url The URL of the parsed document, for relative URL resolution
|
* @param string $url The URL of the parsed document, for relative URL resolution
|
||||||
* @param boolean $jsonMode Whether or not to use a stdClass instance for an empty `rels` dictionary. This breaks PHP looping over rels, but allows the output to be correctly serialized as JSON.
|
* @param boolean $jsonMode Whether or not to use a stdClass instance for an empty `rels` dictionary. This breaks PHP looping over rels, but allows the output to be correctly serialized as JSON.
|
||||||
@ -271,20 +275,20 @@ class Parser {
|
|||||||
$doc = new DOMDocument();
|
$doc = new DOMDocument();
|
||||||
@$doc->loadHTML('');
|
@$doc->loadHTML('');
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->xpath = new DOMXPath($doc);
|
$this->xpath = new DOMXPath($doc);
|
||||||
|
|
||||||
$baseurl = $url;
|
$baseurl = $url;
|
||||||
foreach ($this->xpath->query('//base[@href]') as $base) {
|
foreach ($this->xpath->query('//base[@href]') as $base) {
|
||||||
$baseElementUrl = $base->getAttribute('href');
|
$baseElementUrl = $base->getAttribute('href');
|
||||||
|
|
||||||
if (parse_url($baseElementUrl, PHP_URL_SCHEME) === null) {
|
if (parse_url($baseElementUrl, PHP_URL_SCHEME) === null) {
|
||||||
/* The base element URL is relative to the document URL.
|
/* The base element URL is relative to the document URL.
|
||||||
*
|
*
|
||||||
* :/
|
* :/
|
||||||
*
|
*
|
||||||
* Perhaps the author was high? */
|
* Perhaps the author was high? */
|
||||||
|
|
||||||
$baseurl = resolveUrl($url, $baseElementUrl);
|
$baseurl = resolveUrl($url, $baseElementUrl);
|
||||||
} else {
|
} else {
|
||||||
$baseurl = $baseElementUrl;
|
$baseurl = $baseElementUrl;
|
||||||
@ -296,31 +300,31 @@ class Parser {
|
|||||||
foreach ($this->xpath->query('//template') as $templateEl) {
|
foreach ($this->xpath->query('//template') as $templateEl) {
|
||||||
$templateEl->parentNode->removeChild($templateEl);
|
$templateEl->parentNode->removeChild($templateEl);
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->baseurl = $baseurl;
|
$this->baseurl = $baseurl;
|
||||||
$this->doc = $doc;
|
$this->doc = $doc;
|
||||||
$this->parsed = new SplObjectStorage();
|
$this->parsed = new SplObjectStorage();
|
||||||
$this->jsonMode = $jsonMode;
|
$this->jsonMode = $jsonMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function elementPrefixParsed(\DOMElement $e, $prefix) {
|
private function elementPrefixParsed(\DOMElement $e, $prefix) {
|
||||||
if (!$this->parsed->contains($e))
|
if (!$this->parsed->contains($e))
|
||||||
$this->parsed->attach($e, array());
|
$this->parsed->attach($e, array());
|
||||||
|
|
||||||
$prefixes = $this->parsed[$e];
|
$prefixes = $this->parsed[$e];
|
||||||
$prefixes[] = $prefix;
|
$prefixes[] = $prefix;
|
||||||
$this->parsed[$e] = $prefixes;
|
$this->parsed[$e] = $prefixes;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function isElementParsed(\DOMElement $e, $prefix) {
|
private function isElementParsed(\DOMElement $e, $prefix) {
|
||||||
if (!$this->parsed->contains($e))
|
if (!$this->parsed->contains($e))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
$prefixes = $this->parsed[$e];
|
$prefixes = $this->parsed[$e];
|
||||||
|
|
||||||
if (!in_array($prefix, $prefixes))
|
if (!in_array($prefix, $prefixes))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -352,72 +356,72 @@ class Parser {
|
|||||||
|
|
||||||
// TODO: figure out if this has problems with sms: and geo: URLs
|
// TODO: figure out if this has problems with sms: and geo: URLs
|
||||||
public function resolveUrl($url) {
|
public function resolveUrl($url) {
|
||||||
// If the URL is seriously malformed it’s probably beyond the scope of this
|
// If the URL is seriously malformed it’s probably beyond the scope of this
|
||||||
// parser to try to do anything with it.
|
// parser to try to do anything with it.
|
||||||
if (parse_url($url) === false)
|
if (parse_url($url) === false)
|
||||||
return $url;
|
return $url;
|
||||||
|
|
||||||
$scheme = parse_url($url, PHP_URL_SCHEME);
|
$scheme = parse_url($url, PHP_URL_SCHEME);
|
||||||
|
|
||||||
if (empty($scheme) and !empty($this->baseurl)) {
|
if (empty($scheme) and !empty($this->baseurl)) {
|
||||||
return resolveUrl($this->baseurl, $url);
|
return resolveUrl($this->baseurl, $url);
|
||||||
} else {
|
} else {
|
||||||
return $url;
|
return $url;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parsing Functions
|
// Parsing Functions
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse value-class/value-title on an element, joining with $separator if
|
* Parse value-class/value-title on an element, joining with $separator if
|
||||||
* there are multiple.
|
* there are multiple.
|
||||||
*
|
*
|
||||||
* @param \DOMElement $e
|
* @param \DOMElement $e
|
||||||
* @param string $separator = '' if multiple value-title elements, join with this string
|
* @param string $separator = '' if multiple value-title elements, join with this string
|
||||||
* @return string|null the parsed value or null if value-class or -title aren’t in use
|
* @return string|null the parsed value or null if value-class or -title aren’t in use
|
||||||
*/
|
*/
|
||||||
public function parseValueClassTitle(\DOMElement $e, $separator = '') {
|
public function parseValueClassTitle(\DOMElement $e, $separator = '') {
|
||||||
$valueClassElements = $this->xpath->query('./*[contains(concat(" ", @class, " "), " value ")]', $e);
|
$valueClassElements = $this->xpath->query('./*[contains(concat(" ", @class, " "), " value ")]', $e);
|
||||||
|
|
||||||
if ($valueClassElements->length !== 0) {
|
if ($valueClassElements->length !== 0) {
|
||||||
// Process value-class stuff
|
// Process value-class stuff
|
||||||
$val = '';
|
$val = '';
|
||||||
foreach ($valueClassElements as $el) {
|
foreach ($valueClassElements as $el) {
|
||||||
$val .= $this->textContent($el);
|
$val .= $this->textContent($el);
|
||||||
}
|
}
|
||||||
|
|
||||||
return unicodeTrim($val);
|
return unicodeTrim($val);
|
||||||
}
|
}
|
||||||
|
|
||||||
$valueTitleElements = $this->xpath->query('./*[contains(concat(" ", @class, " "), " value-title ")]', $e);
|
$valueTitleElements = $this->xpath->query('./*[contains(concat(" ", @class, " "), " value-title ")]', $e);
|
||||||
|
|
||||||
if ($valueTitleElements->length !== 0) {
|
if ($valueTitleElements->length !== 0) {
|
||||||
// Process value-title stuff
|
// Process value-title stuff
|
||||||
$val = '';
|
$val = '';
|
||||||
foreach ($valueTitleElements as $el) {
|
foreach ($valueTitleElements as $el) {
|
||||||
$val .= $el->getAttribute('title');
|
$val .= $el->getAttribute('title');
|
||||||
}
|
}
|
||||||
|
|
||||||
return unicodeTrim($val);
|
return unicodeTrim($val);
|
||||||
}
|
}
|
||||||
|
|
||||||
// No value-title or -class in this element
|
// No value-title or -class in this element
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given an element with class="p-*", get it’s value
|
* Given an element with class="p-*", get it’s value
|
||||||
*
|
*
|
||||||
* @param DOMElement $p The element to parse
|
* @param DOMElement $p The element to parse
|
||||||
* @return string The plaintext value of $p, dependant on type
|
* @return string The plaintext value of $p, dependant on type
|
||||||
* @todo Make this adhere to value-class
|
* @todo Make this adhere to value-class
|
||||||
*/
|
*/
|
||||||
public function parseP(\DOMElement $p) {
|
public function parseP(\DOMElement $p) {
|
||||||
$classTitle = $this->parseValueClassTitle($p, ' ');
|
$classTitle = $this->parseValueClassTitle($p, ' ');
|
||||||
|
|
||||||
if ($classTitle !== null)
|
if ($classTitle !== null)
|
||||||
return $classTitle;
|
return $classTitle;
|
||||||
|
|
||||||
if ($p->tagName == 'img' and $p->getAttribute('alt') !== '') {
|
if ($p->tagName == 'img' and $p->getAttribute('alt') !== '') {
|
||||||
$pValue = $p->getAttribute('alt');
|
$pValue = $p->getAttribute('alt');
|
||||||
} elseif ($p->tagName == 'area' and $p->getAttribute('alt') !== '') {
|
} elseif ($p->tagName == 'area' and $p->getAttribute('alt') !== '') {
|
||||||
@ -429,13 +433,13 @@ class Parser {
|
|||||||
} else {
|
} else {
|
||||||
$pValue = unicodeTrim($this->textContent($p));
|
$pValue = unicodeTrim($this->textContent($p));
|
||||||
}
|
}
|
||||||
|
|
||||||
return $pValue;
|
return $pValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given an element with class="u-*", get the value of the URL
|
* Given an element with class="u-*", get the value of the URL
|
||||||
*
|
*
|
||||||
* @param DOMElement $u The element to parse
|
* @param DOMElement $u The element to parse
|
||||||
* @return string The plaintext value of $u, dependant on type
|
* @return string The plaintext value of $u, dependant on type
|
||||||
* @todo make this adhere to value-class
|
* @todo make this adhere to value-class
|
||||||
@ -443,18 +447,18 @@ class Parser {
|
|||||||
public function parseU(\DOMElement $u) {
|
public function parseU(\DOMElement $u) {
|
||||||
if (($u->tagName == 'a' or $u->tagName == 'area') and $u->getAttribute('href') !== null) {
|
if (($u->tagName == 'a' or $u->tagName == 'area') and $u->getAttribute('href') !== null) {
|
||||||
$uValue = $u->getAttribute('href');
|
$uValue = $u->getAttribute('href');
|
||||||
} elseif ($u->tagName == 'img' and $u->getAttribute('src') !== null) {
|
} elseif (in_array($u->tagName, array('img', 'audio', 'video', 'source')) and $u->getAttribute('src') !== null) {
|
||||||
$uValue = $u->getAttribute('src');
|
$uValue = $u->getAttribute('src');
|
||||||
} elseif ($u->tagName == 'object' and $u->getAttribute('data') !== null) {
|
} elseif ($u->tagName == 'object' and $u->getAttribute('data') !== null) {
|
||||||
$uValue = $u->getAttribute('data');
|
$uValue = $u->getAttribute('data');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isset($uValue)) {
|
if (isset($uValue)) {
|
||||||
return $this->resolveUrl($uValue);
|
return $this->resolveUrl($uValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
$classTitle = $this->parseValueClassTitle($u);
|
$classTitle = $this->parseValueClassTitle($u);
|
||||||
|
|
||||||
if ($classTitle !== null) {
|
if ($classTitle !== null) {
|
||||||
return $classTitle;
|
return $classTitle;
|
||||||
} elseif ($u->tagName == 'abbr' and $u->getAttribute('title') !== null) {
|
} elseif ($u->tagName == 'abbr' and $u->getAttribute('title') !== null) {
|
||||||
@ -468,7 +472,7 @@ class Parser {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Given an element with class="dt-*", get the value of the datetime as a php date object
|
* Given an element with class="dt-*", get the value of the datetime as a php date object
|
||||||
*
|
*
|
||||||
* @param DOMElement $dt The element to parse
|
* @param DOMElement $dt The element to parse
|
||||||
* @param array $dates Array of dates processed so far
|
* @param array $dates Array of dates processed so far
|
||||||
* @return string The datetime string found
|
* @return string The datetime string found
|
||||||
@ -477,11 +481,11 @@ class Parser {
|
|||||||
// Check for value-class pattern
|
// Check for value-class pattern
|
||||||
$valueClassChildren = $this->xpath->query('./*[contains(concat(" ", @class, " "), " value ") or contains(concat(" ", @class, " "), " value-title ")]', $dt);
|
$valueClassChildren = $this->xpath->query('./*[contains(concat(" ", @class, " "), " value ") or contains(concat(" ", @class, " "), " value-title ")]', $dt);
|
||||||
$dtValue = false;
|
$dtValue = false;
|
||||||
|
|
||||||
if ($valueClassChildren->length > 0) {
|
if ($valueClassChildren->length > 0) {
|
||||||
// They’re using value-class
|
// They’re using value-class
|
||||||
$dateParts = array();
|
$dateParts = array();
|
||||||
|
|
||||||
foreach ($valueClassChildren as $e) {
|
foreach ($valueClassChildren as $e) {
|
||||||
if (strstr(' ' . $e->getAttribute('class') . ' ', ' value-title ')) {
|
if (strstr(' ' . $e->getAttribute('class') . ' ', ' value-title ')) {
|
||||||
$title = $e->getAttribute('title');
|
$title = $e->getAttribute('title');
|
||||||
@ -591,16 +595,16 @@ class Parser {
|
|||||||
$dtValue = $dt->nodeValue;
|
$dtValue = $dt->nodeValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( preg_match('/(\d{4}-\d{2}-\d{2})/', $dtValue, $matches) ) {
|
if (preg_match('/(\d{4}-\d{2}-\d{2})/', $dtValue, $matches)) {
|
||||||
$dates[] = $matches[0];
|
$dates[] = $matches[0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* if $dtValue is only a time and there are recently parsed dates,
|
* if $dtValue is only a time and there are recently parsed dates,
|
||||||
* form the full date-time using the most recnetly parsed dt- value
|
* form the full date-time using the most recently parsed dt- value
|
||||||
*/
|
*/
|
||||||
if ( (preg_match('/^\d{1,2}:\d{1,2}(Z?[+|-]\d{2}:?\d{2})?/', $dtValue) or preg_match('/^\d{1,2}[a|p]m/', $dtValue)) && !empty($dates) ) {
|
if ((preg_match('/^\d{1,2}:\d{1,2}(Z?[+|-]\d{2}:?\d{2})?/', $dtValue) or preg_match('/^\d{1,2}[a|p]m/', $dtValue)) && !empty($dates)) {
|
||||||
$dtValue = convertTimeFormat($dtValue);
|
$dtValue = convertTimeFormat($dtValue);
|
||||||
$dtValue = end($dates) . 'T' . unicodeTrim($dtValue, 'T');
|
$dtValue = end($dates) . 'T' . unicodeTrim($dtValue, 'T');
|
||||||
}
|
}
|
||||||
@ -613,15 +617,15 @@ class Parser {
|
|||||||
*
|
*
|
||||||
* @param DOMElement $e The element to parse
|
* @param DOMElement $e The element to parse
|
||||||
* @return string $e’s innerHTML
|
* @return string $e’s innerHTML
|
||||||
*
|
*
|
||||||
* @todo need to mark this element as e- parsed so it doesn’t get parsed as it’s parent’s e-* too
|
* @todo need to mark this element as e- parsed so it doesn’t get parsed as it’s parent’s e-* too
|
||||||
*/
|
*/
|
||||||
public function parseE(\DOMElement $e) {
|
public function parseE(\DOMElement $e) {
|
||||||
$classTitle = $this->parseValueClassTitle($e);
|
$classTitle = $this->parseValueClassTitle($e);
|
||||||
|
|
||||||
if ($classTitle !== null)
|
if ($classTitle !== null)
|
||||||
return $classTitle;
|
return $classTitle;
|
||||||
|
|
||||||
// Expand relative URLs within children of this element
|
// Expand relative URLs within children of this element
|
||||||
// TODO: as it is this is not relative to only children, make this .// and rerun tests
|
// TODO: as it is this is not relative to only children, make this .// and rerun tests
|
||||||
$this->resolveChildUrls($e);
|
$this->resolveChildUrls($e);
|
||||||
@ -630,7 +634,7 @@ class Parser {
|
|||||||
foreach ($e->childNodes as $node) {
|
foreach ($e->childNodes as $node) {
|
||||||
$html .= $node->C14N();
|
$html .= $node->C14N();
|
||||||
}
|
}
|
||||||
|
|
||||||
return array(
|
return array(
|
||||||
'html' => $html,
|
'html' => $html,
|
||||||
'value' => unicodeTrim($this->textContent($e))
|
'value' => unicodeTrim($this->textContent($e))
|
||||||
@ -639,7 +643,7 @@ class Parser {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Recursively parse microformats
|
* Recursively parse microformats
|
||||||
*
|
*
|
||||||
* @param DOMElement $e The element to parse
|
* @param DOMElement $e The element to parse
|
||||||
* @return array A representation of the values contained within microformat $e
|
* @return array A representation of the values contained within microformat $e
|
||||||
*/
|
*/
|
||||||
@ -660,26 +664,39 @@ class Parser {
|
|||||||
foreach ($this->xpath->query('.//*[contains(concat(" ", @class)," h-")]', $e) as $subMF) {
|
foreach ($this->xpath->query('.//*[contains(concat(" ", @class)," h-")]', $e) as $subMF) {
|
||||||
// Parse
|
// Parse
|
||||||
$result = $this->parseH($subMF);
|
$result = $this->parseH($subMF);
|
||||||
|
|
||||||
// If result was already parsed, skip it
|
// If result was already parsed, skip it
|
||||||
if (null === $result)
|
if (null === $result)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
// In most cases, the value attribute of the nested microformat should be the p- parsed value of the element.
|
||||||
|
// The only times this is different is when the microformat is nested under certain prefixes, which are handled below.
|
||||||
$result['value'] = $this->parseP($subMF);
|
$result['value'] = $this->parseP($subMF);
|
||||||
|
|
||||||
// Does this µf have any property names other than h-*?
|
// Does this µf have any property names other than h-*?
|
||||||
$properties = nestedMfPropertyNamesFromElement($subMF);
|
$properties = nestedMfPropertyNamesFromElement($subMF);
|
||||||
|
|
||||||
if (!empty($properties)) {
|
if (!empty($properties)) {
|
||||||
// Yes! It’s a nested property µf
|
// Yes! It’s a nested property µf
|
||||||
foreach ($properties as $property) {
|
foreach ($properties as $property => $prefixes) {
|
||||||
$return[$property][] = $result;
|
// Note: handling microformat nesting under multiple conflicting prefixes is not currently specified by the mf2 parsing spec.
|
||||||
|
$prefixSpecificResult = $result;
|
||||||
|
if (in_array('p-', $prefixes)) {
|
||||||
|
$prefixSpecificResult['value'] = $prefixSpecificResult['properties']['name'][0];
|
||||||
|
} elseif (in_array('e-', $prefixes)) {
|
||||||
|
$eParsedResult = $this->parseE($subMF);
|
||||||
|
$prefixSpecificResult['html'] = $eParsedResult['html'];
|
||||||
|
$prefixSpecificResult['value'] = $eParsedResult['value'];
|
||||||
|
} elseif (in_array('u-', $prefixes)) {
|
||||||
|
$prefixSpecificResult['value'] = $this->parseU($subMF);
|
||||||
|
}
|
||||||
|
$return[$property][] = $prefixSpecificResult;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// No, it’s a child µf
|
// No, it’s a child µf
|
||||||
$children[] = $result;
|
$children[] = $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure this sub-mf won’t get parsed as a µf or property
|
// Make sure this sub-mf won’t get parsed as a µf or property
|
||||||
// TODO: Determine if clearing this is required?
|
// TODO: Determine if clearing this is required?
|
||||||
$this->elementPrefixParsed($subMF, 'h');
|
$this->elementPrefixParsed($subMF, 'h');
|
||||||
@ -689,19 +706,24 @@ class Parser {
|
|||||||
$this->elementPrefixParsed($subMF, 'e');
|
$this->elementPrefixParsed($subMF, 'e');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if($e->tagName == 'area') {
|
||||||
|
$coords = $e->getAttribute('coords');
|
||||||
|
$shape = $e->getAttribute('shape');
|
||||||
|
}
|
||||||
|
|
||||||
// Handle p-*
|
// Handle p-*
|
||||||
foreach ($this->xpath->query('.//*[contains(concat(" ", @class) ," p-")]', $e) as $p) {
|
foreach ($this->xpath->query('.//*[contains(concat(" ", @class) ," p-")]', $e) as $p) {
|
||||||
if ($this->isElementParsed($p, 'p'))
|
if ($this->isElementParsed($p, 'p'))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
$pValue = $this->parseP($p);
|
$pValue = $this->parseP($p);
|
||||||
|
|
||||||
// Add the value to the array for it’s p- properties
|
// Add the value to the array for it’s p- properties
|
||||||
foreach (mfNamesFromElement($p, 'p-') as $propName) {
|
foreach (mfNamesFromElement($p, 'p-') as $propName) {
|
||||||
if (!empty($propName))
|
if (!empty($propName))
|
||||||
$return[$propName][] = $pValue;
|
$return[$propName][] = $pValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure this sub-mf won’t get parsed as a top level mf
|
// Make sure this sub-mf won’t get parsed as a top level mf
|
||||||
$this->elementPrefixParsed($p, 'p');
|
$this->elementPrefixParsed($p, 'p');
|
||||||
}
|
}
|
||||||
@ -710,32 +732,32 @@ class Parser {
|
|||||||
foreach ($this->xpath->query('.//*[contains(concat(" ", @class)," u-")]', $e) as $u) {
|
foreach ($this->xpath->query('.//*[contains(concat(" ", @class)," u-")]', $e) as $u) {
|
||||||
if ($this->isElementParsed($u, 'u'))
|
if ($this->isElementParsed($u, 'u'))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
$uValue = $this->parseU($u);
|
$uValue = $this->parseU($u);
|
||||||
|
|
||||||
// Add the value to the array for it’s property types
|
// Add the value to the array for it’s property types
|
||||||
foreach (mfNamesFromElement($u, 'u-') as $propName) {
|
foreach (mfNamesFromElement($u, 'u-') as $propName) {
|
||||||
$return[$propName][] = $uValue;
|
$return[$propName][] = $uValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure this sub-mf won’t get parsed as a top level mf
|
// Make sure this sub-mf won’t get parsed as a top level mf
|
||||||
$this->elementPrefixParsed($u, 'u');
|
$this->elementPrefixParsed($u, 'u');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle dt-*
|
// Handle dt-*
|
||||||
foreach ($this->xpath->query('.//*[contains(concat(" ", @class), " dt-")]', $e) as $dt) {
|
foreach ($this->xpath->query('.//*[contains(concat(" ", @class), " dt-")]', $e) as $dt) {
|
||||||
if ($this->isElementParsed($dt, 'dt'))
|
if ($this->isElementParsed($dt, 'dt'))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
$dtValue = $this->parseDT($dt, $dates);
|
$dtValue = $this->parseDT($dt, $dates);
|
||||||
|
|
||||||
if ($dtValue) {
|
if ($dtValue) {
|
||||||
// Add the value to the array for dt- properties
|
// Add the value to the array for dt- properties
|
||||||
foreach (mfNamesFromElement($dt, 'dt-') as $propName) {
|
foreach (mfNamesFromElement($dt, 'dt-') as $propName) {
|
||||||
$return[$propName][] = $dtValue;
|
$return[$propName][] = $dtValue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure this sub-mf won’t get parsed as a top level mf
|
// Make sure this sub-mf won’t get parsed as a top level mf
|
||||||
$this->elementPrefixParsed($dt, 'dt');
|
$this->elementPrefixParsed($dt, 'dt');
|
||||||
}
|
}
|
||||||
@ -762,22 +784,43 @@ class Parser {
|
|||||||
if (!array_key_exists('name', $return)) {
|
if (!array_key_exists('name', $return)) {
|
||||||
try {
|
try {
|
||||||
// Look for img @alt
|
// Look for img @alt
|
||||||
if ($e->tagName == 'img' and $e->getAttribute('alt') != '')
|
if (($e->tagName == 'img' or $e->tagName == 'area') and $e->getAttribute('alt') != '')
|
||||||
throw new Exception($e->getAttribute('alt'));
|
throw new Exception($e->getAttribute('alt'));
|
||||||
|
|
||||||
if ($e->tagName == 'abbr' and $e->hasAttribute('title'))
|
if ($e->tagName == 'abbr' and $e->hasAttribute('title'))
|
||||||
throw new Exception($e->getAttribute('title'));
|
throw new Exception($e->getAttribute('title'));
|
||||||
|
|
||||||
// Look for nested img @alt
|
// Look for nested img @alt
|
||||||
foreach ($this->xpath->query('./img[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) {
|
foreach ($this->xpath->query('./img[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) {
|
||||||
if ($em->getAttribute('alt') != '')
|
$emNames = mfNamesFromElement($em, 'h-');
|
||||||
|
if (empty($emNames) && $em->getAttribute('alt') != '') {
|
||||||
throw new Exception($em->getAttribute('alt'));
|
throw new Exception($em->getAttribute('alt'));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Look for nested area @alt
|
||||||
|
foreach ($this->xpath->query('./area[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) {
|
||||||
|
$emNames = mfNamesFromElement($em, 'h-');
|
||||||
|
if (empty($emNames) && $em->getAttribute('alt') != '') {
|
||||||
|
throw new Exception($em->getAttribute('alt'));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Look for double nested img @alt
|
// Look for double nested img @alt
|
||||||
foreach ($this->xpath->query('./*[count(preceding-sibling::*)+count(following-sibling::*)=0]/img[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) {
|
foreach ($this->xpath->query('./*[count(preceding-sibling::*)+count(following-sibling::*)=0]/img[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) {
|
||||||
if ($em->getAttribute('alt') != '')
|
$emNames = mfNamesFromElement($em, 'h-');
|
||||||
|
if (empty($emNames) && $em->getAttribute('alt') != '') {
|
||||||
throw new Exception($em->getAttribute('alt'));
|
throw new Exception($em->getAttribute('alt'));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look for double nested area @alt
|
||||||
|
foreach ($this->xpath->query('./*[count(preceding-sibling::*)+count(following-sibling::*)=0]/area[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) {
|
||||||
|
$emNames = mfNamesFromElement($em, 'h-');
|
||||||
|
if (empty($emNames) && $em->getAttribute('alt') != '') {
|
||||||
|
throw new Exception($em->getAttribute('alt'));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new Exception($e->nodeValue);
|
throw new Exception($e->nodeValue);
|
||||||
@ -812,36 +855,58 @@ class Parser {
|
|||||||
// Check for u-url
|
// Check for u-url
|
||||||
if (!array_key_exists('url', $return)) {
|
if (!array_key_exists('url', $return)) {
|
||||||
// Look for a @href on the element itself
|
// Look for a or area @href on the element itself
|
||||||
if ($e->tagName == 'a')
|
if ($e->tagName == 'a' or $e->tagName == 'area')
|
||||||
$url = $e->getAttribute('href');
|
$url = $e->getAttribute('href');
|
||||||
|
|
||||||
// Look for nested img @src
|
// Look for nested a @href
|
||||||
foreach ($this->xpath->query('./a[count(preceding-sibling::a)+count(following-sibling::a)=0]', $e) as $em) {
|
foreach ($this->xpath->query('./a[count(preceding-sibling::a)+count(following-sibling::a)=0]', $e) as $em) {
|
||||||
$url = $em->getAttribute('href');
|
$emNames = mfNamesFromElement($em, 'h-');
|
||||||
break;
|
if (empty($emNames)) {
|
||||||
|
$url = $em->getAttribute('href');
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Look for nested area @href
|
||||||
|
foreach ($this->xpath->query('./area[count(preceding-sibling::area)+count(following-sibling::area)=0]', $e) as $em) {
|
||||||
|
$emNames = mfNamesFromElement($em, 'h-');
|
||||||
|
if (empty($emNames)) {
|
||||||
|
$url = $em->getAttribute('href');
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!empty($url))
|
if (!empty($url))
|
||||||
$return['url'][] = $this->resolveUrl($url);
|
$return['url'][] = $this->resolveUrl($url);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure things are in alphabetical order
|
// Make sure things are in alphabetical order
|
||||||
sort($mfTypes);
|
sort($mfTypes);
|
||||||
|
|
||||||
// Phew. Return the final result.
|
// Phew. Return the final result.
|
||||||
$parsed = array(
|
$parsed = array(
|
||||||
'type' => $mfTypes,
|
'type' => $mfTypes,
|
||||||
'properties' => $return
|
'properties' => $return
|
||||||
);
|
);
|
||||||
if (!empty($children))
|
|
||||||
|
if (!empty($shape)) {
|
||||||
|
$parsed['shape'] = $shape;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!empty($coords)) {
|
||||||
|
$parsed['coords'] = $coords;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!empty($children)) {
|
||||||
$parsed['children'] = array_values(array_filter($children));
|
$parsed['children'] = array_values(array_filter($children));
|
||||||
|
}
|
||||||
return $parsed;
|
return $parsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse Rels and Alternatives
|
* Parse Rels and Alternatives
|
||||||
*
|
*
|
||||||
* Returns [$rels, $alternatives]. If the $rels value is to be empty, i.e. there are no links on the page
|
* Returns [$rels, $alternatives]. If the $rels value is to be empty, i.e. there are no links on the page
|
||||||
* with a rel value *not* containing `alternate`, then the type of $rels depends on $this->jsonMode. If set
|
* with a rel value *not* containing `alternate`, then the type of $rels depends on $this->jsonMode. If set
|
||||||
* to true, it will be a stdClass instance, optimising for JSON serialisation. Otherwise (the default case),
|
* to true, it will be a stdClass instance, optimising for JSON serialisation. Otherwise (the default case),
|
||||||
* it will be an empty array.
|
* it will be an empty array.
|
||||||
@ -849,18 +914,18 @@ class Parser {
|
|||||||
public function parseRelsAndAlternates() {
|
public function parseRelsAndAlternates() {
|
||||||
$rels = array();
|
$rels = array();
|
||||||
$alternates = array();
|
$alternates = array();
|
||||||
|
|
||||||
// Iterate through all a, area and link elements with rel attributes
|
// Iterate through all a, area and link elements with rel attributes
|
||||||
foreach ($this->xpath->query('//*[@rel and @href]') as $hyperlink) {
|
foreach ($this->xpath->query('//*[@rel and @href]') as $hyperlink) {
|
||||||
if ($hyperlink->getAttribute('rel') == '')
|
if ($hyperlink->getAttribute('rel') == '')
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Resolve the href
|
// Resolve the href
|
||||||
$href = $this->resolveUrl($hyperlink->getAttribute('href'));
|
$href = $this->resolveUrl($hyperlink->getAttribute('href'));
|
||||||
|
|
||||||
// Split up the rel into space-separated values
|
// Split up the rel into space-separated values
|
||||||
$linkRels = array_filter(explode(' ', $hyperlink->getAttribute('rel')));
|
$linkRels = array_filter(explode(' ', $hyperlink->getAttribute('rel')));
|
||||||
|
|
||||||
// If alternate in rels, create alternate structure, append
|
// If alternate in rels, create alternate structure, append
|
||||||
if (in_array('alternate', $linkRels)) {
|
if (in_array('alternate', $linkRels)) {
|
||||||
$alt = array(
|
$alt = array(
|
||||||
@ -869,10 +934,19 @@ class Parser {
|
|||||||
);
|
);
|
||||||
if ($hyperlink->hasAttribute('media'))
|
if ($hyperlink->hasAttribute('media'))
|
||||||
$alt['media'] = $hyperlink->getAttribute('media');
|
$alt['media'] = $hyperlink->getAttribute('media');
|
||||||
|
|
||||||
if ($hyperlink->hasAttribute('hreflang'))
|
if ($hyperlink->hasAttribute('hreflang'))
|
||||||
$alt['hreflang'] = $hyperlink->getAttribute('hreflang');
|
$alt['hreflang'] = $hyperlink->getAttribute('hreflang');
|
||||||
|
|
||||||
|
if ($hyperlink->hasAttribute('title'))
|
||||||
|
$alt['title'] = $hyperlink->getAttribute('title');
|
||||||
|
|
||||||
|
if ($hyperlink->hasAttribute('type'))
|
||||||
|
$alt['type'] = $hyperlink->getAttribute('type');
|
||||||
|
|
||||||
|
if ($hyperlink->nodeValue)
|
||||||
|
$alt['text'] = $hyperlink->nodeValue;
|
||||||
|
|
||||||
$alternates[] = $alt;
|
$alternates[] = $alt;
|
||||||
} else {
|
} else {
|
||||||
foreach ($linkRels as $rel) {
|
foreach ($linkRels as $rel) {
|
||||||
@ -880,38 +954,38 @@ class Parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (empty($rels) and $this->jsonMode) {
|
if (empty($rels) and $this->jsonMode) {
|
||||||
$rels = new stdClass();
|
$rels = new stdClass();
|
||||||
}
|
}
|
||||||
|
|
||||||
return array($rels, $alternates);
|
return array($rels, $alternates);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Kicks off the parsing routine
|
* Kicks off the parsing routine
|
||||||
*
|
*
|
||||||
* If `$convertClassic` is set, classic (legacy) microformats classnames are converted
|
* If `$convertClassic` is set, classic (legacy) microformats classnames are converted
|
||||||
* to their microformats2 equivalents (via convertLegacy()) before parsing.
|
* to their microformats2 equivalents (via convertLegacy()) before parsing.
|
||||||
*
|
*
|
||||||
* If a DOMElement is set as the $context, only descendants of that element will
|
* If a DOMElement is set as the $context, only descendants of that element will
|
||||||
* be parsed for microformats.
|
* be parsed for microformats.
|
||||||
*
|
*
|
||||||
* @param bool $convertClassic whether or not to convert classic microformats. Defaults to true
|
* @param bool $convertClassic whether or not to convert classic microformats. Defaults to true
|
||||||
* @param DOMElement $context optionally an element from which to parse microformats
|
* @param DOMElement $context optionally an element from which to parse microformats
|
||||||
* @return array An array containing all the µfs found in the current document
|
* @return array An array containing all the µfs found in the current document
|
||||||
*/
|
*/
|
||||||
public function parse($convertClassic = true, DOMElement $context = null) {
|
public function parse($convertClassic = true, DOMElement $context = null) {
|
||||||
$mfs = array();
|
$mfs = array();
|
||||||
|
|
||||||
if ($convertClassic) {
|
if ($convertClassic) {
|
||||||
$this->convertLegacy();
|
$this->convertLegacy();
|
||||||
}
|
}
|
||||||
|
|
||||||
$mfElements = null === $context
|
$mfElements = null === $context
|
||||||
? $this->xpath->query('//*[contains(concat(" ", @class), " h-")]')
|
? $this->xpath->query('//*[contains(concat(" ", @class), " h-")]')
|
||||||
: $this->xpath->query('.//*[contains(concat(" ", @class), " h-")]', $context);
|
: $this->xpath->query('.//*[contains(concat(" ", @class), " h-")]', $context);
|
||||||
|
|
||||||
// Parse microformats
|
// Parse microformats
|
||||||
foreach ($mfElements as $node) {
|
foreach ($mfElements as $node) {
|
||||||
// For each microformat
|
// For each microformat
|
||||||
@ -920,64 +994,64 @@ class Parser {
|
|||||||
// Add the value to the array for this property type
|
// Add the value to the array for this property type
|
||||||
$mfs[] = $result;
|
$mfs[] = $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse rels
|
// Parse rels
|
||||||
list($rels, $alternates) = $this->parseRelsAndAlternates();
|
list($rels, $alternates) = $this->parseRelsAndAlternates();
|
||||||
|
|
||||||
$top = array(
|
$top = array(
|
||||||
'items' => array_values(array_filter($mfs)),
|
'items' => array_values(array_filter($mfs)),
|
||||||
'rels' => $rels
|
'rels' => $rels
|
||||||
);
|
);
|
||||||
|
|
||||||
if (count($alternates))
|
if (count($alternates))
|
||||||
$top['alternates'] = $alternates;
|
$top['alternates'] = $alternates;
|
||||||
|
|
||||||
return $top;
|
return $top;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse From ID
|
* Parse From ID
|
||||||
*
|
*
|
||||||
* Given an ID, parse all microformats which are children of the element with
|
* Given an ID, parse all microformats which are children of the element with
|
||||||
* that ID.
|
* that ID.
|
||||||
*
|
*
|
||||||
* Note that rel values are still document-wide.
|
* Note that rel values are still document-wide.
|
||||||
*
|
*
|
||||||
* If an element with the ID is not found, an empty skeleton mf2 array structure
|
* If an element with the ID is not found, an empty skeleton mf2 array structure
|
||||||
* will be returned.
|
* will be returned.
|
||||||
*
|
*
|
||||||
* @param string $id
|
* @param string $id
|
||||||
* @param bool $convertClassic = true whether or not to convert classic microformats before parsing
|
* @param bool $convertClassic = true whether or not to convert classic microformats before parsing
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
public function parseFromId($id, $convertClassic=true) {
|
public function parseFromId($id, $convertClassic=true) {
|
||||||
$matches = $this->xpath->query("//*[@id='{$id}']");
|
$matches = $this->xpath->query("//*[@id='{$id}']");
|
||||||
|
|
||||||
if (empty($matches))
|
if (empty($matches))
|
||||||
return array('items' => array(), 'rels' => array(), 'alternates' => array());
|
return array('items' => array(), 'rels' => array(), 'alternates' => array());
|
||||||
|
|
||||||
return $this->parse($convertClassic, $matches->item(0));
|
return $this->parse($convertClassic, $matches->item(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert Legacy Classnames
|
* Convert Legacy Classnames
|
||||||
*
|
*
|
||||||
* Adds microformats2 classnames into a document containing only legacy
|
* Adds microformats2 classnames into a document containing only legacy
|
||||||
* semantic classnames.
|
* semantic classnames.
|
||||||
*
|
*
|
||||||
* @return Parser $this
|
* @return Parser $this
|
||||||
*/
|
*/
|
||||||
public function convertLegacy() {
|
public function convertLegacy() {
|
||||||
$doc = $this->doc;
|
$doc = $this->doc;
|
||||||
$xp = new DOMXPath($doc);
|
$xp = new DOMXPath($doc);
|
||||||
|
|
||||||
// replace all roots
|
// replace all roots
|
||||||
foreach ($this->classicRootMap as $old => $new) {
|
foreach ($this->classicRootMap as $old => $new) {
|
||||||
foreach ($xp->query('//*[contains(concat(" ", @class, " "), " ' . $old . ' ") and not(contains(concat(" ", @class, " "), " ' . $new . ' "))]') as $el) {
|
foreach ($xp->query('//*[contains(concat(" ", @class, " "), " ' . $old . ' ") and not(contains(concat(" ", @class, " "), " ' . $new . ' "))]') as $el) {
|
||||||
$el->setAttribute('class', $el->getAttribute('class') . ' ' . $new);
|
$el->setAttribute('class', $el->getAttribute('class') . ' ' . $new);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($this->classicPropertyMap as $oldRoot => $properties) {
|
foreach ($this->classicPropertyMap as $oldRoot => $properties) {
|
||||||
$newRoot = $this->classicRootMap[$oldRoot];
|
$newRoot = $this->classicRootMap[$oldRoot];
|
||||||
foreach ($properties as $old => $new) {
|
foreach ($properties as $old => $new) {
|
||||||
@ -986,16 +1060,16 @@ class Parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $this;
|
return $this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* XPath Query
|
* XPath Query
|
||||||
*
|
*
|
||||||
* Runs an XPath query over the current document. Works in exactly the same
|
* Runs an XPath query over the current document. Works in exactly the same
|
||||||
* way as DOMXPath::query.
|
* way as DOMXPath::query.
|
||||||
*
|
*
|
||||||
* @param string $expression
|
* @param string $expression
|
||||||
* @param DOMNode $context
|
* @param DOMNode $context
|
||||||
* @return DOMNodeList
|
* @return DOMNodeList
|
||||||
@ -1003,7 +1077,7 @@ class Parser {
|
|||||||
public function query($expression, $context = null) {
|
public function query($expression, $context = null) {
|
||||||
return $this->xpath->query($expression, $context);
|
return $this->xpath->query($expression, $context);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Classic Root Classname map
|
* Classic Root Classname map
|
||||||
*/
|
*/
|
||||||
@ -1013,11 +1087,11 @@ class Parser {
|
|||||||
'hentry' => 'h-entry',
|
'hentry' => 'h-entry',
|
||||||
'hrecipe' => 'h-recipe',
|
'hrecipe' => 'h-recipe',
|
||||||
'hresume' => 'h-resume',
|
'hresume' => 'h-resume',
|
||||||
'hevent' => 'h-event',
|
'vevent' => 'h-event',
|
||||||
'hreview' => 'h-review',
|
'hreview' => 'h-review',
|
||||||
'hproduct' => 'h-product'
|
'hproduct' => 'h-product'
|
||||||
);
|
);
|
||||||
|
|
||||||
public $classicPropertyMap = array(
|
public $classicPropertyMap = array(
|
||||||
'vcard' => array(
|
'vcard' => array(
|
||||||
'fn' => 'p-name',
|
'fn' => 'p-name',
|
||||||
@ -1084,7 +1158,7 @@ class Parser {
|
|||||||
'skill' => 'p-skill',
|
'skill' => 'p-skill',
|
||||||
'affiliation' => 'p-affiliation h-card',
|
'affiliation' => 'p-affiliation h-card',
|
||||||
),
|
),
|
||||||
'hevent' => array(
|
'vevent' => array(
|
||||||
'dtstart' => 'dt-start',
|
'dtstart' => 'dt-start',
|
||||||
'dtend' => 'dt-end',
|
'dtend' => 'dt-end',
|
||||||
'duration' => 'dt-duration',
|
'duration' => 'dt-duration',
|
||||||
@ -1246,7 +1320,7 @@ function resolveUrl($baseURI, $referenceURI) {
|
|||||||
# 5.2.3 Merge Paths
|
# 5.2.3 Merge Paths
|
||||||
function mergePaths($base, $reference) {
|
function mergePaths($base, $reference) {
|
||||||
# If the base URI has a defined authority component and an empty
|
# If the base URI has a defined authority component and an empty
|
||||||
# path,
|
# path,
|
||||||
if($base['authority'] && $base['path'] == null) {
|
if($base['authority'] && $base['path'] == null) {
|
||||||
# then return a string consisting of "/" concatenated with the
|
# then return a string consisting of "/" concatenated with the
|
||||||
# reference's path; otherwise,
|
# reference's path; otherwise,
|
||||||
|
Loading…
Reference in New Issue
Block a user