This commit is contained in:
Mikael Nordfeldth 2015-08-15 11:48:39 +02:00
parent b434243416
commit c77bce12e5
1 changed files with 227 additions and 153 deletions

View File

@ -127,9 +127,9 @@ function mfNamesFromClass($class, $prefix='h-') {
$matches = array();
foreach ($classes as $classname) {
$compare_classname = strtolower(' ' . $classname);
$compare_prefix = strtolower(' ' . $prefix);
if (stristr($compare_classname, $compare_prefix) !== false && ($compare_classname != $compare_prefix)) {
$compare_classname = ' ' . $classname;
$compare_prefix = ' ' . $prefix;
if (strstr($compare_classname, $compare_prefix) !== false && ($compare_classname != $compare_prefix)) {
$matches[] = ($prefix === 'h-') ? $classname : substr($classname, strlen($prefix));
}
}
@ -153,13 +153,18 @@ function nestedMfPropertyNamesFromClass($class) {
$class = str_replace(array(' ', ' ', "\n"), ' ', $class);
foreach (explode(' ', $class) as $classname) {
foreach ($prefixes as $prefix) {
$compare_classname = strtolower(' ' . $classname);
if (stristr($compare_classname, $prefix) && ($compare_classname != $prefix)) {
$propertyNames = array_merge($propertyNames, mfNamesFromClass($classname, ltrim($prefix)));
// Check if $classname is a valid property classname for $prefix.
if (mb_substr($classname, 0, mb_strlen($prefix)) == $prefix && $classname != $prefix) {
$propertyName = mb_substr($classname, mb_strlen($prefix));
$propertyNames[$propertyName][] = $prefix;
}
}
}
foreach ($propertyNames as $property => $prefixes) {
$propertyNames[$property] = array_unique($prefixes);
}
return $propertyNames;
}
@ -192,28 +197,27 @@ function convertTimeFormat($time) {
$hh = $mm = $ss = '';
preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches);
// if no am/pm specified
// If no am/pm is specified:
if (empty($matches[4])) {
return $time;
}
// else am/pm specified
else {
} else {
// Otherwise, am/pm is specified.
$meridiem = strtolower(str_replace('.', '', $matches[4]));
// hours
// Hours.
$hh = $matches[1];
// add 12 to the pm hours
// Add 12 to hours if pm applies.
if ($meridiem == 'pm' && ($hh < 12)) {
$hh += 12;
}
$hh = str_pad($hh, 2, '0', STR_PAD_LEFT);
// minutes
// Minutes.
$mm = (empty($matches[2]) ) ? '00' : $matches[2];
// seconds, only if supplied
// Seconds, only if supplied.
if (!empty($matches[3])) {
$ss = $matches[3];
}
@ -443,7 +447,7 @@ class Parser {
public function parseU(\DOMElement $u) {
if (($u->tagName == 'a' or $u->tagName == 'area') and $u->getAttribute('href') !== null) {
$uValue = $u->getAttribute('href');
} elseif ($u->tagName == 'img' and $u->getAttribute('src') !== null) {
} elseif (in_array($u->tagName, array('img', 'audio', 'video', 'source')) and $u->getAttribute('src') !== null) {
$uValue = $u->getAttribute('src');
} elseif ($u->tagName == 'object' and $u->getAttribute('data') !== null) {
$uValue = $u->getAttribute('data');
@ -591,16 +595,16 @@ class Parser {
$dtValue = $dt->nodeValue;
}
if ( preg_match('/(\d{4}-\d{2}-\d{2})/', $dtValue, $matches) ) {
if (preg_match('/(\d{4}-\d{2}-\d{2})/', $dtValue, $matches)) {
$dates[] = $matches[0];
}
}
/**
* if $dtValue is only a time and there are recently parsed dates,
* form the full date-time using the most recnetly parsed dt- value
* form the full date-time using the most recently parsed dt- value
*/
if ( (preg_match('/^\d{1,2}:\d{1,2}(Z?[+|-]\d{2}:?\d{2})?/', $dtValue) or preg_match('/^\d{1,2}[a|p]m/', $dtValue)) && !empty($dates) ) {
if ((preg_match('/^\d{1,2}:\d{1,2}(Z?[+|-]\d{2}:?\d{2})?/', $dtValue) or preg_match('/^\d{1,2}[a|p]m/', $dtValue)) && !empty($dates)) {
$dtValue = convertTimeFormat($dtValue);
$dtValue = end($dates) . 'T' . unicodeTrim($dtValue, 'T');
}
@ -665,6 +669,8 @@ class Parser {
if (null === $result)
continue;
// In most cases, the value attribute of the nested microformat should be the p- parsed value of the element.
// The only times this is different is when the microformat is nested under certain prefixes, which are handled below.
$result['value'] = $this->parseP($subMF);
// Does this µf have any property names other than h-*?
@ -672,8 +678,19 @@ class Parser {
if (!empty($properties)) {
// Yes! It's a nested property µf
foreach ($properties as $property) {
$return[$property][] = $result;
foreach ($properties as $property => $prefixes) {
// Note: handling microformat nesting under multiple conflicting prefixes is not currently specified by the mf2 parsing spec.
$prefixSpecificResult = $result;
if (in_array('p-', $prefixes)) {
$prefixSpecificResult['value'] = $prefixSpecificResult['properties']['name'][0];
} elseif (in_array('e-', $prefixes)) {
$eParsedResult = $this->parseE($subMF);
$prefixSpecificResult['html'] = $eParsedResult['html'];
$prefixSpecificResult['value'] = $eParsedResult['value'];
} elseif (in_array('u-', $prefixes)) {
$prefixSpecificResult['value'] = $this->parseU($subMF);
}
$return[$property][] = $prefixSpecificResult;
}
} else {
// No, it's a child µf
@ -689,6 +706,11 @@ class Parser {
$this->elementPrefixParsed($subMF, 'e');
}
if($e->tagName == 'area') {
$coords = $e->getAttribute('coords');
$shape = $e->getAttribute('shape');
}
// Handle p-*
foreach ($this->xpath->query('.//*[contains(concat(" ", @class) ," p-")]', $e) as $p) {
if ($this->isElementParsed($p, 'p'))
@ -762,7 +784,7 @@ class Parser {
if (!array_key_exists('name', $return)) {
try {
// Look for img @alt
if ($e->tagName == 'img' and $e->getAttribute('alt') != '')
if (($e->tagName == 'img' or $e->tagName == 'area') and $e->getAttribute('alt') != '')
throw new Exception($e->getAttribute('alt'));
if ($e->tagName == 'abbr' and $e->hasAttribute('title'))
@ -770,14 +792,35 @@ class Parser {
// Look for nested img @alt
foreach ($this->xpath->query('./img[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) {
if ($em->getAttribute('alt') != '')
$emNames = mfNamesFromElement($em, 'h-');
if (empty($emNames) && $em->getAttribute('alt') != '') {
throw new Exception($em->getAttribute('alt'));
}
}
// Look for nested area @alt
foreach ($this->xpath->query('./area[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) {
$emNames = mfNamesFromElement($em, 'h-');
if (empty($emNames) && $em->getAttribute('alt') != '') {
throw new Exception($em->getAttribute('alt'));
}
}
// Look for double nested img @alt
foreach ($this->xpath->query('./*[count(preceding-sibling::*)+count(following-sibling::*)=0]/img[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) {
if ($em->getAttribute('alt') != '')
$emNames = mfNamesFromElement($em, 'h-');
if (empty($emNames) && $em->getAttribute('alt') != '') {
throw new Exception($em->getAttribute('alt'));
}
}
// Look for double nested area @alt
foreach ($this->xpath->query('./*[count(preceding-sibling::*)+count(following-sibling::*)=0]/area[count(preceding-sibling::*)+count(following-sibling::*)=0]', $e) as $em) {
$emNames = mfNamesFromElement($em, 'h-');
if (empty($emNames) && $em->getAttribute('alt') != '') {
throw new Exception($em->getAttribute('alt'));
}
}
throw new Exception($e->nodeValue);
@ -812,13 +855,25 @@ class Parser {
// Check for u-url
if (!array_key_exists('url', $return)) {
// Look for the element's own @href
if ($e->tagName == 'a')
if ($e->tagName == 'a' or $e->tagName == 'area')
$url = $e->getAttribute('href');
// Look for nested img @src
// Look for nested a @href
foreach ($this->xpath->query('./a[count(preceding-sibling::a)+count(following-sibling::a)=0]', $e) as $em) {
$url = $em->getAttribute('href');
break;
$emNames = mfNamesFromElement($em, 'h-');
if (empty($emNames)) {
$url = $em->getAttribute('href');
break;
}
}
// Look for nested area @href
foreach ($this->xpath->query('./area[count(preceding-sibling::area)+count(following-sibling::area)=0]', $e) as $em) {
$emNames = mfNamesFromElement($em, 'h-');
if (empty($emNames)) {
$url = $em->getAttribute('href');
break;
}
}
if (!empty($url))
@ -833,8 +888,18 @@ class Parser {
'type' => $mfTypes,
'properties' => $return
);
if (!empty($children))
if (!empty($shape)) {
$parsed['shape'] = $shape;
}
if (!empty($coords)) {
$parsed['coords'] = $coords;
}
if (!empty($children)) {
$parsed['children'] = array_values(array_filter($children));
}
return $parsed;
}
@ -873,6 +938,15 @@ class Parser {
if ($hyperlink->hasAttribute('hreflang'))
$alt['hreflang'] = $hyperlink->getAttribute('hreflang');
if ($hyperlink->hasAttribute('title'))
$alt['title'] = $hyperlink->getAttribute('title');
if ($hyperlink->hasAttribute('type'))
$alt['type'] = $hyperlink->getAttribute('type');
if ($hyperlink->nodeValue)
$alt['text'] = $hyperlink->nodeValue;
$alternates[] = $alt;
} else {
foreach ($linkRels as $rel) {
@ -1013,7 +1087,7 @@ class Parser {
'hentry' => 'h-entry',
'hrecipe' => 'h-recipe',
'hresume' => 'h-resume',
'hevent' => 'h-event',
'vevent' => 'h-event',
'hreview' => 'h-review',
'hproduct' => 'h-product'
);
@ -1084,7 +1158,7 @@ class Parser {
'skill' => 'p-skill',
'affiliation' => 'p-affiliation h-card',
),
'hevent' => array(
'vevent' => array(
'dtstart' => 'dt-start',
'dtend' => 'dt-end',
'duration' => 'dt-duration',