Latest Mf2/Parser.php is compatible with PHP5.3

This commit is contained in:
Mikael Nordfeldth 2014-09-25 08:39:05 +02:00
parent ee41bc560c
commit c2998e26ec
1 changed files with 64 additions and 32 deletions

View File

@ -69,7 +69,7 @@ function fetch($url, $convertClassic = true, &$curlInfo=null) {
curl_setopt($ch, CURLOPT_HEADER, 0); curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_MAXREDIRS, 5); curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
$response = curl_exec($ch); $html = curl_exec($ch);
$info = $curlInfo = curl_getinfo($ch); $info = $curlInfo = curl_getinfo($ch);
curl_close($ch); curl_close($ch);
@ -78,7 +78,6 @@ function fetch($url, $convertClassic = true, &$curlInfo=null) {
return null; return null;
} }
$html = mb_substr($response, $info['header_size']);
return parse($html, $url, $convertClassic); return parse($html, $url, $convertClassic);
} }
@ -123,12 +122,14 @@ function unicodeTrim($str) {
* @return string|array The prefixed name of the first microformats class found or false * @return string|array The prefixed name of the first microformats class found or false
*/ */
function mfNamesFromClass($class, $prefix='h-') { function mfNamesFromClass($class, $prefix='h-') {
$class = str_replace([' ', ' ', "\n"], ' ', $class); $class = str_replace(array(' ', ' ', "\n"), ' ', $class);
$classes = explode(' ', $class); $classes = explode(' ', $class);
$matches = array(); $matches = array();
foreach ($classes as $classname) { foreach ($classes as $classname) {
if (strpos($classname, $prefix) === 0 && $classname !== $prefix) { $compare_classname = strtolower(' ' . $classname);
$compare_prefix = strtolower(' ' . $prefix);
if (stristr($compare_classname, $compare_prefix) !== false && ($compare_classname != $compare_prefix)) {
$matches[] = ($prefix === 'h-') ? $classname : substr($classname, strlen($prefix)); $matches[] = ($prefix === 'h-') ? $classname : substr($classname, strlen($prefix));
} }
} }
@ -149,10 +150,11 @@ function nestedMfPropertyNamesFromClass($class) {
$prefixes = array('p-', 'u-', 'dt-', 'e-'); $prefixes = array('p-', 'u-', 'dt-', 'e-');
$propertyNames = array(); $propertyNames = array();
$class = str_replace([' ', ' ', "\n"], ' ', $class); $class = str_replace(array(' ', ' ', "\n"), ' ', $class);
foreach (explode(' ', $class) as $classname) { foreach (explode(' ', $class) as $classname) {
foreach ($prefixes as $prefix) { foreach ($prefixes as $prefix) {
if (strpos($classname, $prefix) === 0 and $classname !== $prefix) { $compare_classname = strtolower(' ' . $classname);
if (stristr($compare_classname, $prefix) && ($compare_classname != $prefix)) {
$propertyNames = array_merge($propertyNames, mfNamesFromClass($classname, ltrim($prefix))); $propertyNames = array_merge($propertyNames, mfNamesFromClass($classname, ltrim($prefix)));
} }
} }
@ -191,7 +193,7 @@ function convertTimeFormat($time) {
preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches); preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches);
// if no am/pm specified // if no am/pm specified
if ( empty($matches[4]) ) { if (empty($matches[4])) {
return $time; return $time;
} }
// else am/pm specified // else am/pm specified
@ -202,31 +204,27 @@ function convertTimeFormat($time) {
$hh = $matches[1]; $hh = $matches[1];
// add 12 to the pm hours // add 12 to the pm hours
if ( $meridiem == 'pm' && ($hh < 12) ) if ($meridiem == 'pm' && ($hh < 12)) {
{
$hh += 12; $hh += 12;
} }
$hh = str_pad($hh, 2, '0', STR_PAD_LEFT); $hh = str_pad($hh, 2, '0', STR_PAD_LEFT);
// minutes // minutes
$mm = ( empty($matches[2]) ) ? '00' : $matches[2]; $mm = (empty($matches[2]) ) ? '00' : $matches[2];
// seconds, only if supplied // seconds, only if supplied
if ( !empty($matches[3]) ) if (!empty($matches[3])) {
{
$ss = $matches[3]; $ss = $matches[3];
} }
if ( empty($ss) ) { if (empty($ss)) {
return sprintf('%s:%s', $hh, $mm); return sprintf('%s:%s', $hh, $mm);
} }
else { else {
return sprintf('%s:%s:%s', $hh, $mm, $ss); return sprintf('%s:%s:%s', $hh, $mm, $ss);
} }
} }
} }
/** /**
@ -293,6 +291,11 @@ class Parser {
} }
break; break;
} }
// Ignore <template> elements as per the HTML5 spec
foreach ($this->xpath->query('//template') as $templateEl) {
$templateEl->parentNode->removeChild($templateEl);
}
$this->baseurl = $baseurl; $this->baseurl = $baseurl;
$this->doc = $doc; $this->doc = $doc;
@ -320,7 +323,33 @@ class Parser {
return true; return true;
} }
/**
 * Rewrite the URL-bearing attributes of every descendant of $el.
 *
 * Each descendant carrying an href, src or data attribute has that
 * attribute's value passed through resolveUrl() and written back, so the
 * elements are modified in place.
 *
 * @param DOMElement $el Element whose descendants are rewritten
 */
private function resolveChildUrls(DOMElement $el) {
    // Processed in this order for every matching descendant.
    $urlAttributes = array('href', 'src', 'data');
    $carriers = $this->xpath->query('.//*[@src or @href or @data]', $el);

    foreach ($carriers as $node) {
        foreach ($urlAttributes as $attrName) {
            if (!$node->hasAttribute($attrName)) {
                continue;
            }
            $resolved = $this->resolveUrl($node->getAttribute($attrName));
            $node->setAttribute($attrName, $resolved);
        }
    }
}
/**
 * Get the text content of $el, substituting text for descendant images.
 *
 * The URL attributes of $el's descendants are first run through
 * resolveChildUrls() on the element itself. A deep clone is then taken and
 * every descendant <img> in the clone is swapped for a text node holding
 * its alt attribute, or its src attribute when no alt attribute is present.
 * The image replacement happens only on the clone.
 *
 * @param DOMElement $el Element to read text from
 * @return string The textContent of the modified clone
 */
public function textContent(DOMElement $el) {
    $this->resolveChildUrls($el);

    $copy = $el->cloneNode(true);
    $images = $this->xpath->query('.//img', $copy);

    foreach ($images as $image) {
        if ($image->hasAttribute('alt')) {
            $replacementText = $image->getAttribute('alt');
        } else {
            $replacementText = $image->getAttribute('src');
        }

        $textNode = $this->doc->createTextNode($replacementText);
        $image->parentNode->replaceChild($textNode, $image);
    }

    return $copy->textContent;
}
// TODO: figure out if this has problems with sms: and geo: URLs // TODO: figure out if this has problems with sms: and geo: URLs
public function resolveUrl($url) { public function resolveUrl($url) {
// If the URL is seriously malformed it's probably beyond the scope of this // If the URL is seriously malformed it's probably beyond the scope of this
@ -354,7 +383,7 @@ class Parser {
// Process value-class stuff // Process value-class stuff
$val = ''; $val = '';
foreach ($valueClassElements as $el) { foreach ($valueClassElements as $el) {
$val .= $el->textContent; $val .= $this->textContent($el);
} }
return unicodeTrim($val); return unicodeTrim($val);
@ -398,7 +427,7 @@ class Parser {
} elseif (in_array($p->tagName, array('data', 'input')) and $p->getAttribute('value') !== '') { } elseif (in_array($p->tagName, array('data', 'input')) and $p->getAttribute('value') !== '') {
$pValue = $p->getAttribute('value'); $pValue = $p->getAttribute('value');
} else { } else {
$pValue = unicodeTrim($p->textContent); $pValue = unicodeTrim($this->textContent($p));
} }
return $pValue; return $pValue;
@ -433,7 +462,7 @@ class Parser {
} elseif (in_array($u->tagName, array('data', 'input')) and $u->getAttribute('value') !== null) { } elseif (in_array($u->tagName, array('data', 'input')) and $u->getAttribute('value') !== null) {
return $u->getAttribute('value'); return $u->getAttribute('value');
} else { } else {
return unicodeTrim($u->textContent); return unicodeTrim($this->textContent($u));
} }
} }
@ -595,17 +624,8 @@ class Parser {
// Expand relative URLs within children of this element // Expand relative URLs within children of this element
// TODO: as it is this is not relative to only children, make this .// and rerun tests // TODO: as it is this is not relative to only children, make this .// and rerun tests
$hyperlinkChildren = $this->xpath->query('//*[@src or @href or @data]', $e); $this->resolveChildUrls($e);
foreach ($hyperlinkChildren as $child) {
if ($child->hasAttribute('href'))
$child->setAttribute('href', $this->resolveUrl($child->getAttribute('href')));
if ($child->hasAttribute('src'))
$child->setAttribute('src', $this->resolveUrl($child->getAttribute('src')));
if ($child->hasAttribute('data'))
$child->setAttribute('data', $this->resolveUrl($child->getAttribute('data')));
}
$html = ''; $html = '';
foreach ($e->childNodes as $node) { foreach ($e->childNodes as $node) {
$html .= $node->C14N(); $html .= $node->C14N();
@ -613,7 +633,7 @@ class Parser {
return array( return array(
'html' => $html, 'html' => $html,
'value' => unicodeTrim($e->textContent) 'value' => unicodeTrim($this->textContent($e))
); );
} }
@ -994,7 +1014,8 @@ class Parser {
'hrecipe' => 'h-recipe', 'hrecipe' => 'h-recipe',
'hresume' => 'h-resume', 'hresume' => 'h-resume',
'hevent' => 'h-event', 'hevent' => 'h-event',
'hreview' => 'h-review' 'hreview' => 'h-review',
'hproduct' => 'h-product'
); );
public $classicPropertyMap = array( public $classicPropertyMap = array(
@ -1086,6 +1107,17 @@ class Parser {
'best' => 'p-best', 'best' => 'p-best',
'worst' => 'p-worst', 'worst' => 'p-worst',
'description' => 'p-description' 'description' => 'p-description'
),
'hproduct' => array(
'fn' => 'p-name',
'photo' => 'u-photo',
'brand' => 'p-brand',
'category' => 'p-category',
'description' => 'p-description',
'identifier' => 'u-identifier',
'url' => 'u-url',
'review' => 'p-review h-review',
'price' => 'p-price'
) )
); );
} }