Latest Mf2/Parser.php is compatible with PHP5.3
This commit is contained in:
parent
ee41bc560c
commit
c2998e26ec
@ -69,7 +69,7 @@ function fetch($url, $convertClassic = true, &$curlInfo=null) {
|
||||
curl_setopt($ch, CURLOPT_HEADER, 0);
|
||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
|
||||
curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
|
||||
$response = curl_exec($ch);
|
||||
$html = curl_exec($ch);
|
||||
$info = $curlInfo = curl_getinfo($ch);
|
||||
curl_close($ch);
|
||||
|
||||
@ -78,7 +78,6 @@ function fetch($url, $convertClassic = true, &$curlInfo=null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$html = mb_substr($response, $info['header_size']);
|
||||
return parse($html, $url, $convertClassic);
|
||||
}
|
||||
|
||||
@ -123,12 +122,14 @@ function unicodeTrim($str) {
|
||||
* @return string|array The prefixed name of the first microformats class found or false
|
||||
*/
|
||||
function mfNamesFromClass($class, $prefix='h-') {
|
||||
$class = str_replace([' ', ' ', "\n"], ' ', $class);
|
||||
$class = str_replace(array(' ', ' ', "\n"), ' ', $class);
|
||||
$classes = explode(' ', $class);
|
||||
$matches = array();
|
||||
|
||||
foreach ($classes as $classname) {
|
||||
if (strpos($classname, $prefix) === 0 && $classname !== $prefix) {
|
||||
$compare_classname = strtolower(' ' . $classname);
|
||||
$compare_prefix = strtolower(' ' . $prefix);
|
||||
if (stristr($compare_classname, $compare_prefix) !== false && ($compare_classname != $compare_prefix)) {
|
||||
$matches[] = ($prefix === 'h-') ? $classname : substr($classname, strlen($prefix));
|
||||
}
|
||||
}
|
||||
@ -149,10 +150,11 @@ function nestedMfPropertyNamesFromClass($class) {
|
||||
$prefixes = array('p-', 'u-', 'dt-', 'e-');
|
||||
$propertyNames = array();
|
||||
|
||||
$class = str_replace([' ', ' ', "\n"], ' ', $class);
|
||||
$class = str_replace(array(' ', ' ', "\n"), ' ', $class);
|
||||
foreach (explode(' ', $class) as $classname) {
|
||||
foreach ($prefixes as $prefix) {
|
||||
if (strpos($classname, $prefix) === 0 and $classname !== $prefix) {
|
||||
$compare_classname = strtolower(' ' . $classname);
|
||||
if (stristr($compare_classname, $prefix) && ($compare_classname != $prefix)) {
|
||||
$propertyNames = array_merge($propertyNames, mfNamesFromClass($classname, ltrim($prefix)));
|
||||
}
|
||||
}
|
||||
@ -202,8 +204,7 @@ function convertTimeFormat($time) {
|
||||
$hh = $matches[1];
|
||||
|
||||
// add 12 to the pm hours
|
||||
if ( $meridiem == 'pm' && ($hh < 12) )
|
||||
{
|
||||
if ($meridiem == 'pm' && ($hh < 12)) {
|
||||
$hh += 12;
|
||||
}
|
||||
|
||||
@ -213,8 +214,7 @@ function convertTimeFormat($time) {
|
||||
$mm = (empty($matches[2]) ) ? '00' : $matches[2];
|
||||
|
||||
// seconds, only if supplied
|
||||
if ( !empty($matches[3]) )
|
||||
{
|
||||
if (!empty($matches[3])) {
|
||||
$ss = $matches[3];
|
||||
}
|
||||
|
||||
@ -224,9 +224,7 @@ function convertTimeFormat($time) {
|
||||
else {
|
||||
return sprintf('%s:%s:%s', $hh, $mm, $ss);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@ -294,6 +292,11 @@ class Parser {
|
||||
break;
|
||||
}
|
||||
|
||||
// Ignore <template> elements as per the HTML5 spec
|
||||
foreach ($this->xpath->query('//template') as $templateEl) {
|
||||
$templateEl->parentNode->removeChild($templateEl);
|
||||
}
|
||||
|
||||
$this->baseurl = $baseurl;
|
||||
$this->doc = $doc;
|
||||
$this->parsed = new SplObjectStorage();
|
||||
@ -321,6 +324,32 @@ class Parser {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Rewrite the URL-bearing attributes of every descendant of $el in place.
 *
 * Each descendant element carrying an href, src or data attribute has that
 * attribute replaced by the result of passing its current value through
 * resolveUrl(). Only elements beneath $el are selected; $el itself is not
 * examined.
 *
 * @param DOMElement $el The element whose descendants are updated
 */
private function resolveChildUrls(DOMElement $el) {
	// Checked in this order for every matching descendant.
	$urlAttributes = array('href', 'src', 'data');
	$candidates = $this->xpath->query('.//*[@src or @href or @data]', $el);

	foreach ($candidates as $node) {
		foreach ($urlAttributes as $attribute) {
			if (!$node->hasAttribute($attribute)) {
				continue;
			}

			$node->setAttribute($attribute, $this->resolveUrl($node->getAttribute($attribute)));
		}
	}
}
|
||||
|
||||
/**
 * Return the text content of $el with <img> descendants represented as text.
 *
 * URL attributes beneath $el are first resolved via resolveChildUrls(), which
 * modifies $el itself. The image substitution is then carried out on a deep
 * clone, so the <img> elements of $el are left in place.
 *
 * Every <img> in the clone is swapped for a text node holding its alt
 * attribute when that attribute is present (even if empty), otherwise its
 * src attribute.
 *
 * @param DOMElement $el The element to extract text from
 * @return string The text content of the modified clone
 */
public function textContent(DOMElement $el) {
	$this->resolveChildUrls($el);

	// Work on a copy so replacing images does not alter the source element.
	$copy = $el->cloneNode(true);
	$images = $this->xpath->query('.//img', $copy);

	foreach ($images as $image) {
		if ($image->hasAttribute('alt')) {
			$replacementText = $image->getAttribute('alt');
		} else {
			$replacementText = $image->getAttribute('src');
		}

		$textNode = $this->doc->createTextNode($replacementText);
		$image->parentNode->replaceChild($textNode, $image);
	}

	return $copy->textContent;
}
|
||||
|
||||
// TODO: figure out if this has problems with sms: and geo: URLs
|
||||
public function resolveUrl($url) {
|
||||
// If the URL is seriously malformed it’s probably beyond the scope of this
|
||||
@ -354,7 +383,7 @@ class Parser {
|
||||
// Process value-class stuff
|
||||
$val = '';
|
||||
foreach ($valueClassElements as $el) {
|
||||
$val .= $el->textContent;
|
||||
$val .= $this->textContent($el);
|
||||
}
|
||||
|
||||
return unicodeTrim($val);
|
||||
@ -398,7 +427,7 @@ class Parser {
|
||||
} elseif (in_array($p->tagName, array('data', 'input')) and $p->getAttribute('value') !== '') {
|
||||
$pValue = $p->getAttribute('value');
|
||||
} else {
|
||||
$pValue = unicodeTrim($p->textContent);
|
||||
$pValue = unicodeTrim($this->textContent($p));
|
||||
}
|
||||
|
||||
return $pValue;
|
||||
@ -433,7 +462,7 @@ class Parser {
|
||||
} elseif (in_array($u->tagName, array('data', 'input')) and $u->getAttribute('value') !== null) {
|
||||
return $u->getAttribute('value');
|
||||
} else {
|
||||
return unicodeTrim($u->textContent);
|
||||
return unicodeTrim($this->textContent($u));
|
||||
}
|
||||
}
|
||||
|
||||
@ -595,16 +624,7 @@ class Parser {
|
||||
|
||||
// Expand relative URLs within children of this element
|
||||
// TODO: as it is this is not relative to only children, make this .// and rerun tests
|
||||
$hyperlinkChildren = $this->xpath->query('//*[@src or @href or @data]', $e);
|
||||
|
||||
foreach ($hyperlinkChildren as $child) {
|
||||
if ($child->hasAttribute('href'))
|
||||
$child->setAttribute('href', $this->resolveUrl($child->getAttribute('href')));
|
||||
if ($child->hasAttribute('src'))
|
||||
$child->setAttribute('src', $this->resolveUrl($child->getAttribute('src')));
|
||||
if ($child->hasAttribute('data'))
|
||||
$child->setAttribute('data', $this->resolveUrl($child->getAttribute('data')));
|
||||
}
|
||||
$this->resolveChildUrls($e);
|
||||
|
||||
$html = '';
|
||||
foreach ($e->childNodes as $node) {
|
||||
@ -613,7 +633,7 @@ class Parser {
|
||||
|
||||
return array(
|
||||
'html' => $html,
|
||||
'value' => unicodeTrim($e->textContent)
|
||||
'value' => unicodeTrim($this->textContent($e))
|
||||
);
|
||||
}
|
||||
|
||||
@ -994,7 +1014,8 @@ class Parser {
|
||||
'hrecipe' => 'h-recipe',
|
||||
'hresume' => 'h-resume',
|
||||
'hevent' => 'h-event',
|
||||
'hreview' => 'h-review'
|
||||
'hreview' => 'h-review',
|
||||
'hproduct' => 'h-product'
|
||||
);
|
||||
|
||||
public $classicPropertyMap = array(
|
||||
@ -1086,6 +1107,17 @@ class Parser {
|
||||
'best' => 'p-best',
|
||||
'worst' => 'p-worst',
|
||||
'description' => 'p-description'
|
||||
),
|
||||
'hproduct' => array(
|
||||
'fn' => 'p-name',
|
||||
'photo' => 'u-photo',
|
||||
'brand' => 'p-brand',
|
||||
'category' => 'p-category',
|
||||
'description' => 'p-description',
|
||||
'identifier' => 'u-identifier',
|
||||
'url' => 'u-url',
|
||||
'review' => 'p-review h-review',
|
||||
'price' => 'p-price'
|
||||
)
|
||||
);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user