Work around weird bug with HTML normalization via PHP DOM module; if source had xmlns and xml:lang I ended up with double output, breaking the subsequent parsing. Will have to track this down later and report upstream if not already resolved.

This commit is contained in:
Brion Vibber 2010-03-18 17:55:21 -07:00
parent 3e2e88b0df
commit 08faff2278
2 changed files with 21 additions and 2 deletions

View File

@ -174,6 +174,26 @@ class DiscoveryHints {
error_reporting($old);
if ($ok) {
// If the original had xmlns or xml:lang attributes on the
// <html>, we seem to end up with duplicates, which causes
// parse errors. Remove em!
//
// For some reason we have to iterate and remove them twice,
// *plus* they don't show up on hasAttribute() or removeAttribute().
// This might be some weird bug in PHP or libxml2, uncertain if
// it affects other folks consistently.
$root = $dom->documentElement;
foreach ($root->attributes as $i => $x) {
if ($i == 'xmlns' || $i == 'xml:lang') {
$root->removeAttributeNode($x);
}
}
foreach ($root->attributes as $i => $x) {
if ($i == 'xmlns' || $i == 'xml:lang') {
$root->removeAttributeNode($x);
}
}
// hKit doesn't give us a chance to pass the source URL for
// resolving relative links, such as the avatar photo on a
// Google profile. We'll slip it into a <base> tag if there's
@ -192,7 +212,6 @@ class DiscoveryHints {
$head = $heads->item(0);
} else {
$head = $dom->createElement('head');
$root = $dom->documentRoot;
if ($root->firstChild) {
$root->insertBefore($head, $root->firstChild);
} else {