forked from GNUsocial/gnu-social
Merge branch 'nightly' of git.gnu.io:gnu/gnu-social into mmn_fixes
This commit is contained in:
commit
daea5647b6
@ -46,11 +46,47 @@ class OembedPlugin extends Plugin
|
|||||||
'maxheight' => common_config('thumbnail', 'height'),
|
'maxheight' => common_config('thumbnail', 'height'),
|
||||||
);
|
);
|
||||||
$metadata = oEmbedHelper::getOembedFrom($api, $url, $params);
|
$metadata = oEmbedHelper::getOembedFrom($api, $url, $params);
|
||||||
|
|
||||||
|
// Facebook just gives us javascript in its oembed html,
|
||||||
|
// so use the content of the title element instead
|
||||||
|
if(strpos($url,'https://www.facebook.com/') === 0) {
|
||||||
|
$metadata->html = @$dom->getElementsByTagName('title')->item(0)->nodeValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// WordPress sometimes also gives us only JavaScript; use og:description if it is available
|
||||||
|
$xpath = new DomXpath($dom);
|
||||||
|
$generatorNode = @$xpath->query('//meta[@name="generator"][1]')->item(0);
|
||||||
|
if ($generatorNode instanceof DomElement) {
|
||||||
|
// when WordPress only gives us JavaScript, the HTML stripped of its tags
|
||||||
|
// is the same as the title, so this helps us to identify this (common) case
|
||||||
|
if(strpos($generatorNode->getAttribute('content'),'WordPress') === 0
|
||||||
|
&& trim(strip_tags($metadata->html)) == trim($metadata->title)) {
|
||||||
|
$propertyNode = @$xpath->query('//meta[@property="og:description"][1]')->item(0);
|
||||||
|
if ($propertyNode instanceof DomElement) {
|
||||||
|
$metadata->html = $propertyNode->getAttribute('content');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
} catch (Exception $e) {
|
} catch (Exception $e) {
|
||||||
common_log(LOG_INFO, 'Could not find an oEmbed endpoint using link headers, trying OpenGraph from HTML.');
|
common_log(LOG_INFO, 'Could not find an oEmbed endpoint using link headers, trying OpenGraph from HTML.');
|
||||||
// Just ignore it!
|
// Just ignore it!
|
||||||
$metadata = OpenGraphHelper::ogFromHtml($dom);
|
$metadata = OpenGraphHelper::ogFromHtml($dom);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sometimes sites serve the path, not the full URL, for images
|
||||||
|
// let's "be liberal in what you accept from others"!
|
||||||
|
// add protocol and host if the thumbnail_url starts with /
|
||||||
|
if(substr($metadata->thumbnail_url,0,1) == '/') {
|
||||||
|
$thumbnail_url_parsed = parse_url($metadata->url);
|
||||||
|
$metadata->thumbnail_url = $thumbnail_url_parsed['scheme']."://".$thumbnail_url_parsed['host'].$metadata->thumbnail_url;
|
||||||
|
}
|
||||||
|
|
||||||
|
// some WordPress OpenGraph implementations sometimes return a blank white image
|
||||||
|
// no need for us to save that!
|
||||||
|
if($metadata->thumbnail_url == 'https://s0.wp.com/i/blank.jpg') {
|
||||||
|
unset($metadata->thumbnail_url);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function onEndShowHeadElements(Action $action)
|
public function onEndShowHeadElements(Action $action)
|
||||||
|
@ -74,13 +74,51 @@ class oEmbedHelper
|
|||||||
|
|
||||||
if (Event::handle('GetRemoteUrlMetadata', array($url, &$metadata))) {
|
if (Event::handle('GetRemoteUrlMetadata', array($url, &$metadata))) {
|
||||||
// If that event didn't return anything, try downloading the body and parse it
|
// If that event didn't return anything, try downloading the body and parse it
|
||||||
$body = HTTPClient::quickGet($url);
|
|
||||||
|
// don't use quickGet since we want to check Content-Type header for utf-8
|
||||||
|
$client = new HTTPClient();
|
||||||
|
$response = $client->get($url);
|
||||||
|
if (!$response->isOk()) {
|
||||||
|
// TRANS: Exception. %s is the URL we tried to GET.
|
||||||
|
throw new Exception(sprintf(_m('Could not GET URL %s.'), $url), $response->getStatus());
|
||||||
|
}
|
||||||
|
$body = $response->getBody();
|
||||||
|
|
||||||
// DOMDocument::loadHTML may throw warnings on unrecognized elements,
|
// DOMDocument::loadHTML may throw warnings on unrecognized elements,
|
||||||
// and notices on unrecognized namespaces.
|
// and notices on unrecognized namespaces.
|
||||||
$old = error_reporting(error_reporting() & ~(E_WARNING | E_NOTICE));
|
$old = error_reporting(error_reporting() & ~(E_WARNING | E_NOTICE));
|
||||||
|
|
||||||
|
// DOMDocument assumes ISO-8859-1 per HTML spec
|
||||||
|
// use UTF-8 if we find any evidence of that encoding
|
||||||
|
$utf8_evidence = false;
|
||||||
|
$unicode_check_dom = new DOMDocument();
|
||||||
|
$ok = $unicode_check_dom->loadHTML($body);
|
||||||
|
if (!$ok) throw new oEmbedHelper_BadHtmlException();
|
||||||
|
$metaNodes = $unicode_check_dom->getElementsByTagName('meta');
|
||||||
|
foreach($metaNodes as $metaNode) {
|
||||||
|
// case-insensitive since Content-Type and utf-8 can be written in many ways
|
||||||
|
if(stristr($metaNode->getAttribute('http-equiv'),'content-type')
|
||||||
|
&& stristr($metaNode->getAttribute('content'),'utf-8')) {
|
||||||
|
$utf8_evidence = true;
|
||||||
|
break;
|
||||||
|
} elseif(stristr($metaNode->getAttribute('charset'),'utf-8')) {
|
||||||
|
$utf8_evidence = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unset($unicode_check_dom);
|
||||||
|
|
||||||
|
// The Content-Type HTTP response header overrides encoding metatags in DOM
|
||||||
|
if(stristr($response->getHeader('Content-Type'),'utf-8')) {
|
||||||
|
$utf8_evidence = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// add utf-8 encoding prolog if we have reason to believe this is utf-8 content
|
||||||
|
// DOMDocument('1.0', 'UTF-8') does not work!
|
||||||
|
$utf8_tag = $utf8_evidence ? '<?xml encoding="utf-8" ?>' : '';
|
||||||
|
|
||||||
$dom = new DOMDocument();
|
$dom = new DOMDocument();
|
||||||
$ok = $dom->loadHTML($body);
|
$ok = $dom->loadHTML($utf8_tag.$body);
|
||||||
unset($body); // storing the DOM in memory is enough...
|
unset($body); // storing the DOM in memory is enough...
|
||||||
error_reporting($old);
|
error_reporting($old);
|
||||||
|
|
||||||
@ -161,11 +199,6 @@ class oEmbedHelper
|
|||||||
|
|
||||||
$oembed_data = HTTPClient::quickGetJson($api, $params);
|
$oembed_data = HTTPClient::quickGetJson($api, $params);
|
||||||
|
|
||||||
// purify html
|
|
||||||
if(isset($oembed_data->html)) {
|
|
||||||
$oembed_data->html = common_purify($oembed_data->html);
|
|
||||||
}
|
|
||||||
|
|
||||||
return $oembed_data;
|
return $oembed_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user