From 462ea2630311bd3d4a9288dbf94a7c03105a1d5e Mon Sep 17 00:00:00 2001 From: Miguel Dantas Date: Sun, 7 Jul 2019 13:26:10 +0100 Subject: [PATCH] [Embed] Using oscarotero/Embed as first attempt to get oEmbed/OpenGraph data, fallback to previous implementation otherwise. --- plugins/Embed/EmbedPlugin.php | 82 +++++++++++++++++----------- plugins/Embed/classes/File_embed.php | 4 +- plugins/Embed/lib/embedhelper.php | 12 ++-- plugins/Embed/scripts/poll_embed.php | 2 +- plugins/Embed/tests/EmbedTest.php | 2 +- 5 files changed, 61 insertions(+), 41 deletions(-) diff --git a/plugins/Embed/EmbedPlugin.php b/plugins/Embed/EmbedPlugin.php index f8d0d14d8a..b129e08ed1 100644 --- a/plugins/Embed/EmbedPlugin.php +++ b/plugins/Embed/EmbedPlugin.php @@ -29,6 +29,8 @@ defined('GNUSOCIAL') || die(); +use Embed\Embed; + /** * Base class for the Embed plugin that does most of the heavy lifting to get * and display representations for remote content. @@ -100,7 +102,7 @@ class EmbedPlugin extends Plugin /** * This event executes when GNU social encounters a remote URL we then decide - * to interrogate for metadata. Embed gloms onto it to see if we have an + * to interrogate for metadata. Embed gloms onto it to see if we have an * oEmbed endpoint or image to try to represent in the post. * * @param $url string the remote URL we're looking at @@ -110,41 +112,58 @@ class EmbedPlugin extends Plugin */ public function onGetRemoteUrlMetadataFromDom($url, DOMDocument $dom, stdClass &$metadata) { + try { - common_log(LOG_INFO, 'Trying to discover an oEmbed endpoint using link headers.'); - $api = oEmbedHelper::oEmbedEndpointFromHTML($dom); - common_log(LOG_INFO, 'Found oEmbed API endpoint ' . $api . ' for URL ' . $url); - $params = array( - 'maxwidth' => common_config('thumbnail', 'width'), - 'maxheight' => common_config('thumbnail', 'height'), - ); - $metadata = oEmbedHelper::getOembedFrom($api, $url, $params); + common_log(LOG_INFO, "Trying to find Embed data for {$url} with 'oscarotero/Embed'"); + $info = Embed::create($url); - // Facebook just gives us javascript in its oembed html, - // so use the content of the title element instead - if (strpos($url, 'https://www.facebook.com/') === 0) { - $metadata->html = @$dom->getElementsByTagName('title')->item(0)->nodeValue; - } + $metadata->version = '1.0'; // Yes. + $metadata->provider_name = $info->authorName; + $metadata->title = $info->title; + $metadata->html = common_purify($info->description); + $metadata->type = $info->type; + $metadata->url = $info->url; + $metadata->thumbnail_url = $info->image; + $metadata->thumbnail_height = $info->imageHeight; + $metadata->thumbnail_width = $info->imageWidth; + } catch (Exception $e) { + common_log(LOG_INFO, "Failed to find Embed data for {$url} with 'oscarotero/Embed'"); + try { + common_log(LOG_INFO, "Trying to discover an oEmbed endpoint for {$url} using link headers."); + $api = EmbedHelper::oEmbedEndpointFromHTML($dom); + common_log(LOG_INFO, 'Found oEmbed API endpoint ' . $api . ' for URL ' . $url); + $params = array( + 'maxwidth' => common_config('thumbnail', 'width'), + 'maxheight' => common_config('thumbnail', 'height'), + ); + $metadata = EmbedHelper::getOembedFrom($api, $url, $params); + // Facebook just gives us javascript in its oembed html, + // so use the content of the title element instead + if (strpos($url, 'https://www.facebook.com/') === 0) { + $metadata->html = @$dom->getElementsByTagName('title')->item(0)->nodeValue; + } - // Wordpress sometimes also just gives us javascript, use og:description if it is available - $xpath = new DomXpath($dom); - $generatorNode = @$xpath->query('//meta[@name="generator"][1]')->item(0); - if ($generatorNode instanceof DomElement) { - // when wordpress only gives us javascript, the html stripped from tags - // is the same as the title, so this helps us to identify this (common) case - if (strpos($generatorNode->getAttribute('content'), 'WordPress') === 0 - && trim(strip_tags($metadata->html)) == trim($metadata->title)) { - $propertyNode = @$xpath->query('//meta[@property="og:description"][1]')->item(0); - if ($propertyNode instanceof DomElement) { - $metadata->html = $propertyNode->getAttribute('content'); + // Wordpress sometimes also just gives us javascript, use og:description if it is available + $xpath = new DomXpath($dom); + $generatorNode = @$xpath->query('//meta[@name="generator"][1]')->item(0); + if ($generatorNode instanceof DomElement) { + // when wordpress only gives us javascript, the html stripped from tags + // is the same as the title, so this helps us to identify this (common) case + if (strpos($generatorNode->getAttribute('content'), 'WordPress') === 0 + && trim(strip_tags($metadata->html)) == trim($metadata->title)) { + $propertyNode = @$xpath->query('//meta[@property="og:description"][1]')->item(0); + if ($propertyNode instanceof DomElement) { + $metadata->html = $propertyNode->getAttribute('content'); + } } } + } catch (Exception $e) { + // FIXME - make sure the error was because we couldn't get metadata, not something else! -mb + common_log(LOG_INFO, 'Could not find an oEmbed endpoint using link headers, ' . + 'trying OpenGraph from HTML.'); + // Just ignore it! + $metadata = OpenGraphHelper::ogFromHtml($dom); } - } catch (Exception $e) { - // FIXME - make sure the error was because we couldn't get metadata, not something else! -mb - common_log(LOG_INFO, 'Could not find an oEmbed endpoint using link headers, trying OpenGraph from HTML.'); - // Just ignore it! - $metadata = OpenGraphHelper::ogFromHtml($dom); } if (isset($metadata->thumbnail_url)) { @@ -153,7 +172,8 @@ class EmbedPlugin extends Plugin // add protocol and host if the thumbnail_url starts with / if (substr($metadata->thumbnail_url, 0, 1) == '/') { $thumbnail_url_parsed = parse_url($metadata->url); - $metadata->thumbnail_url = $thumbnail_url_parsed['scheme']."://".$thumbnail_url_parsed['host'].$metadata->thumbnail_url; + $metadata->thumbnail_url = $thumbnail_url_parsed['scheme']."://". + $thumbnail_url_parsed['host'].$metadata->thumbnail_url; } // some wordpress opengraph implementations sometimes return a white blank image diff --git a/plugins/Embed/classes/File_embed.php b/plugins/Embed/classes/File_embed.php index 8dc008bbdf..c927da9f5b 100644 --- a/plugins/Embed/classes/File_embed.php +++ b/plugins/Embed/classes/File_embed.php @@ -77,10 +77,10 @@ class File_embed extends Managed_DataObject ); } - public static function _getOembed($url) + public static function _getEmbed($url) { try { - return oEmbedHelper::getObject($url); + return EmbedHelper::getObject($url); } catch (Exception $e) { common_log(LOG_INFO, "Error during oembed lookup for $url - " . $e->getMessage()); return false; diff --git a/plugins/Embed/lib/embedhelper.php b/plugins/Embed/lib/embedhelper.php index 683b9ad6ce..a2c8e64142 100644 --- a/plugins/Embed/lib/embedhelper.php +++ b/plugins/Embed/lib/embedhelper.php @@ -104,7 +104,7 @@ class EmbedHelper $unicode_check_dom = new DOMDocument(); $ok = $unicode_check_dom->loadHTML($body); if (!$ok) { - throw new oEmbedHelper_BadHtmlException(); + throw new EmbedHelper_BadHtmlException(); } $metaNodes = $unicode_check_dom->getElementsByTagName('meta'); foreach ($metaNodes as $metaNode) { @@ -135,7 +135,7 @@ class EmbedHelper error_reporting($old); if (!$ok) { - throw new oEmbedHelper_BadHtmlException(); + throw new EmbedHelper_BadHtmlException(); } Event::handle('GetRemoteUrlMetadataFromDom', array($url, $dom, &$metadata)); @@ -185,7 +185,7 @@ class EmbedHelper } } - throw new oEmbedHelper_DiscoveryException(); + throw new EmbedHelper_DiscoveryException(); } /** @@ -245,7 +245,7 @@ class EmbedHelper } } -class oEmbedHelper_Exception extends Exception +class EmbedHelper_Exception extends Exception { public function __construct($message = "", $code = 0, $previous = null) { @@ -253,7 +253,7 @@ class oEmbedHelper_Exception extends Exception } } -class oEmbedHelper_BadHtmlException extends oEmbedHelper_Exception +class EmbedHelper_BadHtmlException extends EmbedHelper_Exception { public function __construct($previous=null) { @@ -261,7 +261,7 @@ class oEmbedHelper_BadHtmlException extends oEmbedHelper_Exception } } -class oEmbedHelper_DiscoveryException extends oEmbedHelper_Exception +class EmbedHelper_DiscoveryException extends EmbedHelper_Exception { public function __construct($previous=null) { diff --git a/plugins/Embed/scripts/poll_embed.php b/plugins/Embed/scripts/poll_embed.php index 135f88cf15..2eb314bc74 100755 --- a/plugins/Embed/scripts/poll_embed.php +++ b/plugins/Embed/scripts/poll_embed.php @@ -51,7 +51,7 @@ $url = get_option_value('u', 'url'); print "Contacting URL\n"; -$oEmbed = oEmbedHelper::getObject($url); +$oEmbed = EmbedHelper::getObject($url); var_dump($oEmbed); print "\nDONE.\n"; diff --git a/plugins/Embed/tests/EmbedTest.php b/plugins/Embed/tests/EmbedTest.php index bdd01504d1..023dd269a4 100644 --- a/plugins/Embed/tests/EmbedTest.php +++ b/plugins/Embed/tests/EmbedTest.php @@ -92,7 +92,7 @@ class oEmbedTest extends PHPUnit_Framework_TestCase public function _doTest($url, $expectedType) { try { - $data = oEmbedHelper::getObject($url); + $data = EmbedHelper::getObject($url); $this->assertEquals($expectedType, $data->type); if ($data->type == 'photo') { $this->assertTrue(!empty($data->url), 'Photo must have a URL.');