[Embed] Fix usage of EmbedLib

Fix other minor bugs
This commit is contained in:
Diogo Peralta Cordeiro 2021-08-12 00:41:57 +01:00 committed by Hugo Sales
parent 968e3431e1
commit 7a0a6f1f22
Signed by: someonewithpc
GPG Key ID: 7D0C7EAFC9D835A0
2 changed files with 176 additions and 245 deletions

View File

@ -27,9 +27,8 @@
* @author hannes * @author hannes
* @author Mikael Nordfeldth * @author Mikael Nordfeldth
* @author Miguel Dantas * @author Miguel Dantas
* @author Hugo Sales <hugo@hsal.es>
* @author Diogo Peralta Cordeiro <mail@diogo.site> * @author Diogo Peralta Cordeiro <mail@diogo.site>
* @authir Hugo Sales <hugo@hsal.es>
*
* @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
* @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
*/ */
@ -41,14 +40,19 @@ use App\Core\DB\DB;
use App\Core\Event; use App\Core\Event;
use App\Core\GSFile; use App\Core\GSFile;
use App\Core\HTTPClient; use App\Core\HTTPClient;
use function App\Core\I18n\_m;
use App\Core\Log; use App\Core\Log;
use App\Core\Modules\Plugin; use App\Core\Modules\Plugin;
use App\Core\Router\RouteLoader; use App\Core\Router\RouteLoader;
use App\Core\Router\Router; use App\Core\Router\Router;
use App\Entity\Attachment; use App\Entity\Attachment;
use App\Entity\Note;
use App\Entity\RemoteURL;
use App\Util\Common; use App\Util\Common;
use App\Util\Exception\DuplicateFoundException; use App\Util\Exception\DuplicateFoundException;
use App\Util\Exception\NotFoundException; use App\Util\Exception\NotFoundException;
use App\Util\Exception\ServerException;
use App\Util\Exception\TemporaryFileException;
use App\Util\Formatting; use App\Util\Formatting;
use App\Util\TemporaryFile; use App\Util\TemporaryFile;
use Embed\Embed as LibEmbed; use Embed\Embed as LibEmbed;
@ -64,11 +68,16 @@ use Symfony\Component\HttpFoundation\Request;
*/ */
class Embed extends Plugin class Embed extends Plugin
{ {
public function version(): string
{
return '3.0.0';
}
/** /**
* Settings which can be set in social.local.yaml * Settings which can be set in social.local.yaml
* WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings * WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
*/ */
public $domain_allowlist = [ public array $domain_whitelist = [
// hostname => service provider // hostname => service provider
'.*' => '', // Default to allowing any host '.*' => '', // Default to allowing any host
]; ];
@ -77,11 +86,11 @@ class Embed extends Plugin
* This code executes when GNU social creates the page routing, and we hook * This code executes when GNU social creates the page routing, and we hook
* on this event to add our action handler for Embed. * on this event to add our action handler for Embed.
* *
* @param $m URLMapper the router that was initialized. * @param $m RouteLoader the router that was initialized.
* *
* @throws Exception * @throws Exception
* *
* @return bool true if successful, the exception object if it isn't. * @return bool
* *
*/ */
public function onAddRoute(RouteLoader $m): bool public function onAddRoute(RouteLoader $m): bool
@ -121,22 +130,35 @@ class Embed extends Plugin
/** /**
* Save embedding information for an Attachment, if applicable. * Save embedding information for an Attachment, if applicable.
* *
* Normally this event is called through File::saveNew() * @param RemoteURL $remote_url
* @param Note $note
* *
* @param Attachment $attachment The newly inserted Attachment object. * @throws DuplicateFoundException
* @throws ServerException
* @throws TemporaryFileException
* *
* @return bool success * @return bool
*/ */
public function onAttachmentStoreNew(Attachment $attachment): bool public function onNewRemoteURLFromNote(RemoteURL $remote_url, Note $note): bool
{ {
try { // Only handle text mime
DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]); if ($remote_url->getMimetypeMajor() !== 'text') {
} catch (NotFoundException) { return Event::next;
if ($attachment->hasRemoteUrl() && $attachment->hasMimetype()) { }
$mimetype = $attachment->getMimetype();
// Ignore if already handled
$attachment_embed = DB::find('attachment_embed', ['remoteurl_id' => $remote_url->getId()]);
if (!is_null($attachment_embed)) {
return Event::next;
}
$mimetype = $remote_url->getMimetype();
if (Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml')) { if (Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml')) {
try { try {
$embed_data = $this->getEmbed($attachment->getRemoteUrl(), $attachment); $embed_data = $this->getEmbed($remote_url->getRemoteUrl());
$embed_data['remoteurl_id'] = $remote_url->getId();
// Create attachment
$embed_data['attachment_id'] = $attachment->getId(); $embed_data['attachment_id'] = $attachment->getId();
DB::persist(Entity\AttachmentEmbed::create($embed_data)); DB::persist(Entity\AttachmentEmbed::create($embed_data));
DB::flush(); DB::flush();
@ -144,50 +166,89 @@ class Embed extends Plugin
Log::warning($e); Log::warning($e);
} }
} }
}
} catch (DuplicateFoundException) {
Log::warning("Strangely, an attachment_embed object exists for new file {$attachment->getID()}");
}
return Event::next; return Event::next;
} }
/** /**
* Replace enclosure representation of an attachment with the data from embed * Perform an oEmbed or OpenGraph lookup for the given $url.
*
* Some known hosts are whitelisted with API endpoints where we
* know they exist but autodiscovery data isn't available.
*
* Throws exceptions on failure.
*
* @param string $url
*
* @return array
*/ */
public function onAttachmentFileInfo(int $attachment_id, ?array &$enclosure) public function getEmbed(string $url): array
{ {
Log::info('Checking for remote URL metadata for ' . $url);
try { try {
$embed = DB::findOneBy('attachment_embed', ['attachment_id' => $attachment_id]); Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");
} catch (NotFoundException) { $embed = new LibEmbed();
return Event::next; $info = $embed->get($url);
$metadata['title'] = $info->title;
$metadata['description'] = $info->description;
$metadata['author_name'] = $info->authorName;
$metadata['author_url'] = (string) $info->authorUrl;
$metadata['provider_name'] = $info->providerName;
$metadata['provider_url'] = (string) $info->providerUrl;
if (!is_null($info->image)) {
$thumbnail_url = (string) $info->image;
} else {
$thumbnail_url = (string) $info->favicon;
} }
// We know about this attachment, so we 'own' it, but know // Check thumbnail URL validity
// that it doesn't have an image $metadata['thumbnail_url'] = $thumbnail_url;
if (!$embed->isImage()) { } catch (Exception $e) {
$enclosure = null; Log::info("Failed to find Embed data for {$url} with 'oscarotero/Embed', got exception: " . $e->getMessage());
return Event::stop;
} }
$enclosure = [ $metadata = self::normalize($metadata);
'filepath' => $embed->getFilepath(), return $metadata;
'mimetype' => $embed->getMimetype(), }
'title' => $embed->getTitle(),
'width' => $embed->getWidth(),
'height' => $embed->getHeight(),
'url' => $embed->getMediaUrl(),
];
return Event::stop; /**
* Normalize fetched info.
*
* @param array $metadata
*
* @return array
*/
public static function normalize(array $metadata): array
{
if (isset($metadata['thumbnail_url'])) {
// sometimes sites serve the path, not the full URL, for images
// let's "be liberal in what you accept from others"!
// add protocol and host if the thumbnail_url starts with /
if ($metadata['thumbnail_url'][0] == '/') {
$thumbnail_url_parsed = parse_url($metadata['thumbnail_url']);
$metadata['thumbnail_url'] = "{$thumbnail_url_parsed['scheme']}://{$thumbnail_url_parsed['host']}{$metadata['url']}";
}
// Some wordpress opengraph implementations sometimes return a white blank image
// no need for us to save that!
if ($metadata['thumbnail_url'] == 'https://s0.wp.com/i/blank.jpg') {
$metadata['thumbnail_url'] = null;
}
}
return $metadata;
} }
/** /**
* Show this attachment enhanced with the corresponding Embed data, if available * Show this attachment enhanced with the corresponding Embed data, if available
*
* @param array $vars * @param array $vars
* @param array $res * @param array $res
*
* @return bool * @return bool
*/ */
public function onViewRemoteAttachment(array $vars, array &$res): bool public function onViewAttachmentText(array $vars, array &$res): bool
{ {
$attachment = $vars['attachment']; $attachment = $vars['attachment'];
try { try {
@ -199,7 +260,7 @@ class Embed extends Plugin
} catch (NotFoundException) { } catch (NotFoundException) {
return Event::next; return Event::next;
} }
if (is_null($embed) && empty($embed->getAuthorName()) && empty($embed->getProvider())) { if (is_null($embed) && empty($embed->getAuthorName()) && empty($embed->getProviderName())) {
Log::debug('Embed doesn\'t have a representation for the attachment #' . $attachment->getId()); Log::debug('Embed doesn\'t have a representation for the attachment #' . $attachment->getId());
return Event::next; return Event::next;
} }
@ -225,12 +286,12 @@ class Embed extends Plugin
{% endif %} {% endif %}
</div> </div>
{% endif %} {% endif %}
{% if embed.getProvider() is not null %} {% if embed.getProviderName() is not null %}
<div class="fn vcard"> <div class="fn vcard">
{% if embed.getProviderUrl() is null %} {% if embed.getProviderUrl() is null %}
<p>{{embed.getProvider()}}</p> <p>{{embed.getProviderName()}}</p>
{% else %} {% else %}
<a href="{{embed.getProviderUrl()}}" class="url">{{embed.getProvider()}}</a> <a href="{{embed.getProviderUrl()}}" class="url">{{embed.getProviderName()}}</a>
{% endif %} {% endif %}
</div> </div>
{% endif %} {% endif %}
@ -253,20 +314,20 @@ END, ['embed' => $embed, 'attributes' => $attributes, 'attachment' => $attachmen
* *
* *
*/ */
protected function checkAllowlist(string $url): string | bool protected function checkWhitelist(string $url): string | bool
{ {
if ($this->check_allowlist ?? false) { if ($this->check_whitelist ?? false) {
return false; // indicates "no check made" return false; // indicates "no check made"
} }
$host = parse_url($url, PHP_URL_HOST); $host = parse_url($url, PHP_URL_HOST);
foreach ($this->domain_allowlist as $regex => $provider) { foreach ($this->domain_whitelist as $regex => $provider) {
if (preg_match("/{$regex}/", $host)) { if (preg_match("/{$regex}/", $host)) {
return $provider; // we trust this source, return provider name return $provider; // we trust this source, return provider name
} }
} }
throw new ServerException(_m('Domain not in remote thumbnail source allowlist: {host}', ['host' => $host])); throw new ServerException(_m('Domain not in remote thumbnail source whitelist: {host}', ['host' => $host]));
} }
/** /**
@ -299,8 +360,8 @@ END, ['embed' => $embed, 'attributes' => $attributes, 'attachment' => $attachmen
} }
/** /**
* A private helper function that uses a HEAD request to check the mime type * A private helper function that uses a HEAD request to check the mimetype
* of a remote URL to see it it's an image. * of a remote URL to see if it's an image.
* *
* @param mixed $url * @param mixed $url
* @param null|mixed $headers * @param null|mixed $headers
@ -327,7 +388,7 @@ END, ['embed' => $embed, 'attributes' => $attributes, 'attachment' => $attachmen
} }
/** /**
* Validate that $imgData is a valid image, place it in it's folder and resize * Validate that $imgData is a valid image, place it in its folder and resize
* *
* @param $imgData - The image data to validate * @param $imgData - The image data to validate
* @param null|array $headers - The headers possible previous request to $url * @param null|array $headers - The headers possible previous request to $url
@ -377,7 +438,7 @@ END, ['embed' => $embed, 'attributes' => $attributes, 'attachment' => $attachmen
$width = $info[0]; $width = $info[0];
$height = $info[1]; $height = $info[1];
} else { } else {
$this->checkAllowlist($media_url); $this->checkWhitelist($media_url);
$head = HTTPClient::head($media_url); $head = HTTPClient::head($media_url);
$headers = $head->getHeaders(); $headers = $head->getHeaders();
$headers = array_change_key_case($headers, CASE_LOWER); $headers = array_change_key_case($headers, CASE_LOWER);
@ -419,84 +480,23 @@ END, ['embed' => $embed, 'attributes' => $attributes, 'attachment' => $attachmen
} }
/** /**
* Perform an oEmbed or OpenGraph lookup for the given $url. * Event raised when GNU social polls the plugin for information about it.
* Adds this plugin's version information to $versions array
* *
* Some known hosts are allowlisted with API endpoints where we * @param &$versions array inherited from parent
* know they exist but autodiscovery data isn't available.
* *
* Throws exceptions on failure. * @return bool true hook value
*
* @param string $url
* @param Attachment $attachment
*
* @return array
*/ */
public function getEmbed(string $url, Attachment $attachment): array public function onPluginVersion(array &$versions): bool
{ {
Log::info('Checking for remote URL metadata for ' . $url); $versions[] = [
'name' => 'Embed',
try { 'version' => $this->version(),
Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'"); 'author' => 'Mikael Nordfeldth, Hugo Sales, Diogo Peralta Cordeiro',
$embed = new LibEmbed(); 'homepage' => GNUSOCIAL_PROJECT_URL,
$info = $embed->get($url); 'description' => // TRANS: Plugin description.
$metadata['title'] = $info->title; _m('Plugin for using and representing oEmbed, OpenGraph and other data.'),
$metadata['html'] = $info->description; ];
$metadata['author_name'] = $info->authorName; return Event::next;
$metadata['author_url'] = $info->authorUrl;
$metadata['provider_name'] = $info->providerName;
$metadata['provider_url'] = $info->providerUrl;
if (!is_null($info->image)) {
$image_url = (string) $info->image;
if (Formatting::startsWith($image_url, 'data')) {
// Inline image
$imgData = base64_decode(substr($info->image, stripos($info->image, 'base64,') + 7));
[$filepath, $width, $height, $original_name, $mimetype] = $this->validateAndWriteImage($imgData);
} else {
[$filepath, $width, $height, $original_name, $mimetype] = $this->fetchValidateWriteRemoteImage($attachment, $image_url);
}
$metadata['width'] = $width;
$metadata['height'] = $height;
$metadata['mimetype'] = $mimetype;
$metadata['media_url'] = $image_url;
$metadata['filename'] = Formatting::removePrefix($filepath, Common::config('storage', 'dir'));
}
} catch (Exception $e) {
Log::info("Failed to find Embed data for {$url} with 'oscarotero/Embed', got exception: " . $e->getMessage());
}
$metadata = self::normalize($metadata);
$attachment->setTitle($metadata['title']);
return $metadata;
}
/**
* Normalize fetched info.
*/
public static function normalize(array $data): array
{
if (isset($metadata['url'])) {
// sometimes sites serve the path, not the full URL, for images
// let's "be liberal in what you accept from others"!
// add protocol and host if the thumbnail_url starts with /
if ($metadata['url'][0] == '/') {
$thumbnail_url_parsed = parse_url($metadata['url']);
$metadata['url'] = "{$thumbnail_url_parsed['scheme']}://{$thumbnail_url_parsed['host']}{$metadata['url']}";
}
// Some wordpress opengraph implementations sometimes return a white blank image
// no need for us to save that!
if ($metadata['url'] == 'https://s0.wp.com/i/blank.jpg') {
$metadata['url'] = null;
}
if (!isset($data['width'])) {
$data['width'] = Common::config('plugin_embed', 'width');
$data['height'] = Common::config('plugin_embed', 'height');
}
}
return $data;
} }
} }

View File

@ -33,9 +33,6 @@
namespace Plugin\Embed\Entity; namespace Plugin\Embed\Entity;
use App\Core\Entity; use App\Core\Entity;
use App\Core\GSFile;
use App\Core\Router\Router;
use App\Util\Common;
use DateTimeInterface; use DateTimeInterface;
/** /**
@ -49,62 +46,53 @@ class AttachmentEmbed extends Entity
{ {
// {{{ Autocode // {{{ Autocode
// @codeCoverageIgnoreStart // @codeCoverageIgnoreStart
private int $remoteurl_id;
private int $attachment_id; private int $attachment_id;
private ?string $mimetype;
private ?string $filename;
private ?string $provider;
private ?string $provider_url;
private ?int $width;
private ?int $height;
private ?string $html;
private ?string $title; private ?string $title;
private ?string $description;
private ?string $provider_name;
private ?string $provider_url;
private ?string $author_name; private ?string $author_name;
private ?string $author_url; private ?string $author_url;
private ?string $media_url; private ?string $thumbnail_url;
private \DateTimeInterface $modified; private \DateTimeInterface $modified;
public function setAttachmentId(int $attachment_id): self public function setRemoteUrlId(int $remoteurl_id): self
{ {
$this->attachment_id = $attachment_id; $this->remoteurl_id = $remoteurl_id;
return $this; return $this;
} }
public function getRemoteUrlId(): int
{
return $this->remoteurl_id;
}
/**
* @return int
*/
public function getAttachmentId(): int public function getAttachmentId(): int
{ {
return $this->attachment_id; return $this->attachment_id;
} }
public function setMimetype(?string $mimetype): self /**
* @param int $attachment_id
*/
public function setAttachmentId(int $attachment_id): void
{ {
$this->mimetype = $mimetype; $this->attachment_id = $attachment_id;
}
public function setProviderName(?string $provider_name): self
{
$this->provider_name = $provider_name;
return $this; return $this;
} }
public function getMimetype(): ?string public function getProviderName(): ?string
{ {
return $this->mimetype; return $this->provider_name;
}
public function setFilename(?string $filename): self
{
$this->filename = $filename;
return $this;
}
public function getFilename(): ?string
{
return $this->filename;
}
public function setProvider(?string $provider): self
{
$this->provider = $provider;
return $this;
}
public function getProvider(): ?string
{
return $this->provider;
} }
public function setProviderUrl(?string $provider_url): self public function setProviderUrl(?string $provider_url): self
@ -118,37 +106,15 @@ class AttachmentEmbed extends Entity
return $this->provider_url; return $this->provider_url;
} }
public function setWidth(?int $width): self public function setDescription(?string $description): self
{ {
$this->width = $width; $this->description = $description;
return $this; return $this;
} }
public function getWidth(): ?int public function getDescription(): ?string
{ {
return $this->width; return $this->description;
}
public function setHeight(?int $height): self
{
$this->height = $height;
return $this;
}
public function getHeight(): ?int
{
return $this->height;
}
public function setHtml(?string $html): self
{
$this->html = $html;
return $this;
}
public function getHtml(): ?string
{
return $this->html;
} }
public function setTitle(?string $title): self public function setTitle(?string $title): self
@ -184,15 +150,15 @@ class AttachmentEmbed extends Entity
return $this->author_url; return $this->author_url;
} }
public function setMediaUrl(?string $media_url): self public function setThumbnailUrl(?string $thumbnail_url): self
{ {
$this->media_url = $media_url; $this->thumbnail_url = $thumbnail_url;
return $this; return $this;
} }
public function getMediaUrl(): ?string public function getThumbnailUrl(): ?string
{ {
return $this->media_url; return $this->thumbnail_url;
} }
public function setModified(DateTimeInterface $modified): self public function setModified(DateTimeInterface $modified): self
@ -209,59 +175,24 @@ class AttachmentEmbed extends Entity
// @codeCoverageIgnoreEnd // @codeCoverageIgnoreEnd
// }}} Autocode // }}} Autocode
public function getAttachmentUrl()
{
return Router::url('attachment_view', ['id' => $this->getAttachmentId()]);
}
public function isImage()
{
return isset($this->mimetype) && GSFile::mimetypeMajor($this->mimetype) == 'image';
}
/**
* Get the HTML attributes for this attachment
*/
public function getImageHTMLAttributes(array $orig = [], bool $overwrite = true)
{
if ($this->isImage()) {
$attrs = [
'height' => $this->getHeight(),
'width' => $this->getWidth(),
'src' => $this->getAttachmentUrl(),
];
return $overwrite ? array_merge($orig, $attrs) : array_merge($attrs, $orig);
} else {
return false;
}
}
public function getFilepath()
{
return Common::config('storage', 'dir') . $this->filename;
}
public static function schemaDef() public static function schemaDef()
{ {
return [ return [
'name' => 'attachment_embed', 'name' => 'attachment_embed',
'fields' => [ 'fields' => [
'attachment_id' => ['type' => 'int', 'not null' => true, 'description' => 'Embed for that URL/file'], 'remoteurl_id' => ['type' => 'int', 'not null' => true, 'description' => 'Embed for that URL/file'],
'mimetype' => ['type' => 'varchar', 'length' => 50, 'description' => 'mime type of resource'], 'attachment_id' => ['type' => 'int', 'not null' => true, 'description' => 'Attachment relation, used to show previews'],
'filename' => ['type' => 'varchar', 'length' => 191, 'description' => 'file name of resource when available'], 'provider_name' => ['type' => 'text', 'description' => 'name of this Embed provider'],
'provider' => ['type' => 'text', 'description' => 'name of this oEmbed provider'], 'provider_url' => ['type' => 'text', 'description' => 'URL of this Embed provider'],
'provider_url' => ['type' => 'text', 'description' => 'URL of this oEmbed provider'],
'width' => ['type' => 'int', 'description' => 'width of oEmbed resource when available'],
'height' => ['type' => 'int', 'description' => 'height of oEmbed resource when available'],
'html' => ['type' => 'text', 'description' => 'html representation of this Embed resource when applicable'],
'title' => ['type' => 'text', 'description' => 'title of Embed resource when available'], 'title' => ['type' => 'text', 'description' => 'title of Embed resource when available'],
'author_name' => ['type' => 'text', 'description' => 'author name for this Embed resource'], 'author_name' => ['type' => 'text', 'description' => 'author name for this Embed resource'],
'author_url' => ['type' => 'text', 'description' => 'author URL for this Embed resource'], 'author_url' => ['type' => 'text', 'description' => 'author URL for this Embed resource'],
'media_url' => ['type' => 'text', 'description' => 'URL for this Embed resource when applicable (photo, link)'], 'thumbnail_url' => ['type' => 'text', 'description' => 'URL for this Embed resource when applicable (photo, link)'],
'modified' => ['type' => 'timestamp', 'not null' => true, 'description' => 'date this record was modified'], 'modified' => ['type' => 'timestamp', 'not null' => true, 'description' => 'date this record was modified'],
], ],
'primary key' => ['attachment_id'], 'primary key' => ['remoteurl_id'],
'foreign keys' => [ 'foreign keys' => [
'attachment_embed_remoteurl_id_fkey' => ['remoteurl', ['remoteurl_id' => 'id']],
'attachment_embed_attachment_id_fkey' => ['attachment', ['attachment_id' => 'id']], 'attachment_embed_attachment_id_fkey' => ['attachment', ['attachment_id' => 'id']],
], ],
]; ];