[Embed] Fix some bugs and change AttachmentEmbed::url to ::media_url

This commit is contained in:
Diogo Peralta Cordeiro 2021-05-01 22:45:47 +01:00
parent ced6e236ce
commit 5a40d1f3e3
3 changed files with 100 additions and 100 deletions

View File

@ -45,7 +45,6 @@ use App\Core\Log;
use App\Core\Modules\Plugin; use App\Core\Modules\Plugin;
use App\Core\Router\RouteLoader; use App\Core\Router\RouteLoader;
use App\Core\Router\Router; use App\Core\Router\Router;
use App\Core\Security;
use App\Entity\Attachment; use App\Entity\Attachment;
use App\Entity\AttachmentThumbnail; use App\Entity\AttachmentThumbnail;
use App\Util\Common; use App\Util\Common;
@ -81,11 +80,11 @@ class Embed extends Plugin
* *
* @param $m URLMapper the router that was initialized. * @param $m URLMapper the router that was initialized.
* *
* @return bool true if successful, the exception object if it isn't.
* @throws Exception * @throws Exception
* *
* @return void true if successful, the exception object if it isn't.
*/ */
public function onAddRoute(RouteLoader $m) public function onAddRoute(RouteLoader $m): bool
{ {
$m->connect('oembed', 'main/oembed', Controller\Embed::class); $m->connect('oembed', 'main/oembed', Controller\Embed::class);
$m->connect('embed', 'main/embed', Controller\Embed::class); $m->connect('embed', 'main/embed', Controller\Embed::class);
@ -100,20 +99,20 @@ class Embed extends Plugin
$matches = []; $matches = [];
preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches); preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches);
switch ($matches[1]) { switch ($matches[1]) {
case 'attachment': case 'attachment':
$url = "{$matches[1]}/{$matches[2]}"; $url = "{$matches[1]}/{$matches[2]}";
break; break;
} }
if (isset($url)) { if (isset($url)) {
foreach (['xml', 'json'] as $format) { foreach (['xml', 'json'] as $format) {
$result[] = [ $result[] = [
'link' => [ 'link' => [
'rel' => 'alternate', 'rel' => 'alternate',
'type' => "application/{$format}+oembed", 'type' => "application/{$format}+oembed",
'href' => Router::url('embed', ['format' => $format, 'url' => $url]), 'href' => Router::url('embed', ['format' => $format, 'url' => $url]),
'title' => 'oEmbed', 'title' => 'oEmbed',
], ]; ],];
} }
} }
return Event::next; return Event::next;
@ -128,29 +127,26 @@ class Embed extends Plugin
* *
* @return bool success * @return bool success
*/ */
public function onAttachmentStoreNew(Attachment $attachment) public function onAttachmentStoreNew(Attachment $attachment): bool
{ {
try { try {
DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]); DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);
} catch (NotFoundException) { } catch (NotFoundException) {
} catch (DuplicateFoundException) { if ($attachment->hasRemoteUrl() && $attachment->hasMimetype()) {
Log::warning("Strangely, an attachment_embed object exists for new file {$attachment->getID()}"); $mimetype = $attachment->getMimetype();
return Event::next; if (Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml')) {
} try {
$embed_data = $this->getEmbed($attachment->getRemoteUrl(), $attachment);
if ($attachment->hasRemoteUrl() && $attachment->hasMimetype()) { $embed_data['attachment_id'] = $attachment->getId();
$mimetype = $attachment->getMimetype(); DB::persist(Entity\AttachmentEmbed::create($embed_data));
if (Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml')) { DB::flush();
try { } catch (Exception $e) {
$embed_data = $this->getEmbed($attachment->getRemoteUrl(), $attachment); Log::warning($e);
$embed_data['attachment_id'] = $attachment->getId(); }
DB::persist(Entity\AttachmentEmbed::create($embed_data));
DB::flush();
} catch (Exception $e) {
Log::warning($e);
return Event::next;
} }
} }
} catch (DuplicateFoundException) {
Log::warning("Strangely, an attachment_embed object exists for new file {$attachment->getID()}");
} }
return Event::next; return Event::next;
} }
@ -176,10 +172,10 @@ class Embed extends Plugin
$enclosure = [ $enclosure = [
'filepath' => $embed->getFilepath(), 'filepath' => $embed->getFilepath(),
'mimetype' => $embed->getMimetype(), 'mimetype' => $embed->getMimetype(),
'title' => $embed->getTitle(), 'title' => $embed->getTitle(),
'width' => $embed->getWidth(), 'width' => $embed->getWidth(),
'height' => $embed->getHeight(), 'height' => $embed->getHeight(),
'url' => $embed->getUrl(), 'url' => $embed->getMediaUrl(),
]; ];
return Event::stop; return Event::stop;
@ -192,7 +188,7 @@ class Embed extends Plugin
{ {
try { try {
$embed = Cache::get('attachment-embed-' . $attachment->getId(), $embed = Cache::get('attachment-embed-' . $attachment->getId(),
fn () => DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()])); fn() => DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]));
} catch (DuplicateFoundException $e) { } catch (DuplicateFoundException $e) {
Log::waring($e); Log::waring($e);
return Event::next; return Event::next;
@ -213,7 +209,7 @@ class Embed extends Plugin
<img class="u-photo embed" width="{{attributes['width']}}" height="{{attributes['height']}}" src="{{attributes['src']}}" /> <img class="u-photo embed" width="{{attributes['width']}}" height="{{attributes['height']}}" src="{{attributes['src']}}" />
{% endif %} {% endif %}
<h5 class="p-name embed"> <h5 class="p-name embed">
<a class="u-url" href="{{embed.getUrl()}}">{{embed.getTitle() | escape}}</a> <a class="u-url" href="{{attachment.getRemoteUrl()}}">{{embed.getTitle() | escape}}</a>
</h5> </h5>
<div class="p-author embed"> <div class="p-author embed">
{% if embed.getAuthorName() is not null %} {% if embed.getAuthorName() is not null %}
@ -240,17 +236,19 @@ class Embed extends Plugin
{{ embed.getHtml() | escape }} {{ embed.getHtml() | escape }}
</div> </div>
</article> </article>
END, ['embed' => $embed, 'attributes' => $attributes]); END, ['embed' => $embed, 'attributes' => $attributes, 'attachment' => $attachment]);
return Event::stop; return Event::stop;
} }
/** /**
* @return bool false on no check made, provider name on success
* @return string|false on no check made, provider name on success
*
* @throws ServerException if check is made but fails * @throws ServerException if check is made but fails
* *
* @return bool false on no check made, provider name on success
*/ */
protected function checkAllowlist(string $url) protected function checkAllowlist(string $url): string|bool
{ {
if ($this->check_allowlist ?? false) { if ($this->check_allowlist ?? false) {
return false; // indicates "no check made" return false; // indicates "no check made"
@ -272,9 +270,9 @@ END, ['embed' => $embed, 'attributes' => $attributes]);
* reliable enough for our purposes. * reliable enough for our purposes.
* *
* @param string $url * @param string $url
* @param array $headers - if we already made a request * @param array|null $headers - if we already made a request
* *
* @return bool|string the file size if it succeeds, false otherwise. * @return int|null the file size if it succeeds, false otherwise.
*/ */
private function getRemoteFileSize(string $url, ?array $headers = null): ?int private function getRemoteFileSize(string $url, ?array $headers = null): ?int
{ {
@ -284,7 +282,7 @@ END, ['embed' => $embed, 'attributes' => $attributes]);
Log::error('Invalid URL in Embed::getRemoteFileSize()'); Log::error('Invalid URL in Embed::getRemoteFileSize()');
return false; return false;
} }
$head = HTTPClient::head($url); $head = HTTPClient::head($url);
$headers = $head->getHeaders(); $headers = $head->getHeaders();
$headers = array_change_key_case($headers, CASE_LOWER); $headers = array_change_key_case($headers, CASE_LOWER);
} }
@ -299,12 +297,12 @@ END, ['embed' => $embed, 'attributes' => $attributes]);
* A private helper function that uses a HEAD request to check the mime type * A private helper function that uses a HEAD request to check the mime type
* of a remote URL to see it it's an image. * of a remote URL to see it it's an image.
* *
* @param mixed $url * @param mixed $url
* @param null|mixed $headers * @param null|mixed $headers
* *
* @return bool true if the remote URL is an image, or false otherwise. * @return bool true if the remote URL is an image, or false otherwise.
*/ */
private function isRemoteImage(string $url, ?array $headers = null): ?int private function isRemoteImage(string $url, ?array $headers = null): bool
{ {
try { try {
if ($headers === null) { if ($headers === null) {
@ -312,13 +310,13 @@ END, ['embed' => $embed, 'attributes' => $attributes]);
Log::error('Invalid URL in Embed::getRemoteFileSize()'); Log::error('Invalid URL in Embed::getRemoteFileSize()');
return false; return false;
} }
$head = HTTPClient::head($url); $head = HTTPClient::head($url);
$headers = $head->getHeaders(); $headers = $head->getHeaders();
$headers = array_change_key_case($headers, CASE_LOWER); $headers = array_change_key_case($headers, CASE_LOWER);
} }
return !empty($headers['content-type']) && GSFile::mimetypeMajor($headers['content-type'][0]) === 'image'; return !empty($headers['content-type']) && GSFile::mimetypeMajor($headers['content-type'][0]) === 'image';
} catch (Exception $e) { } catch (Exception $e) {
Loog::error($e); Log::error($e);
return false; return false;
} }
} }
@ -327,25 +325,24 @@ END, ['embed' => $embed, 'attributes' => $attributes]);
* Validate that $imgData is a valid image, place it in it's folder and resize * Validate that $imgData is a valid image, place it in it's folder and resize
* *
* @param $imgData - The image data to validate * @param $imgData - The image data to validate
* @param null|string $url - The url where the image came from, to fetch metadata * @param null|array $headers - The headers possible previous request to $url
* @param null|array $headers - The headers possible previous request to $url
*/ */
protected function validateAndWriteImage($imgData, string $url, array $headers): array protected function validateAndWriteImage($imgData, ?array $headers = null): array
{ {
$file = new TemporaryFile(); $file = new TemporaryFile();
$file->write($imgData); $file->write($imgData);
Event::handle('HashFile', [$file->getRealPath(), &$hash]); Event::handle('HashFile', [$file->getRealPath(), &$hash]);
$filepath = Common::config('storage', 'dir') . "embed/{$hash}" . Common::config('thumbnail', 'extension'); $filepath = Common::config('storage', 'dir') . "embed/{$hash}" . Common::config('thumbnail', 'extension');
$width = Common::config('plugin_embed', 'width'); $width = Common::config('plugin_embed', 'width');
$height = Common::config('plugin_embed', 'height'); $height = Common::config('plugin_embed', 'height');
$smart_crop = Common::config('plugin_embed', 'smart_crop'); $smart_crop = Common::config('plugin_embed', 'smart_crop');
Event::handle('ResizeImagePath', [$file->getRealPath(), $filepath, &$width, &$height, $smart_crop, &$mimetype]); Event::handle('ResizeImagePath', [$file->getRealPath(), $filepath, &$width, &$height, $smart_crop, &$mimetype]);
unset($file); unset($file);
if (array_key_exists('content-disposition', $headers) && preg_match('/^.+; filename="(.+?)"$/', $headers['content-disposition'][0], $matches) === 1) { if (!is_null($headers) && array_key_exists('content-disposition', $headers) && preg_match('/^.+; filename="(.+?)"$/', $headers['content-disposition'][0], $matches) === 1) {
$original_name = $matches[1]; $original_name = $matches[1];
} }
@ -354,32 +351,34 @@ END, ['embed' => $embed, 'attributes' => $attributes]);
/** /**
* Create and store a thumbnail representation of a remote image * Create and store a thumbnail representation of a remote image
*
* @param Attachment $attachment
* @param string $media_url URL for the actual media representation
* @return array|bool
*/ */
protected function storeRemoteThumbnail(Attachment $attachment): array | bool protected function storeRemoteThumbnail(Attachment $attachment, string $media_url): array|bool
{ {
if ($attachment->hasFilename() && file_exists($attachment->getPath())) { if ($attachment->hasFilename() && file_exists($attachment->getPath())) {
throw new AlreadyFulfilledException(_m('A thumbnail seems to already exist for remote file with id=={id}', ['id' => $attachment->getId()])); throw new AlreadyFulfilledException(_m('A thumbnail seems to already exist for remote file with id=={id}', ['id' => $attachment->getId()]));
} }
$url = $attachment->getRemoteUrl(); if (Formatting::startsWith($media_url, 'file://')) {
$filename = Formatting::removePrefix($media_url, 'file://');
if (Formatting::startsWith($url, 'file://')) { $info = getimagesize($filename);
$filename = Formatting::removePrefix($url, 'file://');
$info = getimagesize($filename);
$filename = basename($filename); $filename = basename($filename);
$width = $info[0]; $width = $info[0];
$height = $info[1]; $height = $info[1];
} else { } else {
$this->checkAllowlist($url); $this->checkAllowlist($media_url);
$head = HTTPClient::head($url); $head = HTTPClient::head($media_url);
$headers = $head->getHeaders(); $headers = $head->getHeaders();
$headers = array_change_key_case($headers, CASE_LOWER); $headers = array_change_key_case($headers, CASE_LOWER);
try { try {
$is_image = $this->isRemoteImage($url, $headers); $is_image = $this->isRemoteImage($media_url, $headers);
if ($is_image == true) { if ($is_image == true) {
$file_size = $this->getRemoteFileSize($url, $headers); $file_size = $this->getRemoteFileSize($media_url, $headers);
$max_size = Common::config('attachments', 'file_quota'); $max_size = Common::config('attachments', 'file_quota');
if (($file_size != false) && ($file_size > $max_size)) { if (($file_size != false) && ($file_size > $max_size)) {
throw new \Exception("Wanted to store remote thumbnail of size {$file_size} but the upload limit is {$max_size} so we aborted."); throw new \Exception("Wanted to store remote thumbnail of size {$file_size} but the upload limit is {$max_size} so we aborted.");
} }
@ -392,11 +391,12 @@ END, ['embed' => $embed, 'attributes' => $attributes]);
} }
// First we download the file to memory and test whether it's actually an image file // First we download the file to memory and test whether it's actually an image file
Log::debug('Downloading remote thumbnail for file id==' . $attachment->getId() . " with thumbnail URL: {$url}"); Log::debug('Downloading remote thumbnail for file id==' . $attachment->getId() . " with thumbnail URL: {$media_url}");
try { try {
$imgData = HTTPClient::get($url)->getContent(); $imgData = HTTPClient::get($media_url)->getContent();
if (isset($imgData)) { if (isset($imgData)) {
[$filepath, $width, $height, $original_name, $mimetype] = $this->validateAndWriteImage($imgData, $url, $headers); [$filepath, $width, $height, $original_name, $mimetype] = $this->validateAndWriteImage($imgData, $headers);
} else { } else {
throw new UnsupportedMediaException(_m('HTTPClient returned an empty result')); throw new UnsupportedMediaException(_m('HTTPClient returned an empty result'));
} }
@ -422,11 +422,8 @@ END, ['embed' => $embed, 'attributes' => $attributes]);
* Throws exceptions on failure. * Throws exceptions on failure.
* *
* @param string $url * @param string $url
* * @param Attachment $attachment
* @throws EmbedHelper_BadHtmlException * @return array
* @throws HTTP_Request2_Exception
*
* @return object
*/ */
public function getEmbed(string $url, Attachment $attachment): array public function getEmbed(string $url, Attachment $attachment): array
{ {
@ -434,32 +431,33 @@ END, ['embed' => $embed, 'attributes' => $attributes]);
try { try {
Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'"); Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");
$embed = new LibEmbed(); $embed = new LibEmbed();
$info = $embed->get($url); $info = $embed->get($url);
$metadata['title'] = $info->title; $metadata['title'] = $info->title;
$metadata['html'] = Security::sanitize($info->description); $metadata['html'] = $info->description;
$metadata['url'] = $info->url; $metadata['author_name'] = $info->authorName;
$metadata['author_name'] = $info->authorName; $metadata['author_url'] = $info->authorUrl;
$metadata['author_url'] = $info->authorUrl;
$metadata['provider_name'] = $info->providerName; $metadata['provider_name'] = $info->providerName;
$metadata['provider_url'] = $info->providerUrl; $metadata['provider_url'] = $info->providerUrl;
if (!is_null($info->image)) { if (!is_null($info->image)) {
if (Formatting::startsWith($info->image, 'data')) { $image_url = (string)$info->image;
if (Formatting::startsWith($image_url, 'data')) {
// Inline image // Inline image
$imgData = base64_decode(substr($info->image, stripos($info->image, 'base64,') + 7)); $imgData = base64_decode(substr($info->image, stripos($info->image, 'base64,') + 7));
[$filepath, $width, $height, $original_name, $mimetype] = $this->validateAndWriteImage($imgData); [$filepath, $width, $height, $original_name, $mimetype] = $this->validateAndWriteImage($imgData);
} else { } else {
$attachment->setRemoteUrl((string) $info->image); [$filepath, $width, $height, $original_name, $mimetype] = $this->storeRemoteThumbnail($attachment, $image_url);
[$filepath, $width, $height, $original_name, $mimetype] = $this->storeRemoteThumbnail($attachment);
} }
$metadata['width'] = $width; $metadata['width'] = $width;
$metadata['height'] = $height; $metadata['height'] = $height;
$metadata['mimetype'] = $mimetype; $metadata['mimetype'] = $mimetype;
$metadata['media_url'] = $image_url;
$metadata['filename'] = Formatting::removePrefix($filepath, Common::config('storage', 'dir')); $metadata['filename'] = Formatting::removePrefix($filepath, Common::config('storage', 'dir'));
} }
} catch (Exception $e) { } catch (Exception $e) {
Log::info("Failed to find Embed data for {$url} with 'oscarotero/Embed', got exception: " . get_class($e)); Log::info("Failed to find Embed data for {$url} with 'oscarotero/Embed', got exception: " . $e);
} }
$metadata = self::normalize($metadata); $metadata = self::normalize($metadata);
@ -478,7 +476,7 @@ END, ['embed' => $embed, 'attributes' => $attributes]);
// add protocol and host if the thumbnail_url starts with / // add protocol and host if the thumbnail_url starts with /
if ($metadata['url'][0] == '/') { if ($metadata['url'][0] == '/') {
$thumbnail_url_parsed = parse_url($metadata['url']); $thumbnail_url_parsed = parse_url($metadata['url']);
$metadata['url'] = "{$thumbnail_url_parsed['scheme']}://{$thumbnail_url_parsed['host']}{$metadata['url']}"; $metadata['url'] = "{$thumbnail_url_parsed['scheme']}://{$thumbnail_url_parsed['host']}{$metadata['url']}";
} }
// Some wordpress opengraph implementations sometimes return a white blank image // Some wordpress opengraph implementations sometimes return a white blank image
@ -488,7 +486,7 @@ END, ['embed' => $embed, 'attributes' => $attributes]);
} }
if (!isset($data['width'])) { if (!isset($data['width'])) {
$data['width'] = Common::config('plugin_embed', 'width'); $data['width'] = Common::config('plugin_embed', 'width');
$data['height'] = Common::config('plugin_embed', 'height'); $data['height'] = Common::config('plugin_embed', 'height');
} }
} }

View File

@ -59,7 +59,7 @@ class AttachmentEmbed extends Entity
private ?string $title; private ?string $title;
private ?string $author_name; private ?string $author_name;
private ?string $author_url; private ?string $author_url;
private ?string $url; private ?string $media_url;
private \DateTimeInterface $modified; private \DateTimeInterface $modified;
public function setAttachmentId(int $attachment_id): self public function setAttachmentId(int $attachment_id): self
@ -183,15 +183,15 @@ class AttachmentEmbed extends Entity
return $this->author_url; return $this->author_url;
} }
public function setUrl(?string $url): self public function setMediaUrl(?string $media_url): self
{ {
$this->url = $url; $this->media_url = $media_url;
return $this; return $this;
} }
public function getUrl(): ?string public function getMediaUrl(): ?string
{ {
return $this->url; return $this->media_url;
} }
public function setModified(DateTimeInterface $modified): self public function setModified(DateTimeInterface $modified): self
@ -244,18 +244,18 @@ class AttachmentEmbed extends Entity
return [ return [
'name' => 'attachment_embed', 'name' => 'attachment_embed',
'fields' => [ 'fields' => [
'attachment_id' => ['type' => 'int', 'not null' => true, 'description' => 'oEmbed for that URL/file'], 'attachment_id' => ['type' => 'int', 'not null' => true, 'description' => 'Embed for that URL/file'],
'mimetype' => ['type' => 'varchar', 'length' => 50, 'description' => 'mime type of resource'], 'mimetype' => ['type' => 'varchar', 'length' => 50, 'description' => 'mime type of resource'],
'filename' => ['type' => 'varchar', 'length' => 191, 'description' => 'file name of resource when available'], 'filename' => ['type' => 'varchar', 'length' => 191, 'description' => 'file name of resource when available'],
'provider' => ['type' => 'text', 'description' => 'name of this oEmbed provider'], 'provider' => ['type' => 'text', 'description' => 'name of this oEmbed provider'],
'provider_url' => ['type' => 'text', 'description' => 'URL of this oEmbed provider'], 'provider_url' => ['type' => 'text', 'description' => 'URL of this oEmbed provider'],
'width' => ['type' => 'int', 'description' => 'width of oEmbed resource when available'], 'width' => ['type' => 'int', 'description' => 'width of oEmbed resource when available'],
'height' => ['type' => 'int', 'description' => 'height of oEmbed resource when available'], 'height' => ['type' => 'int', 'description' => 'height of oEmbed resource when available'],
'html' => ['type' => 'text', 'description' => 'html representation of this oEmbed resource when applicable'], 'html' => ['type' => 'text', 'description' => 'html representation of this Embed resource when applicable'],
'title' => ['type' => 'text', 'description' => 'title of oEmbed resource when available'], 'title' => ['type' => 'text', 'description' => 'title of Embed resource when available'],
'author_name' => ['type' => 'text', 'description' => 'author name for this oEmbed resource'], 'author_name' => ['type' => 'text', 'description' => 'author name for this Embed resource'],
'author_url' => ['type' => 'text', 'description' => 'author URL for this oEmbed resource'], 'author_url' => ['type' => 'text', 'description' => 'author URL for this Embed resource'],
'url' => ['type' => 'text', 'description' => 'URL for this oEmbed resource when applicable (photo, link)'], 'media_url' => ['type' => 'text', 'description' => 'URL for this Embed resource when applicable (photo, link)'],
'modified' => ['type' => 'timestamp', 'not null' => true, 'description' => 'date this record was modified'], 'modified' => ['type' => 'timestamp', 'not null' => true, 'description' => 'date this record was modified'],
], ],
'primary key' => ['attachment_id'], 'primary key' => ['attachment_id'],

View File

@ -48,7 +48,7 @@ class GSFile
// The following properly gets the mimetype with `file` or other // The following properly gets the mimetype with `file` or other
// available methods, so should be safe // available methods, so should be safe
$mimetype = $sfile->getMimeType(); $mimetype = $sfile->getMimeType();
Event::handle('AttachmentValidation', [&$sfile, &$mimetype, &$title]); Event::handle('AttachmentValidation', [&$sfile, &$mimetype, &$title, &$width, &$height]);
$attachment = Attachment::create([ $attachment = Attachment::create([
'file_hash' => $hash, 'file_hash' => $hash,
'gsactor_id' => $actor_id, 'gsactor_id' => $actor_id,
@ -57,6 +57,8 @@ class GSFile
'filename' => $hash, 'filename' => $hash,
'is_local' => $is_local, 'is_local' => $is_local,
'size' => $sfile->getSize(), 'size' => $sfile->getSize(),
'width' => $width,
'height' => $height,
]); ]);
$sfile->move($dest_dir, $hash); $sfile->move($dest_dir, $hash);
DB::persist($attachment); DB::persist($attachment);