From 22b5dd85674fa965b1d263b599353b181385f2c2 Mon Sep 17 00:00:00 2001 From: Diogo Peralta Cordeiro Date: Tue, 16 Feb 2021 18:30:21 +0000 Subject: [PATCH] [Media] Fix several issues [StoreRemoteMedia] Upgrade plugin to use the new Media system API Changes: - Added getters to File to better formalize the ideas of the commit "[Media] Fix issues with database file storage" UI Changes: - Now presented thumbnails are actual thumbnails (bug fix) - Attachment actions have a slightly more extended behaviour Many other minor bug fixes... --- actions/attachment.php | 18 +- actions/attachment_download.php | 8 + actions/attachment_view.php | 17 +- actions/newnotice.php | 4 +- classes/File.php | 31 +- classes/File_thumbnail.php | 10 +- lib/media/attachmentlistitem.php | 3 +- lib/media/imagefile.php | 9 +- lib/media/mediafile.php | 11 +- lib/util/httpclient.php | 4 +- .../lib/models/Activitypub_notice.php | 4 +- plugins/Embed/EmbedPlugin.php | 90 ++-- plugins/Embed/{README => README.md} | 19 +- plugins/StoreRemoteMedia/README | 34 -- plugins/StoreRemoteMedia/README.md | 31 ++ .../StoreRemoteMediaPlugin.php | 414 +++++++++++------- 16 files changed, 421 insertions(+), 286 deletions(-) rename plugins/Embed/{README => README.md} (71%) delete mode 100644 plugins/StoreRemoteMedia/README create mode 100644 plugins/StoreRemoteMedia/README.md diff --git a/actions/attachment.php b/actions/attachment.php index 7572688227..5597a35100 100644 --- a/actions/attachment.php +++ b/actions/attachment.php @@ -76,14 +76,7 @@ class AttachmentAction extends ManagedAction $this->mimetype = $this->attachment->mimetype; $this->filename = $this->attachment->filename; - if ($this->attachment->isLocal()) { - $this->filepath = $this->attachment->getFileOrThumbnailPath(); - if (empty($this->filepath)) { - $this->clientError( - _m('Requested local URL for a file that is not stored locally.'), - 404 - ); - } + if ($this->attachment->isLocal() || $this->attachment->isFetchedRemoteFile()) { $this->filesize = 
$this->attachment->getFileOrThumbnailSize(); $this->mimetype = $this->attachment->getFileOrThumbnailMimetype(); $this->filename = MediaFile::getDisplayName($this->attachment); @@ -115,15 +108,6 @@ class AttachmentAction extends ManagedAction public function showPage(): void { - if ( - !$this->attachment->isLocal() - || empty($this->filepath) - || !file_exists($this->filepath) - ) { - // If it's not a locally stored file, get lost - common_redirect($this->attachment->getUrl(), 303); - } - parent::showPage(); } diff --git a/actions/attachment_download.php b/actions/attachment_download.php index 3436bc81c2..df0c4705aa 100644 --- a/actions/attachment_download.php +++ b/actions/attachment_download.php @@ -35,6 +35,14 @@ class Attachment_downloadAction extends AttachmentAction @ini_set('display_errors', 0); if ($this->attachment->isLocal()) { + try { + $this->filepath = $this->attachment->getFileOrThumbnailPath(); + } catch (Exception $e) { + $this->clientError( + _m('Requested local URL for a file that is not stored locally.'), + 404 + ); + } common_send_file( $this->filepath, $this->mimetype, diff --git a/actions/attachment_view.php b/actions/attachment_view.php index 780717cd58..d320e61f29 100644 --- a/actions/attachment_view.php +++ b/actions/attachment_view.php @@ -20,8 +20,8 @@ defined('GNUSOCIAL') || die(); * View notice attachment * * @package GNUsocial - * @author Miguel Dantas - * @copyright 2019 Free Software Foundation, Inc http://www.fsf.org + * @author Mikael Nordfeldth + * @copyright 2016 Free Software Foundation, Inc http://www.fsf.org * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later */ class Attachment_viewAction extends AttachmentAction @@ -33,13 +33,20 @@ class Attachment_viewAction extends AttachmentAction // script execution, and we don't want to have any more errors until then, so don't reset it @ini_set('display_errors', 0); - if ($this->attachment->isLocal()) { + if ($this->attachment->isLocal() || 
$this->attachment->isFetchedRemoteFile()) { + try { + $this->filepath = $this->attachment->getFileOrThumbnailPath(); + } catch (Exception $e) { + $this->clientError( + _m('Requested local URL for a file that is not stored locally.'), + 404 + ); + } $disposition = 'attachment'; if (in_array(common_get_mime_media($this->mimetype), ['image', 'video'])) { $disposition = 'inline'; } - common_send_file($this->filepath, $this->mimetype, -$this->filename, $disposition); + common_send_file($this->filepath, $this->mimetype, $this->filename, $disposition); } else { common_redirect($this->attachment->getUrl(), 303); } diff --git a/actions/newnotice.php b/actions/newnotice.php index 1ea7f84b85..a8e223d800 100644 --- a/actions/newnotice.php +++ b/actions/newnotice.php @@ -204,7 +204,9 @@ class NewnoticeAction extends FormAction $this->stored = Notice::saveActivity($act, $this->scoped, $options); - $upload->attachToNotice($this->stored); + if ($upload instanceof MediaFile) { + $upload->attachToNotice($this->stored); + } Event::handle('EndNoticeSaveWeb', array($this, $this->stored)); } diff --git a/classes/File.php b/classes/File.php index 50b51b1ed1..2dc832edd6 100644 --- a/classes/File.php +++ b/classes/File.php @@ -557,7 +557,9 @@ class File extends Managed_DataObject // This means we either don't know what it is, so it can't // be shown as an enclosure, or it is an HTML link which // does not link to a resource with further metadata. - throw new ServerException('Unknown enclosure mimetype, not enough metadata'); + // throw new ServerException('Unknown enclosure mimetype, not enough metadata'); + // It's not really an error that must be shown or handled... + common_debug('Unknown enclosure mimetype, not enough metadata'); } self::$_enclosures[$this->getID()] = $enclosure; @@ -830,11 +832,36 @@ class File extends Managed_DataObject return $count; } - public function isLocal() + // A file with no url and with filename is a local file. 
+ public function isLocal(): bool { return empty($this->url) && !empty($this->filename); } + // A file with an url but no filename is a remote file that wasn't fetched, not even the thumbnail. + public function isNonFetchedRemoteFile(): bool + { + return !empty($this->url) && empty($this->filename); + } + + // A file with an url and filename is a fetched remote file (maybe just a thumbnail of it). + public function isFetchedRemoteFile(): bool + { + return !empty($this->url) && !empty($this->filename); + } + + // A file with no filename nor url is a redirect. + public function isRedirect(): bool + { + return empty($this->url) && empty($this->filename); + } + + // Is in a remote location. + public function isStoredRemotely(): bool + { + return empty($this->filename); + } + public function unlink() { // Delete the file, if it exists locally if (!empty($this->filename) && file_exists(self::path($this->filename))) { diff --git a/classes/File_thumbnail.php b/classes/File_thumbnail.php index 661f54958b..8290635776 100644 --- a/classes/File_thumbnail.php +++ b/classes/File_thumbnail.php @@ -88,8 +88,8 @@ class File_thumbnail extends Managed_DataObject bool $force_still = true, ?bool $upscale = null ): File_thumbnail { - if (is_null($file->filename)) { // Remote file - // If StoreRemoteMedia is enabled... + if ($file->isStoredRemotely()) { // Remote file + // If StoreRemoteMedia or Embed are enabled... 
if (Event::handle('CreateFileImageThumbnailSource', [$file, &$imgPath, 'image'])) { if (!file_exists($imgPath)) { throw new FileNotFoundException($imgPath); @@ -101,7 +101,7 @@ class File_thumbnail extends Managed_DataObject throw new UseFileAsThumbnailException($file); } } - throw new FileNotFoundException("This remote file has no local thumbnail."); + throw new ServerException("This remote file has no local thumbnail."); } $image = ImageFile::fromFileObject($file); $imgPath = $image->getPath(); @@ -215,7 +215,7 @@ class File_thumbnail extends Managed_DataObject return $tn; } - public static function path($filename) + public static function path($filename): string { File::tryFilename($filename); @@ -239,7 +239,7 @@ class File_thumbnail extends Managed_DataObject * @throws FileNotFoundException * @throws ServerException */ - public function getPath() + public function getPath(): string { $oldpath = File::path($this->getFilename()); $thumbpath = self::path($this->getFilename()); diff --git a/lib/media/attachmentlistitem.php b/lib/media/attachmentlistitem.php index 0e57b39d62..deb447fea1 100644 --- a/lib/media/attachmentlistitem.php +++ b/lib/media/attachmentlistitem.php @@ -103,7 +103,8 @@ class AttachmentListItem extends Widget $this->showRepresentation(); } - function showRepresentation() { + function showRepresentation() + { $enclosure = $this->attachment->getEnclosure(); if (Event::handle('StartShowAttachmentRepresentation', [$this->out, $this->attachment])) { diff --git a/lib/media/imagefile.php b/lib/media/imagefile.php index 855c88093c..7b334f7c70 100644 --- a/lib/media/imagefile.php +++ b/lib/media/imagefile.php @@ -201,6 +201,7 @@ class ImageFile extends MediaFile if ($mediafile instanceof self) { return $mediafile; } else { + $mediafile->delete(); // We can conclude that we have failed to get the MIME type // TRANS: Client exception thrown trying to upload an invalid image type. 
// TRANS: %s is the file type that was denied @@ -219,21 +220,23 @@ class ImageFile extends MediaFile * @param string $url Remote image URL * @param Profile|null $scoped * @param string|null $name + * @param int|null $file_id same as in this class constructor * @return ImageFile * @throws ClientException - * @throws FileNotFoundException + * @throws HTTP_Request2_Exception * @throws InvalidFilenameException * @throws NoResultException * @throws ServerException * @throws UnsupportedMediaException * @throws UseFileAsThumbnailException */ - public static function fromUrl(string $url, ?Profile $scoped = null, ?string $name = null): self + public static function fromUrl(string $url, ?Profile $scoped = null, ?string $name = null, ?int $file_id = null): self { - $mediafile = parent::fromUrl($url, $scoped, $name); + $mediafile = parent::fromUrl($url, $scoped, $name, $file_id); if ($mediafile instanceof self) { return $mediafile; } else { + $mediafile->delete(); // We can conclude that we have failed to get the MIME type // TRANS: Client exception thrown trying to upload an invalid image type. // TRANS: %s is the file type that was denied diff --git a/lib/media/mediafile.php b/lib/media/mediafile.php index aa3a5770eb..401a164c77 100644 --- a/lib/media/mediafile.php +++ b/lib/media/mediafile.php @@ -252,7 +252,7 @@ class MediaFile // video support plugin or something. // FIXME: Do this more automagically. 
// Honestly, I think this is unlikely these days, - // but better be safe than sure, I guess + // but better be safe than sorry, I guess if ($image->getPath() != $file->getPath()) { $image->unlink(); } @@ -506,16 +506,17 @@ class MediaFile * @param string $url Remote media URL * @param Profile|null $scoped * @param string|null $name + * @param int|null $file_id same as in this class constructor * @return ImageFile|MediaFile * @throws ClientException - * @throws FileNotFoundException + * @throws HTTP_Request2_Exception * @throws InvalidFilenameException * @throws NoResultException * @throws ServerException * @throws UnsupportedMediaException * @throws UseFileAsThumbnailException */ - public static function fromUrl(string $url, ?Profile $scoped = null, ?string $name = null) + public static function fromUrl(string $url, ?Profile $scoped = null, ?string $name = null, ?int $file_id = null) { if (!common_valid_http_url($url)) { // TRANS: Server exception. %s is a URL. @@ -631,10 +632,10 @@ class MediaFile } if ($media === 'image') { - return new ImageFile(null, $filepath, $filehash, $url); + return new ImageFile($file_id, $filepath, $filehash, $url); } } - return new self($filepath, $mimetype, $filehash, null, $url); + return new self($filepath, $mimetype, $filehash, $file_id, $url); } public static function fromFileInfo(SplFileInfo $finfo, Profile $scoped = null) diff --git a/lib/util/httpclient.php b/lib/util/httpclient.php index 184b2c2e13..bb5f2c3b48 100644 --- a/lib/util/httpclient.php +++ b/lib/util/httpclient.php @@ -403,7 +403,7 @@ class HTTPClient extends HTTP_Request2 return new GNUsocial_HTTPResponse($response, $this->getUrl(), $redirs); } - public static function get_filename(string $url, array $headers = null) : string { + public static function get_filename(string $url, array $headers = null) : ?string { if ($headers === null) { $head = (new HTTPClient())->head($url); $headers = $head->getHeader(); @@ -414,7 +414,7 @@ class HTTPClient extends HTTP_Request2 
return $matches[1]; } else { common_log(LOG_INFO, "Couldn't determine filename for url: {$url}"); - return _('Untitled attachment'); + return null; } } } diff --git a/plugins/ActivityPub/lib/models/Activitypub_notice.php b/plugins/ActivityPub/lib/models/Activitypub_notice.php index 11ecd853d2..a9ea4d3e05 100644 --- a/plugins/ActivityPub/lib/models/Activitypub_notice.php +++ b/plugins/ActivityPub/lib/models/Activitypub_notice.php @@ -227,10 +227,10 @@ class Activitypub_notice $act->context->location = Location::fromLatLon($settings['latitude'], $settings['longitude']); } - /* Reject notice if it is too long (without the HTML) + // Reject notice if it is too long (without the HTML) if (Notice::contentTooLong($content)) { throw new Exception('That\'s too long. Maximum notice size is %d character.'); - }*/ + } // Attachments (first part) $attachments = []; diff --git a/plugins/Embed/EmbedPlugin.php b/plugins/Embed/EmbedPlugin.php index 4cf5b72188..7810e91d94 100644 --- a/plugins/Embed/EmbedPlugin.php +++ b/plugins/Embed/EmbedPlugin.php @@ -40,7 +40,7 @@ use Embed\Embed; */ class EmbedPlugin extends Plugin { - const PLUGIN_VERSION = '0.1.0'; + const PLUGIN_VERSION = '2.0.0'; // settings which can be set in config.php with addPlugin('Embed', ['param'=>'value', ...]); // WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings @@ -141,7 +141,7 @@ class EmbedPlugin extends Plugin } } catch (Exception $e) { common_log(LOG_INFO, "Failed to find Embed data for {$url} with 'oscarotero/Embed'" . - ", got exception: " . get_class($e)); + ", got exception: " . get_class($e)); } if (isset($metadata->thumbnail_url)) { @@ -151,7 +151,7 @@ class EmbedPlugin extends Plugin if ($metadata->thumbnail_url[0] == '/') { $thumbnail_url_parsed = parse_url($metadata->url); $metadata->thumbnail_url = "{$thumbnail_url_parsed['scheme']}://". 
- "{$thumbnail_url_parsed['host']}{$metadata->thumbnail_url}"; + "{$thumbnail_url_parsed['host']}{$metadata->thumbnail_url}"; } // some wordpress opengraph implementations sometimes return a white blank image @@ -168,21 +168,21 @@ class EmbedPlugin extends Plugin public function onEndShowHeadElements(Action $action) { switch ($action->getActionName()) { - case 'attachment': - $url = common_local_url('attachment', ['attachment' => $action->attachment->getID()]); - break; - case 'shownotice': - if (!$action->notice->isLocal()) { - return true; - } - try { - $url = $action->notice->getUrl(); - } catch (InvalidUrlException $e) { - // The notice is probably a share or similar, which don't - // have a representational URL of their own. - return true; - } - break; + case 'attachment': + $url = common_local_url('attachment', ['attachment' => $action->attachment->getID()]); + break; + case 'shownotice': + if (!$action->notice->isLocal()) { + return true; + } + try { + $url = $action->notice->getUrl(); + } catch (InvalidUrlException $e) { + // The notice is probably a share or similar, which don't + // have a representational URL of their own. + return true; + } + break; } if (isset($url)) { @@ -226,7 +226,7 @@ class EmbedPlugin extends Plugin if (isset($file->mimetype) && (('text/html' === substr($file->mimetype, 0, 9) || - 'application/xhtml+xml' === substr($file->mimetype, 0, 21)))) { + 'application/xhtml+xml' === substr($file->mimetype, 0, 21)))) { try { $embed_data = File_embed::getEmbed($file->url); if ($embed_data === false) { @@ -255,7 +255,7 @@ class EmbedPlugin extends Plugin } $out->elementStart('div', ['id'=>'oembed_info', 'class'=>'e-content']); foreach (['author_name' => ['class' => ' author', 'url' => 'author_url'], - 'provider' => ['class' => '', 'url' => 'provider_url']] + 'provider' => ['class' => '', 'url' => 'provider_url']] as $field => $options) { if (!empty($embed->{$field})) { $out->elementStart('div', "fn vcard" . 
$options['class']); @@ -265,7 +265,7 @@ class EmbedPlugin extends Plugin $out->element( 'a', ['href' => $embed->{$options['url']}, - 'class' => 'url'], + 'class' => 'url'], $embed->{$field} ); } @@ -367,16 +367,16 @@ class EmbedPlugin extends Plugin // the 'photo' type is shown through ordinary means, using StartShowAttachmentRepresentation! switch ($embed->type) { - case 'video': - case 'link': - if (!empty($embed->html) + case 'video': + case 'link': + if (!empty($embed->html) && (GNUsocial::isAjax() || common_config('attachments', 'show_html'))) { - $purifier = new HTMLPurifier(); - // FIXME: do we allow and here? we did that when we used htmLawed, - // but I'm not sure anymore... - $out->raw($purifier->purify($embed->html)); - } - return false; + $purifier = new HTMLPurifier(); + // FIXME: do we allow and here? we did that when we used htmLawed, + // but I'm not sure anymore... + $out->raw($purifier->purify($embed->html)); + } + return false; } return true; @@ -389,11 +389,15 @@ class EmbedPlugin extends Plugin * * @param $file File the file of the created thumbnail * @param &$imgPath string = the path to the created thumbnail + * @param $media string = media type * @return bool true if it succeeds (including non-action * states where it isn't oEmbed data, so it doesn't mess up the event handle * for other things hooked into it), or the exception if it fails. 
+ * @throws FileNotFoundException + * @throws NoResultException + * @throws ServerException */ - public function onCreateFileImageThumbnailSource(File $file, &$imgPath, $media) + public function onCreateFileImageThumbnailSource(File $file, &$imgPath, string $media): bool { // If we are on a private node, we won't do any remote calls (just as a precaution until // we can configure this from config.php for the private nodes) @@ -402,9 +406,9 @@ class EmbedPlugin extends Plugin } // All our remote Embed images lack a local filename property in the File object - if (!is_null($file->filename)) { - common_debug(sprintf('Filename of file id==%d is not null (%s), so nothing Embed '. - 'should handle.', $file->getID(), _ve($file->filename))); + if ($file->isLocal()) { + common_debug(sprintf('File of id==%d is local (filename: %s), so nothing Embed '. + 'should handle.', $file->getID(), _ve($file->filename))); return true; } @@ -486,7 +490,7 @@ class EmbedPlugin extends Plugin return isset($headers['content-length']) ? $headers['content-length'] : false; } catch (Exception $err) { common_log(LOG_ERR, __CLASS__.': getRemoteFileSize on URL : '._ve($url). - ' threw exception: '.$err->getMessage()); + ' threw exception: '.$err->getMessage()); return false; } } @@ -521,7 +525,7 @@ class EmbedPlugin extends Plugin * @param array|null $headers - The headers possible previous request to $url * @param int|null $file_id - The id of the file this image belongs to, used for logging */ - protected function validateAndWriteImage(&$imgData, $url = null, $headers = null, $file_id = 0) : array + protected function validateAndWriteImage(&$imgData, ?string $url = null, ?array $headers = null, ?int $file_id = null) : array { $info = @getimagesizefromstring($imgData); // array indexes documented on php.net: @@ -540,7 +544,7 @@ class EmbedPlugin extends Plugin if (!empty($url)) { $original_name = HTTPClient::get_filename($url, $headers); } - $filename = MediaFile::encodeFilename($original_name ?? 
'', $filehash); + $filename = MediaFile::encodeFilename($original_name ?? _m('Untitled attachment'), $filehash); $fullpath = File_thumbnail::path($filename); // Write the file to disk. Throw Exception on failure if (!file_exists($fullpath)) { @@ -569,13 +573,13 @@ class EmbedPlugin extends Plugin } } else { throw new AlreadyFulfilledException('A thumbnail seems to already exist for remote file' . - ($file_id ? 'with id==' . $file_id : '') . ' at path ' . $fullpath); + ($file_id ? 'with id==' . $file_id : '') . ' at path ' . $fullpath); } } catch (AlreadyFulfilledException $e) { // Carry on } catch (Exception $err) { common_log(LOG_ERR, "Went to write a thumbnail to disk in EmbedPlugin::storeRemoteThumbnail " . - "but encountered error: {$err}"); + "but encountered error: {$err}"); throw $err; } finally { unset($imgData); @@ -620,7 +624,7 @@ class EmbedPlugin extends Plugin $file_size = $this->getRemoteFileSize($url, $headers); if (($file_size!=false) && ($file_size > $max_size)) { common_debug("Went to store remote thumbnail of size " . $file_size . - " but the upload limit is " . $max_size . " so we aborted."); + " but the upload limit is " . $max_size . " so we aborted."); return false; } } else { @@ -628,7 +632,7 @@ class EmbedPlugin extends Plugin } } catch (Exception $err) { common_debug("Could not determine size of remote image, aborted local storage."); - return $err; + throw $err; } // First we download the file to memory and test whether it's actually an image file @@ -667,8 +671,8 @@ class EmbedPlugin extends Plugin $thumbnail->updateWithKeys($orig); } catch (Exception $err) { common_log(LOG_ERR, "Went to write a thumbnail entry to the database in " . 
- "EmbedPlugin::storeRemoteThumbnail but encountered error: ".$err); - return $err; + "EmbedPlugin::storeRemoteThumbnail but encountered error: ".$err); + throw $err; } return true; } diff --git a/plugins/Embed/README b/plugins/Embed/README.md similarity index 71% rename from plugins/Embed/README rename to plugins/Embed/README.md index b5e1aeae1f..9551ada801 100644 --- a/plugins/Embed/README +++ b/plugins/Embed/README.md @@ -1,10 +1,10 @@ -The Oembed plugin for using and representing oEmbed data. +The Embed plugin for using and representing both Open Graph and oEmbed data. -See: http://www.oembed.com/ +See: https://ogp.me/ and https://www.oembed.com/ Installation ============ -This plugin is enabled by default +This plugin is enabled by default. Settings ======== @@ -16,14 +16,17 @@ check_whitelist: Whether to check the domain_whitelist. Example ======= + +``` $config['thumbnail']['width'] = 42; $config['thumbnail']['height'] = 42; $config['attachments']['show_html'] = true; -addPlugin('Oembed', array( - 'domain_whitelist' => array( +addPlugin('Embed', [ + 'domain_whitelist' => [ '^i\d*\.ytimg\.com$' => 'YouTube', '^i\d*\.vimeocdn\.com$' => 'Vimeo' - ), + ], 'check_whitelist' => true -)); - + ] +); +``` \ No newline at end of file diff --git a/plugins/StoreRemoteMedia/README b/plugins/StoreRemoteMedia/README deleted file mode 100644 index 751a0b402d..0000000000 --- a/plugins/StoreRemoteMedia/README +++ /dev/null @@ -1,34 +0,0 @@ -The StoreRemoteMedia plugin downloads remotely attached files to local server. - -Installation -============ -add "addPlugin('StoreRemoteMedia');" -to the bottom of your config.php - -Settings -======== -domain_blacklist: Array of regular expressions. Always escape your dots and end your strings. -check_blacklist: Whether to check the domain_blacklist. - -domain_whitelist: Array of regular expressions. Always escape your dots and end your strings. -check_whitelist: Whether to check the domain_whitelist. - -max_image_bytes: Max image size. 
Anything bigger than this is rejected. 10MiB by default - -When check_whitelist is set, only images from URLs matching a regex in the -domain_whitelist array are accepted for local storage. When check_blacklist -is set, images from URLs matching any regex in the domain_blacklist are -denied local storage. When both lists are checked, only images from URLs -that match a regex in the domain_whitelist and that match no regexen in the -domain_blacklist are accepted for local storage. - -Example -======= -addPlugin('StoreRemoteMedia', array( - 'domain_whitelist' => array( - '^i\d*\.ytimg\.com$' => 'YouTube', - '^i\d*\.vimeocdn\.com$' => 'Vimeo' - ), - 'check_whitelist' => true, -)); - diff --git a/plugins/StoreRemoteMedia/README.md b/plugins/StoreRemoteMedia/README.md new file mode 100644 index 0000000000..86679289a1 --- /dev/null +++ b/plugins/StoreRemoteMedia/README.md @@ -0,0 +1,31 @@ +The StoreRemoteMedia plugin downloads remotely attached files to local server. + +IMPORTANT: If using both Embed and StoreRemoteMedia plugins, Embed should be added first. + +Installation +============ +add `addPlugin('StoreRemoteMedia');` +to the bottom of your config.php + +Settings +======== +domain_whitelist: Array of regular expressions. Always escape your dots and end your strings. +check_whitelist: Whether to check the domain_whitelist. + +max_size: Max media size. Anything bigger than this is rejected. 10MiB by default. + +When check_whitelist is set, only images from URLs matching a regex in the +domain_whitelist array are accepted for local storage. 
+ +Example +======= + +``` +addPlugin('StoreRemoteMedia', [ + 'domain_whitelist' => [ + '^i\d*\.ytimg\.com$' => 'YouTube', + '^i\d*\.vimeocdn\.com$' => 'Vimeo' + ], + 'check_whitelist' => true, +]); +``` diff --git a/plugins/StoreRemoteMedia/StoreRemoteMediaPlugin.php b/plugins/StoreRemoteMedia/StoreRemoteMediaPlugin.php index 03b021b132..78467fdcfe 100644 --- a/plugins/StoreRemoteMedia/StoreRemoteMediaPlugin.php +++ b/plugins/StoreRemoteMedia/StoreRemoteMediaPlugin.php @@ -1,40 +1,61 @@ 'value', ...)); + // settings which can be set in config.php with addPlugin('StoreRemoteMedia', array('param'=>'value', ...)); // WARNING, these are _regexps_ (slashes added later). Always escape your dots and end your strings public $domain_whitelist = [ // hostname => service provider '^i\d*\.ytimg\.com$' => 'YouTube', '^i\d*\.vimeocdn\.com$' => 'Vimeo', - ]; + ]; public $append_whitelist = []; // fill this array as domain_whitelist to add more trusted sources public $check_whitelist = false; // security/abuse precaution - public $domain_blacklist = []; - public $check_blacklist = false; - - public $max_image_bytes = 10 * 1024 * 1024; // 10MiB max image size by default + public $thumbnail_width = null; + public $thumbnail_height = 128; + public $thumbnail_crop = true; + public $max_size = 10 * 1024 * 1024; // 10MiB max image size by default protected $imgData = []; - // these should be declared protected everywhere + /** + * Initialize the StoreRemoteMedia plugin and set up the environment it needs for it. + * Returns true if it initialized properly, the exception object if it + * doesn't. 
+ */ public function initialize() { parent::initialize(); + if (is_null($this->thumbnail_width)) { + $this->thumbnail_width = common_config('thumbnail', 'width'); + $this->thumbnail_height = common_config('thumbnail', 'height'); + $this->thumbnail_crop = common_config('thumbnail', 'crop'); + $this->max_size = common_get_preferred_php_upload_limit(); + } + $this->domain_whitelist = array_merge($this->domain_whitelist, $this->append_whitelist); } - public function onCreateFileImageThumbnailSource(File $file, &$imgPath, $media=null) + /** + * This event executes when GNU social is creating a file thumbnail entry in + * the database. We glom onto this to fetch remote attachments. + * + * @param $file File the file of the created thumbnail + * @param &$imgPath string = the path to the created thumbnail + * @param $media string = media type + * @return bool + * @throws AlreadyFulfilledException + * @throws FileNotFoundException + * @throws FileNotStoredLocallyException + * @throws HTTP_Request2_Exception + * @throws ServerException + */ + public function onCreateFileImageThumbnailSource(File $file, &$imgPath, string $media): bool { // If we are on a private node, we won't do any remote calls (just as a precaution until // we can configure this from config.php for the private nodes) @@ -42,183 +63,260 @@ class StoreRemoteMediaPlugin extends Plugin return true; } - if ($media !== 'image') { - return true; - } - // If there is a local filename, it is either a local file already or has already been downloaded. 
- if (!empty($file->filename)) { - return true; - } - - $remoteUrl = $file->getUrl(); - - if (empty($remoteUrl)) { - return true; - } - - if (!$this->checkWhiteList($remoteUrl) || - !$this->checkBlackList($remoteUrl)) { - return true; - } - - // Relative URL, something's off - if (empty(parse_url($remoteUrl, PHP_URL_HOST))) { - common_err("StoreRemoteMedia found a url without host (\"{$remoteUrl}\") for file with id = {$file->id}"); + if (!$file->isStoredRemotely()) { + common_debug(sprintf('File id==%d isn\'t a non-fetched remote file (%s), so nothing StoreRemoteMedia '. + 'should handle.', $file->getID(), _ve($file->filename))); return true; } try { + File_thumbnail::byFile($file); + // If we don't get the exception `No result found on File_thumbnail lookup.` then Embed has already handled it most likely. + return true; + } catch (NoResultException $e) { + // We can move on + } - $http = new HTTPClient(); - common_debug(sprintf('Performing HEAD request for remote file id==%u to avoid '. - 'unnecessarily downloading too large files. 
URL: %s', - $file->getID(), $remoteUrl)); - - $url = $remoteUrl; - $head = $http->head($remoteUrl); - $remoteUrl = $head->getEffectiveUrl(); // to avoid going through redirects again - - if (empty($remoteUrl)) { - common_log(LOG_ERR, "URL after redirects is somehow empty, for URL {$url}"); - return true; - } - - if (!$this->checkBlackList($remoteUrl)) { - common_log(LOG_WARN, sprintf('%s: Non-blacklisted URL %s redirected to blacklisted URL %s', - __CLASS__, $file->getUrl(), $remoteUrl)); - return true; - } + $url = $file->getUrl(); + if (substr($url, 0, 7) == 'file://') { + $filename = substr($url, 7); + $info = getimagesize($filename); + $filename = basename($filename); + $width = $info[0]; + $height = $info[1]; + } else { + $this->checkWhitelist($url); + $head = (new HTTPClient())->head($url); $headers = $head->getHeader(); $headers = array_change_key_case($headers, CASE_LOWER); - $filesize = isset($headers['content-length']) ?: $file->getSize(); - if (empty($filesize)) { - // file size not specified on remote server - common_debug(sprintf('%s: Ignoring remote media because we did not get a ' . - 'content length for file id==%u', __CLASS__, $file->getID())); - return true; - } elseif ($filesize > $this->max_image_bytes) { - //FIXME: When we perhaps start fetching videos etc. we'll need to - // differentiate max_image_bytes from that... - - // file too big according to plugin configuration - common_debug(sprintf('%s: Skipping remote media because content length (%u) ' . - 'is larger than plugin configured max_image_bytes (%u) ' . - 'for file id==%u', __CLASS__, intval($filesize), - $this->max_image_bytes, $file->getID())); - return true; - } elseif ($filesize > common_config('attachments', 'file_quota')) { - // file too big according to site configuration - common_debug(sprintf('%s: Skipping remote media because content length (%u) ' . 
- 'is larger than file_quota (%u) for file id==%u', - __CLASS__, intval($filesize), - common_config('attachments', 'file_quota'), $file->getID())); - return true; + try { + $is_image = $this->isRemoteImage($url, $headers); + if ($is_image == true) { + $file_size = $this->getRemoteFileSize($url, $headers); + if (($file_size!=false) && ($file_size > $this->max_size)) { + common_debug("Went to store remote thumbnail of size " . $file_size . + " but the upload limit is " . $this->max_size . " so we aborted."); + return false; + } + } else { + return false; + } + } catch (Exception $err) { + common_debug("Could not determine size of remote image, aborted local storage."); + throw $err; } - // Then we download the file to memory and test whether it's actually an image file - common_debug(sprintf('Downloading remote file id=%u (should be size %u) ' . - 'with effective URL: %s', $file->getID(), $filesize, _ve($remoteUrl))); - $imgData = HTTPClient::quickGet($remoteUrl); - } catch (HTTP_Request2_ConnectionException $e) { - common_log(LOG_ERR, __CLASS__.': '._ve(get_class($e)).' on URL: ' . - _ve($file->getUrl()).' threw exception: '.$e->getMessage()); - return true; - } - $info = @getimagesizefromstring($imgData); - if ($info === false) { - throw new UnsupportedMediaException(_('Remote file format was not identified as an image.'), $remoteUrl); - } elseif (!$info[0] || !$info[1]) { - throw new UnsupportedMediaException(_('Image file had impossible geometry (0 width or height)')); - } - - $filehash = hash(File::FILEHASH_ALG, $imgData); - try { - // Exception will be thrown before $file is set to anything, so old $file value will be kept - $file = File::getByHash($filehash); - $file->fetch(); - - //FIXME: Add some code so we don't have to store duplicate File rows for same hash files. 
- } catch (NoResultException $e) { - $original_name = HTTPClient::get_filename($remoteUrl, $headers); - $filename = MediaFile::encodeFilename($original_name, $filehash); - $fullpath = File::path($filename); - - common_debug("StoreRemoteMedia retrieved url {$remoteUrl} for file with id={$file->id} " . - "and will store in {$fullpath}"); - - // Write the file to disk if it doesn't exist yet. Throw Exception on failure. - if ((!file_exists($fullpath) || substr($fullpath, 0, strlen(INSTALLDIR)) != INSTALLDIR) && - file_put_contents($fullpath, $imgData) === false) { - throw new ServerException(_('Could not write downloaded file to disk.')); - } - - // Updated our database for the file record - $orig = clone($file); - $file->filehash = $filehash; - $file->filename = $filename; - $file->width = $info[0]; // array indexes documented on php.net: - $file->height = $info[1]; // https://php.net/manual/en/function.getimagesize.php - // Throws exception on failure. - $file->updateWithKeys($orig); - } - - // Get rid of the file from memory - unset($imgData); - - // Output - $imgPath = $file->getPath(); - - return false; - } - - /** - * @return boolean true if given url passes blacklist check - */ - protected function checkBlackList($url) - { - if (!$this->check_blacklist) { - return true; - } - $host = parse_url($url, PHP_URL_HOST); - foreach ($this->domain_blacklist as $regex => $provider) { - if (preg_match("/$regex/", $host)) { + // First we download the file to memory and test whether it's actually an image file + // FIXME: To support remote video/whatever files, this needs reworking. 
+ common_debug(sprintf( + 'Downloading remote image for file id==%u with URL: %s', + $file->getID(), + $url + )); + try { + $imgData = HTTPClient::quickGet($url); + if (isset($imgData)) { + list($filename, $filehash, $width, $height) = $this->validateAndWriteImage( + $imgData, + $url, + $headers, + $file->getID() + ); + } else { + throw new UnsupportedMediaException('HTTPClient returned an empty result'); + } + } catch (UnsupportedMediaException $e) { + // Couldn't find anything that looks like an image, nothing to do + common_debug("StoreRemoteMedia was not able to find an image for URL `{$url}`: " . $e->getMessage()); return false; } } - return true; + try { + // Update our database for the file record + $orig = clone($file); + $file->filename = $filename; + $file->filehash = $filehash; + $file->width = $width; + $file->height = $height; + // Throws exception on failure. + $file->updateWithKeys($orig); + } catch (Exception $err) { + common_log(LOG_ERR, "Went to update a file entry to the database in " . + "StoreRemoteMediaPlugin::storeRemoteThumbnail but encountered error: ".$err); + throw $err; + } + + // Out + $imgPath = $file->getPath(); + + return !file_exists($imgPath); } - /*** - * @return boolean true if given url passes whitelist check + /** + * Check the file size of a remote file using a HEAD request and checking + * the content-length variable returned. This isn't 100% foolproof but is + * reliable enough for our purposes. + * + * @return string|bool the file size if it succeeds, false otherwise. */ - protected function checkWhiteList($url) + private function getRemoteFileSize($url, $headers = null) + { + try { + if ($headers === null) { + if (!common_valid_http_url($url)) { + common_log(LOG_ERR, "Invalid URL in StoreRemoteMedia::getRemoteFileSize()"); + return false; + } + $head = (new HTTPClient())->head($url); + $headers = $head->getHeader(); + $headers = array_change_key_case($headers, CASE_LOWER); + } + return isset($headers['content-length']) ? 
$headers['content-length'] : false; + } catch (Exception $err) { + common_log(LOG_ERR, __CLASS__.': getRemoteFileSize on URL : '._ve($url). + ' threw exception: '.$err->getMessage()); + return false; + } + } + + /** + * A private helper function that uses a CURL lookup to check the mime type + * of a remote URL to see if it's an image. + * + * @return bool true if the remote URL is an image, or false otherwise. + */ + private function isRemoteImage($url, $headers = null) + { + if (empty($headers)) { + if (!common_valid_http_url($url)) { + common_log(LOG_ERR, "Invalid URL in StoreRemoteMedia::isRemoteImage()"); + return false; + } + $head = (new HTTPClient())->head($url); + $headers = $head->getHeader(); + $headers = array_change_key_case($headers, CASE_LOWER); + } + return !empty($headers['content-type']) && common_get_mime_media($headers['content-type']) === 'image'; + } + + /** + * Validate that $imgData is a valid image before writing it to + * disk, as well as resizing it to at most $this->thumbnail_width + * by $this->thumbnail_height + * + * @param $imgData - The image data to validate.
Taken by reference to avoid copying + * @param string|null $url - The url where the image came from, to fetch metadata + * @param array|null $headers - The headers of a possible previous request to $url + * @param int|null $file_id - The id of the file this image belongs to, used for logging + */ + protected function validateAndWriteImage(&$imgData, ?string $url = null, ?array $headers = null, ?int $file_id = null) : array + { + $info = @getimagesizefromstring($imgData); + // array indexes documented on php.net: + // https://php.net/manual/en/function.getimagesize.php + if ($info === false) { + throw new UnsupportedMediaException(_m('Remote file format was not identified as an image.'), $url); + } elseif (!$info[0] || !$info[1]) { + throw new UnsupportedMediaException(_m('Image file had impossible geometry (0 width or height)')); + } + + $width = min($info[0], $this->thumbnail_width); + $height = min($info[1], $this->thumbnail_height); + $filehash = hash(File::FILEHASH_ALG, $imgData); + + try { + if (!empty($url)) { + $original_name = HTTPClient::get_filename($url, $headers); + } + $filename = MediaFile::encodeFilename($original_name ?? _m('Untitled attachment'), $filehash); + $filepath = File::path($filename); + // Write the file to disk.
Throw Exception on failure + if (!file_exists($filepath)) { + if (strpos($filepath, INSTALLDIR) !== 0 || file_put_contents($filepath, $imgData) === false) { + throw new ServerException(_m('Could not write downloaded file to disk.')); + } + + if (common_get_mime_media(MediaFile::getUploadedMimeType($filepath)) !== 'image') { + @unlink($filepath); + throw new UnsupportedMediaException( + _m('Remote file format was not identified as an image.'), + $url + ); + } + + // If the image is not of the desired size, resize it + if ($info[0] > $this->thumbnail_width || $info[1] > $this->thumbnail_height) { + // Temporary object, not stored in DB + $img = new ImageFile(-1, $filepath); + $box = $img->scaleToFit($this->thumbnail_width, $this->thumbnail_height, $this->thumbnail_crop); + $width = $box['width']; + $height = $box['height']; + $outpath = $img->resizeTo($filepath, $box); + $result = rename($outpath, $filepath); + if (!$result) { + // TRANS: Client exception thrown when a file upload operation fails because the file could + // TRANS: not be moved from the temporary folder to the permanent file location. + // UX: too specific + throw new ClientException(_m('File could not be moved to destination directory.')); + } + } + } else { + throw new AlreadyFulfilledException('A thumbnail seems to already exist for remote file' . + ($file_id ? 'with id==' . $file_id : '') . ' at path ' . $filepath); + } + } catch (AlreadyFulfilledException $e) { + // Carry on + } catch (Exception $err) { + common_log(LOG_ERR, "Went to write a thumbnail to disk in StoreRemoteMediaPlugin::storeRemoteThumbnail " . 
+ "but encountered error: {$err}"); + throw $err; + } finally { + unset($imgData); + } + + return [$filename, $filehash, $width, $height]; + } + + /** + * @return bool false on no check made, provider name on success + * @throws ServerException if check is made but fails + */ + protected function checkWhitelist($url) { if (!$this->check_whitelist) { - return true; + return false; // indicates "no check made" } + $host = parse_url($url, PHP_URL_HOST); foreach ($this->domain_whitelist as $regex => $provider) { if (preg_match("/$regex/", $host)) { - return true; + return $provider; // we trust this source, return provider name } } - return false; + throw new ServerException(sprintf(_m('Domain not in remote thumbnail source whitelist: %s'), $host)); } + /** + * Event raised when GNU social polls the plugin for information about it. + * Adds this plugin's version information to $versions array + * + * @param &$versions array inherited from parent + * @return bool true hook value + */ public function onPluginVersion(array &$versions): bool { - $versions[] = array('name' => 'StoreRemoteMedia', - 'version' => self::PLUGIN_VERSION, - 'author' => 'Mikael Nordfeldth', - 'homepage' => GNUSOCIAL_ENGINE_URL, - 'description' => - // TRANS: Plugin description. - _m('Plugin for downloading remotely attached files to local server.')); + $versions[] = ['name' => 'StoreRemoteMedia', + 'version' => self::PLUGIN_VERSION, + 'author' => 'Mikael Nordfeldth, Diogo Peralta Cordeiro', + 'homepage' => GNUSOCIAL_ENGINE_URL, + 'description' => + // TRANS: Plugin description. + _m('Plugin for downloading remotely attached files to local server.')]; return true; } -} +} \ No newline at end of file