From bac18715c5a1e13b59502e30a4c370add889504c Mon Sep 17 00:00:00 2001 From: Diogo Peralta Cordeiro Date: Thu, 12 Aug 2021 00:39:36 +0100 Subject: [PATCH] [StoreRemoteMedia] Implement the first version of it in v3 --- plugins/StoreRemoteMedia/StoreRemoteMedia.php | 438 +++++------------- src/Entity/RemoteURL.php | 5 +- src/Entity/RemoteURLToNote.php | 18 + 3 files changed, 130 insertions(+), 331 deletions(-) diff --git a/plugins/StoreRemoteMedia/StoreRemoteMedia.php b/plugins/StoreRemoteMedia/StoreRemoteMedia.php index ae48ce832f..cce8e88d4f 100644 --- a/plugins/StoreRemoteMedia/StoreRemoteMedia.php +++ b/plugins/StoreRemoteMedia/StoreRemoteMedia.php @@ -18,7 +18,18 @@ namespace Plugin\StoreRemoteMedia; +use App\Core\DB\DB; +use App\Core\Event; +use App\Core\GSFile; +use App\Core\HTTPClient; use App\Core\Modules\Plugin; +use App\Entity\AttachmentThumbnail; +use App\Entity\AttachmentToNote; +use App\Entity\Note; +use App\Entity\RemoteURL; +use App\Entity\RemoteURLToAttachment; +use App\Util\Common; +use App\Util\TemporaryFile; /** * The StoreRemoteMedia plugin downloads remotely attached files to local server. @@ -35,348 +46,115 @@ use App\Core\Modules\Plugin; */ class StoreRemoteMedia extends Plugin { - const PLUGIN_VERSION = '3.0.0'; - - // settings which can be set in config.php with addPlugin('StoreRemoteMedia', array('param'=>'value', ...)); - // WARNING, these are _regexps_ (slashes added later). Always escape your dots and end your strings - public $domain_whitelist = [ - // hostname => service provider - '^i\d*\.ytimg\.com$' => 'YouTube', - '^i\d*\.vimeocdn\.com$' => 'Vimeo', - ]; - - public $append_whitelist = []; // fill this array as domain_whitelist to add more trusted sources - public $check_whitelist = false; // security/abuse precaution - - public $store_original = false; // Whether to maintain a copy of the original media or only a thumbnail of it - public $thumbnail_width; - public $thumbnail_height; - public $crop; - public $max_size; - - /** - * Initialize the StoreRemoteMedia plugin and set up the environment it needs for it. - * Returns true if it initialized properly, the exception object if it - * doesn't. - */ - public function initialize() + public function version(): string { - parent::initialize(); + return '3.0.0'; + } - $this->domain_whitelist = array_merge($this->domain_whitelist, $this->append_whitelist); + public bool $store_original = false; // Whether to maintain a copy of the original media or only a thumbnail of it + public ?int $thumbnail_width; + public ?int $thumbnail_height; + public ?int $max_size; + public ?bool $smart_crop; - // Load global configuration if specific not provided - $this->thumbnail_width = $this->thumbnail_width ?? common_config('thumbnail', 'width'); - $this->thumbnail_height = $this->thumbnail_height ?? common_config('thumbnail', 'height'); - $this->max_size = $this->max_size ?? common_config('attachments', 'file_quota'); - $this->crop = $this->crop ?? common_config('thumbnail', 'crop'); + private function getStoreOriginal(): bool + { + return $this->store_original; + } + private function getThumbnailWidth(): int + { + return $this->thumbnail_width ?? Common::config('thumbnail', 'width'); + } + + private function getThumbnailHeight(): int + { + return $this->thumbnail_height ?? Common::config('thumbnail', 'height'); + } + + private function getMaxSize(): int + { + return $this->max_size ?? Common::config('attachments', 'file_quota'); + } + + private function getSmartCrop(): bool + { + return $this->smart_crop ?? Common::config('thumbnail', 'smart_crop'); } /** - * This event executes when GNU social is creating a file thumbnail entry in - * the database. We glom onto this to fetch remote attachments. - * - * @param $file File the file of the created thumbnail - * @param &$imgPath null|string = out the path to the created thumbnail (output parameter) - * @param $media string = media type (unused) - * - * @throws AlreadyFulfilledException - * @throws FileNotFoundException - * @throws FileNotStoredLocallyException - * @throws HTTP_Request2_Exception - * @throws ServerException + * @param RemoteURL $remote_url * * @return bool */ - public function onCreateFileImageThumbnailSource(File $file, ?string &$imgPath = null, ?string $media = null): bool + public function onNewRemoteURLFromNote(RemoteURL $remote_url, Note $note): bool { - // If we are on a private node, we won't do any remote calls (just as a precaution until - // we can configure this from config.php for the private nodes) - if (common_config('site', 'private')) { - return true; + // Embed is the plugin to handle these + if ($remote_url->getMimetypeMajor() === 'text') { + return Event::next; } - // If there is a local filename, it is either a local file already or has already been downloaded. - if (!$file->isStoredRemotely()) { - common_debug(sprintf('File id==%d isn\'t a non-fetched remote file (%s), so nothing StoreRemoteMedia ' . - 'should handle.', $file->getID(), _ve($file->filename))); - return true; - } + // Have we handled it already? + $remoteurl_to_attachment = DB::find('remoteurl_to_attachment', + ['remoteurl_id' => $remote_url->getId()]); - try { - File_thumbnail::byFile($file); - // If we don't get the exception `No result found on File_thumbnail lookup.` then Embed has already handled it most likely. - return true; - } catch (NoResultException $e) { - // We can move on - } - - $url = $file->getUrl(false); - - if (substr($url, 0, 7) == 'file://') { - $filename = substr($url, 7); - $info = getimagesize($filename); - $filename = basename($filename); - $width = $info[0]; - $height = $info[1]; + // If it was handled already + if (!is_null($remoteurl_to_attachment)) { + // Relate the note with the existing attachment + DB::persist(AttachmentToNote::create([ + 'attachment_id' => $remoteurl_to_attachment->getAttachmentId(), + 'note_id' => $note->getId(), + ])); + DB::flush(); + return Event::stop; } else { - $this->checkWhitelist($url); - $head = (new HTTPClient())->head($url); - $headers = $head->getHeader(); - $headers = array_change_key_case($headers, CASE_LOWER); + // Retrieve media + $get_response = HTTPClient::get($remote_url->getRemoteUrl()); + $media = $get_response->getContent(); + $mimetype = $get_response->getHeaders()['content-type'][0]; + unset($get_response); - try { - $is_image = $this->isRemoteImage($url, $headers); - if ($is_image == true) { - $file_size = $this->getRemoteFileSize($url, $headers); - if (($file_size != false) && ($file_size > $this->max_size)) { - common_debug('Went to store remote thumbnail of size ' . $file_size . - ' but the upload limit is ' . $this->max_size . ' so we aborted.'); - return false; - } - } else { - return false; + // Ensure we still want to handle it + if ($mimetype != $remote_url->getMimetype()) { + $remote_url->setMimetype($mimetype); + DB::persist($remote_url); + DB::flush(); + if ($remote_url->getMimetypeMajor() === 'text') { + return Event::next; } - } catch (Exception $err) { - common_debug('Could not determine size of remote image, aborted local storage.'); - throw $err; } - // First we download the file to memory and test whether it's actually an image file - // FIXME: To support remote video/whatever files, this needs reworking. - common_debug(sprintf( - 'Downloading remote image for file id==%u with URL: %s', - $file->getID(), - $url - )); - try { - $imgData = HTTPClient::quickGet($url); - if (isset($imgData)) { - list($filename, $filehash, $width, $height) = $this->validateAndWriteImage( - $imgData, - $url, - $headers, - $file->getID() - ); - } else { - throw new UnsupportedMediaException('HTTPClient returned an empty result'); - } - } catch (UnsupportedMediaException $e) { - // Couldn't find anything that looks like an image, nothing to do - common_debug("StoreRemoteMedia was not able to find an image for URL `{$url}`: " . $e->getMessage()); - return false; + // Create an attachment for this + $temp_file = new TemporaryFile(); + $temp_file->write($media); + $attachment = GSFile::sanitizeAndStoreFileAsAttachment($temp_file); + + // Relate the remoteurl with the attachment + DB::persist(RemoteURLToAttachment::create([ + 'remoteurl_id' => $remote_url->getId(), + 'attachment_id' => $attachment->getId(), + ])); + + // Relate the note with the attachment + DB::persist(AttachmentToNote::create([ + 'attachment_id' => $attachment->getId(), + 'note_id' => $note->getId(), + ])); + + DB::flush(); + + // Should we create a thumb and delete the original file? + if (!$this->getStoreOriginal()) { + $thumbnail = AttachmentThumbnail::getOrCreate( + attachment: $attachment, + width: $this->getThumbnailWidth(), + height: $this->getThumbnailHeight(), + crop: $this->getSmartCrop() + ); + $attachment->deleteStorage(); } + + return Event::stop; } - - $ft = null; - if ($this->store_original) { - try { - // Update our database for the file record - $orig = clone $file; - $file->filename = $filename; - $file->filehash = $filehash; - $file->width = $width; - $file->height = $height; - // Throws exception on failure. - $file->updateWithKeys($orig); - } catch (Exception $err) { - common_log(LOG_ERR, 'Went to update a file entry on the database in ' . - 'StoreRemoteMediaPlugin::storeRemoteThumbnail but encountered error: ' . $err); - throw $err; - } - } else { - try { - // Insert a thumbnail record for this file - $data = new stdClass(); - $data->thumbnail_url = $url; - $data->thumbnail_width = $width; - $data->thumbnail_height = $height; - File_thumbnail::saveNew($data, $file->getID()); - $ft = File_thumbnail::byFile($file); - $orig = clone $ft; - $ft->filename = $filename; - $ft->updateWithKeys($orig); - } catch (Exception $err) { - common_log(LOG_ERR, 'Went to write a thumbnail entry to the database in ' . - 'StoreRemoteMediaPlugin::storeRemoteThumbnail but encountered error: ' . $err); - throw $err; - } - } - - // Out - try { - $imgPath = $file->getFileOrThumbnailPath($ft); - return !file_exists($imgPath); - } catch (Exception $e) { - return true; - } - } - - /** - * Check the file size of a remote file using a HEAD request and checking - * the content-length variable returned. This isn't 100% foolproof but is - * reliable enough for our purposes. - * - * @param mixed $url - * @param null|mixed $headers - * - * @return bool|string the file size if it succeeds, false otherwise. - */ - private function getRemoteFileSize($url, $headers = null) - { - try { - if ($headers === null) { - if (!common_valid_http_url($url)) { - common_log(LOG_ERR, 'Invalid URL in StoreRemoteMedia::getRemoteFileSize()'); - return false; - } - $head = (new HTTPClient())->head($url); - $headers = $head->getHeader(); - $headers = array_change_key_case($headers, CASE_LOWER); - } - return $headers['content-length'] ?? false; - } catch (Exception $err) { - common_log(LOG_ERR, __CLASS__ . ': getRemoteFileSize on URL : ' . _ve($url) . - ' threw exception: ' . $err->getMessage()); - return false; - } - } - - /** - * A private helper function that uses a CURL lookup to check the mime type - * of a remote URL to see it it's an image. - * - * @param mixed $url - * @param null|mixed $headers - * - * @return bool true if the remote URL is an image, or false otherwise. - */ - private function isRemoteImage($url, $headers = null): bool - { - if (empty($headers)) { - if (!common_valid_http_url($url)) { - common_log(LOG_ERR, 'Invalid URL in StoreRemoteMedia::isRemoteImage()'); - return false; - } - $head = (new HTTPClient())->head($url); - $headers = $head->getHeader(); - $headers = array_change_key_case($headers, CASE_LOWER); - } - return !empty($headers['content-type']) && common_get_mime_media($headers['content-type']) === 'image'; - } - - /** - * Validate that $imgData is a valid image before writing it to - * disk, as well as resizing it to at most $this->thumbnail_width - * by $this->thumbnail_height - * - * @param $imgData - The image data to validate. Taken by reference to avoid copying - * @param null|string $url - The url where the image came from, to fetch metadata - * @param null|array $headers - The headers possible previous request to $url - * @param null|int $file_id - The id of the file this image belongs to, used for logging - */ - protected function validateAndWriteImage(&$imgData, ?string $url = null, ?array $headers = null, ?int $file_id = null): array - { - $info = @getimagesizefromstring($imgData); - // array indexes documented on php.net: - // https://php.net/manual/en/function.getimagesize.php - if ($info === false) { - throw new UnsupportedMediaException(_m('Remote file format was not identified as an image.'), $url); - } elseif (!$info[0] || !$info[1]) { - throw new UnsupportedMediaException(_m('Image file had impossible geometry (0 width or height)')); - } - - $width = min($info[0], $this->thumbnail_width); - $height = min($info[1], $this->thumbnail_height); - $filehash = hash(File::FILEHASH_ALG, $imgData); - - try { - if (!empty($url)) { - $original_name = HTTPClient::get_filename($url, $headers); - } - $filename = MediaFile::encodeFilename($original_name ?? _m('Untitled attachment'), $filehash); - } catch (Exception $err) { - common_log(LOG_ERR, 'Went to write a thumbnail to disk in StoreRemoteMediaPlugin::storeRemoteThumbnail ' . - "but encountered error: {$err}"); - throw $err; - } - - try { - $fullpath = $this->store_original ? File::path($filename) : File_thumbnail::path($filename); - // Write the file to disk. Throw Exception on failure - if (!file_exists($fullpath)) { - if (strpos($fullpath, INSTALLDIR) !== 0 || file_put_contents($fullpath, $imgData) === false) { - throw new ServerException(_m('Could not write downloaded file to disk.')); - } - - if (common_get_mime_media(MediaFile::getUploadedMimeType($fullpath)) !== 'image') { - @unlink($fullpath); - throw new UnsupportedMediaException( - _m('Remote file format was not identified as an image.'), - $url - ); - } - - // If the image is not of the desired size, resize it - if (!$this->store_original && $this->crop && ($info[0] > $this->thumbnail_width || $info[1] > $this->thumbnail_height)) { - try { - // Temporary object, not stored in DB - $img = new ImageFile(-1, $fullpath); - list($width, $height, $x, $y, $w, $h) = $img->scaleToFit($this->thumbnail_width, $this->thumbnail_height, $this->crop); - - // The boundary box for our resizing - $box = [ - 'width' => $width, 'height' => $height, - 'x' => $x, 'y' => $y, - 'w' => $w, 'h' => $h, - ]; - - $width = $box['width']; - $height = $box['height']; - $img->resizeTo($fullpath, $box); - } catch (\Intervention\Image\Exception\NotReadableException $e) { - common_log(LOG_ERR, "StoreRemoteMediaPlugin::storeRemoteThumbnail was unable to decode image with Intervention: {$e}"); - // No need to interrupt processing - } - } - } else { - throw new AlreadyFulfilledException('A thumbnail seems to already exist for remote file' . - ($file_id ? 'with id==' . $file_id : '') . ' at path ' . $fullpath); - } - } catch (AlreadyFulfilledException $e) { - // Carry on - } catch (Exception $err) { - common_log(LOG_ERR, 'Went to write a thumbnail to disk in StoreRemoteMediaPlugin::storeRemoteThumbnail ' . - "but encountered error: {$err}"); - throw $err; - } finally { - unset($imgData); - } - - return [$filename, $filehash, $width, $height]; - } - - /** - * @param mixed $url - * - * @throws ServerException if check is made but fails - * - * @return bool false on no check made, provider name on success - */ - protected function checkWhitelist($url) - { - if (!$this->check_whitelist) { - return false; // indicates "no check made" - } - - $host = parse_url($url, PHP_URL_HOST); - foreach ($this->domain_whitelist as $regex => $provider) { - if (preg_match("/{$regex}/", $host)) { - return $provider; // we trust this source, return provider name - } - } - - throw new ServerException(sprintf(_m('Domain not in remote thumbnail source whitelist: %s'), $host)); } /** @@ -389,12 +167,14 @@ class StoreRemoteMedia extends Plugin */ public function onPluginVersion(array &$versions): bool { - $versions[] = ['name' => 'StoreRemoteMedia', - 'version' => self::PLUGIN_VERSION, - 'author' => 'Mikael Nordfeldth, Diogo Peralta Cordeiro', - 'homepage' => GNUSOCIAL_ENGINE_URL, - 'description' => // TRANS: Plugin description. - _m('Plugin for downloading remotely attached files to local server.'), ]; - return true; + $versions[] = [ + 'name' => 'StoreRemoteMedia', + 'version' => $this->version(), + 'author' => 'Mikael Nordfeldth, Diogo Peralta Cordeiro', + 'homepage' => GNUSOCIAL_PROJECT_URL, + 'description' => // TRANS: Plugin description. + _m('Plugin for downloading remotely attached files to local server.'), + ]; + return Event::next; } } diff --git a/src/Entity/RemoteURL.php b/src/Entity/RemoteURL.php index 6b28db3d93..11e9c0ad1c 100644 --- a/src/Entity/RemoteURL.php +++ b/src/Entity/RemoteURL.php @@ -22,6 +22,7 @@ namespace App\Entity; use App\Core\DB\DB; use App\Core\Entity; use App\Core\Event; +use App\Core\GSFile; use App\Core\HTTPClient; use App\Util\Common; use App\Util\Exception\DuplicateFoundException; @@ -137,7 +138,7 @@ class RemoteURL extends Entity $head = HTTPClient::head($url); // This must come before getInfo given that Symfony HTTPClient is lazy (thus forcing curl exec) $headers = $head->getHeaders(); - $url = $head->getInfo('url'); // The last effective url (after getHeaders so it follows redirects) + $url = $head->getInfo('url'); // The last effective url (after getHeaders, so it follows redirects) $url_hash = hash(self::URLHASH_ALGO, $url); try { return DB::findOneBy('remoteurl', ['remote_url_hash' => $url_hash]); @@ -149,7 +150,7 @@ class RemoteURL extends Entity 'mimetype' => $headers['content-type'][0], ]); DB::persist($remoteurl); - Event::handle('RemoteURLStoreNew', [&$remoteurl]); + Event::handle('RemoteURLStoredNew', [&$remoteurl]); return $remoteurl; } } else { diff --git a/src/Entity/RemoteURLToNote.php b/src/Entity/RemoteURLToNote.php index a34ac71771..ad2c750c00 100644 --- a/src/Entity/RemoteURLToNote.php +++ b/src/Entity/RemoteURLToNote.php @@ -19,7 +19,9 @@ namespace App\Entity; +use App\Core\DB\DB; use App\Core\Entity; +use App\Core\Event; use DateTimeInterface; /** @@ -73,6 +75,22 @@ class RemoteURLToNote extends Entity return $this->modified; } + /** + * Create an instance of RemoteURLToNote or fill in the + * properties of $obj with the associative array $args. Doesn't + * persist the result + * + * @param null|mixed $obj + */ + public static function create(array $args, $obj = null) + { + $remoteURL = DB::find('remoteurl', ['id' => $args['remoteurl_id']]); + $note = DB::find('note', ['id' => $args['note_id']]); + Event::handle('NewRemoteURLFromNote', [$remoteURL, $note]); + $obj = new self(); + return parent::create($args, $obj); + } + // @codeCoverageIgnoreEnd // }}} Autocode