From f746866b652c297da06c1d1ec921976004e3b6bb Mon Sep 17 00:00:00 2001 From: Miguel Dantas Date: Sun, 30 Jun 2019 13:36:33 +0100 Subject: [PATCH] [StoreRemoteMedia] StoreRemoteMedia now uses the new filename format, which allows it to display correctly in the UI. Formatting fixes --- classes/File.php | 12 +- lib/attachmentlist.php | 3 +- lib/attachmentlistitem.php | 11 +- lib/mediafile.php | 104 +++++++++++++---- plugins/Oembed/OembedPlugin.php | 30 +++-- .../StoreRemoteMediaPlugin.php | 107 +++++++++--------- 6 files changed, 169 insertions(+), 98 deletions(-) diff --git a/classes/File.php b/classes/File.php index c6e9b40ac7..113690b4b9 100644 --- a/classes/File.php +++ b/classes/File.php @@ -341,23 +341,27 @@ class File extends Managed_DataObject * @return string|bool Value from the 'extblacklist' array, in the config */ public static function getSafeExtension(string $filename) { - if (preg_match('/^.+?\.([A-Za-z0-9]+)$/', $filename, $matches)) { + if (preg_match('/^.+?\.([A-Za-z0-9]+)$/', $filename, $matches) === 1) { // we matched on a file extension, so let's see if it means something. + common_debug("MATCHES EXT: " . print_r($matches, true)); $ext = mb_strtolower($matches[1]); $blacklist = common_config('attachments', 'extblacklist'); // If we got an extension from $filename we want to check if it's in a blacklist // so we avoid people uploading restricted files if (array_key_exists($ext, $blacklist)) { if (!is_string($blacklist[$ext])) { + // Blocked return false; } // return a safe replacement extension ('php' => 'phps' for example) return $blacklist[$ext]; + } else { + // the attachment extension based on its filename was not blacklisted so it's ok to use it + return $ext; } - // the attachment extension based on its filename was not blacklisted so it's ok to use it - return $ext; } else { - return false; + // No extension + return null; } } diff --git a/lib/attachmentlist.php b/lib/attachmentlist.php index 8c52d7a86e..696e000c73 100644 --- a/lib/attachmentlist.php +++ b/lib/attachmentlist.php @@ -87,7 +87,8 @@ class AttachmentList extends Widget if ($this->notice->getProfile()->isSilenced()) { // TRANS: Message for inline attachments list in notices when the author has been silenced. - $this->element('div', ['class'=>'error'], _('Attachments are hidden because this profile has been silenced.')); + $this->element('div', ['class'=>'error'], + _('Attachments are hidden because this profile has been silenced.')); return 0; } diff --git a/lib/attachmentlistitem.php b/lib/attachmentlistitem.php index af44f7aada..c5455b8220 100644 --- a/lib/attachmentlistitem.php +++ b/lib/attachmentlistitem.php @@ -168,12 +168,12 @@ class AttachmentListItem extends Widget } $this->out->elementStart($mediatype, - array('class'=>"attachment_player u-{$mediatype}", - 'poster'=>$poster, - 'controls'=>'controls')); + array('class'=>"attachment_player u-{$mediatype}", + 'poster'=>$poster, + 'controls'=>'controls')); $this->out->element('source', array('src'=>$this->attachment->getUrl(), - 'type'=>$this->attachment->mimetype)); + 'type'=>$this->attachment->mimetype)); $this->out->elementEnd($mediatype); break; @@ -181,7 +181,8 @@ class AttachmentListItem extends Widget unset($thumb); // there's no need carrying this along switch (common_bare_mime($this->attachment->mimetype)) { case 'text/plain': - $this->element('div', ['class'=>'e-content plaintext'], file_get_contents($this->attachment->getPath())); + $this->element('div', ['class'=>'e-content plaintext'], + file_get_contents($this->attachment->getPath())); break; case 'text/html': if (!empty($this->attachment->filename) diff --git a/lib/mediafile.php b/lib/mediafile.php index aebe40cb9e..0efdbada76 100644 --- a/lib/mediafile.php +++ b/lib/mediafile.php @@ -236,6 +236,76 @@ class MediaFile return common_config('attachments', 'file_quota'); } + /** + * Encodes a file name and a file hash in the new file format, which is used to avoid + * having an extension in the file, removing trust in extensions, while keeping the original name + * @throws ClientException + */ + public static function encodeFilename(string $original_name, string $filehash, string $ext = null) : string + { + if (empty($original_name)) { + $original_name = _('Untitled attachment'); + } + + // If we're given an extension explicitly, use it, otherwise... + $ext = $ext ?: + // get a replacement extension if configured, returns false if it's blocked, + // null if no extension + File::getSafeExtension($original_name); + if ($ext === false) { + throw new ClientException(_('Blacklisted file extension.')); + } + + common_debug("EXT: " . print_r($ext, true)); + + if (!empty($ext)) { + // Remove dots if we have them (make sure they're not repeated) + $ext = preg_replace('/^\.+/', '', $ext); + $original_name = preg_replace('/\.+.+$/i', ".{$ext}", $original_name); + } + + $enc_name = bin2hex($original_name); + return "{$enc_name}-{$filehash}"; + } + + /** + * Decode the new filename format + * @return false | null | string on failure, no match (old format) or original file name, respectively + */ + public static function decodeFilename(string $encoded_filename) + { + $ret = preg_match('/^([^-]+?)-[^-]+$/', $encoded_filename, $matches); + if ($ret === false) { + return false; + } elseif ($ret === 0) { + return null; // No match + } else { + $filename = hex2bin($matches[1]); + + // Matches extension + if (preg_match('/^(.+?)\.(.+)$/', $filename, $sub_matches) === 1) { + $ext = $sub_matches[2]; + // Previously, there was a blacklisted extension array, which could have an alternative + // extension, such as phps, to replace php. We want to turn it back (this is deprecated, + // as it no longer makes sense, since we don't trust trust files based on extension, + // but keep the feature) + $blacklist = common_config('attachments', 'extblacklist'); + if (is_array($blacklist)) { + foreach ($blacklist as $upload_ext => $safe_ext) { + if ($ext === $safe_ext) { + $ext = $upload_ext; + break; + } + } + } + return "{$sub_matches[1]}.{$ext}"; + } else { + // No extension, don't bother trying to replace it + return $filename; + } + } + } + /** * Create a new MediaFile or ImageFile object from an upload * @@ -314,12 +384,6 @@ class MediaFile File::respectsQuota($scoped, $_FILES[$param]['size']); } - // Gets a replacement extension if configured in the config, returns false if it's blocked - $ext = File::getSafeExtension($_FILES[$param]['name']); - if ($ext === false) { - throw new ClientException(_('Blacklisted file extension.')); - } - $mimetype = self::getUploadedMimeType($_FILES[$param]['tmp_name'], $_FILES[$param]['name']); $media = common_get_mime_media($mimetype); @@ -334,14 +398,7 @@ class MediaFile $ext = image_type_to_extension($img->preferredType(), false); } - // If we have a replacement extension (either from the config or from converting an image) - if ($ext !== false) { - $basename = preg_replace("/\..+$/i", ".{$ext}", $basename); - } - - // New file name format - $original_filename = bin2hex($basename); - $filename = "{$original_filename}-{$filehash}"; + $filename = self::encodeFilename($basename, $filehash, $ext); $filepath = File::path($filename); if ($media === 'image') { @@ -582,27 +639,26 @@ class MediaFile } // New file name format is "{bin2hex(original_name.ext)}-{$hash}" - $ret = preg_match('/^([^\.-]+)-.+$/', $file->filename, $matches); + $filename = self::decodeFilename($file->filename); + // If there was an error in the match, something's wrong with some piece // of code (could be a file with utf8 chars in the name) $log_error_msg = "Invalid file name for File with id={$file->id} " . - "({$file->filename}). Some plugin probably did something wrong."; - - if ($ret === false) { + "({$file->filename}). Some plugin probably did something wrong."; + if ($filename === false) { common_log(LOG_ERR, $log_error_msg); - } elseif ($ret === 1) { - $filename = hex2bin($matches[1]); - } else { - // The old file name format was "{hash}.{ext}" - // This estracts the extension + } elseif ($filename === null) { + // The old file name format was "{hash}.{ext}" so we didn't have a name + // This extracts the extension $ret = preg_match('/^.+?\.(.+)$/', $file->filename, $matches); if ($ret !== 1) { common_log(LOG_ERR, $log_error_msg); return _('Untitled attachment'); } $ext = $matches[1]; - // Previously, there was a blacklisted extension array, which could have an alternative + // There's a blacklisted extension array, which could have an alternative // extension, such as phps, to replace php. We want to turn it back + // (currently defaulted to empty, but let's keep the feature) $blacklist = common_config('attachments', 'extblacklist'); if (is_array($blacklist)) { foreach ($blacklist as $upload_ext => $safe_ext) { diff --git a/plugins/Oembed/OembedPlugin.php b/plugins/Oembed/OembedPlugin.php index 707fe1de8f..fb3fe43fc5 100644 --- a/plugins/Oembed/OembedPlugin.php +++ b/plugins/Oembed/OembedPlugin.php @@ -273,7 +273,7 @@ class OembedPlugin extends Plugin $out->element( 'a', array('href' => $oembed->author_url, - 'class' => 'url'), + 'class' => 'url'), $oembed->author_name ); } @@ -286,7 +286,7 @@ class OembedPlugin extends Plugin $out->element( 'a', array('href' => $oembed->provider_url, - 'class' => 'url'), + 'class' => 'url'), $oembed->provider ); } @@ -421,7 +421,8 @@ class OembedPlugin extends Plugin // All our remote Oembed images lack a local filename property in the File object if (!is_null($file->filename)) { - common_debug(sprintf('Filename of file id==%d is not null (%s), so nothing oEmbed should handle.', $file->getID(), _ve($file->filename))); + common_debug(sprintf('Filename of file id==%d is not null (%s), so nothing oEmbed '. + 'should handle.', $file->getID(), _ve($file->filename))); return true; } @@ -440,10 +441,12 @@ class OembedPlugin extends Plugin } catch (AlreadyFulfilledException $e) { // aw yiss! } catch (Exception $e) { - common_debug(sprintf('oEmbed encountered an exception (%s) for file id==%d: %s', get_class($e), $file->getID(), _ve($e->getMessage()))); + common_debug(sprintf('oEmbed encountered an exception (%s) for file id==%d: %s', + get_class($e), $file->getID(), _ve($e->getMessage()))); throw $e; } + // Out $imgPath = $thumbnail->getPath(); return false; @@ -544,7 +547,8 @@ class OembedPlugin extends Plugin protected function storeRemoteFileThumbnail(File_thumbnail $thumbnail) { if (!empty($thumbnail->filename) && file_exists($thumbnail->getPath())) { - throw new AlreadyFulfilledException(sprintf('A thumbnail seems to already exist for remote file with id==%u', $thumbnail->file_id)); + throw new AlreadyFulfilledException( + sprintf('A thumbnail seems to already exist for remote file with id==%u', $thumbnail->file_id)); } $url = $thumbnail->getUrl(); @@ -556,7 +560,8 @@ class OembedPlugin extends Plugin $max_size = common_get_preferred_php_upload_limit(); $file_size = $this->getRemoteFileSize($url); if (($file_size!=false) && ($file_size > $max_size)) { - common_debug("Went to store remote thumbnail of size " . $file_size . " but the upload limit is " . $max_size . " so we aborted."); + common_debug("Went to store remote thumbnail of size " . $file_size . + " but the upload limit is " . $max_size . " so we aborted."); return false; } } @@ -567,7 +572,8 @@ class OembedPlugin extends Plugin // First we download the file to memory and test whether it's actually an image file // FIXME: To support remote video/whatever files, this needs reworking. - common_debug(sprintf('Downloading remote thumbnail for file id==%u with thumbnail URL: %s', $thumbnail->file_id, $url)); + common_debug(sprintf('Downloading remote thumbnail for file id==%u with thumbnail URL: %s', + $thumbnail->file_id, $url)); $imgData = HTTPClient::quickGet($url); $info = @getimagesizefromstring($imgData); if ($info === false) { @@ -580,14 +586,17 @@ class OembedPlugin extends Plugin try { // We'll trust sha256 (File::FILEHASH_ALG) not to have collision issues any time soon :) - $filename = sprintf('oembed-%d.%s', hash(File::FILEHASH_ALG, $imgData), $ext); + $original_filename = bin2hex('oembed.' . $ext); + $filehash = hash(File::FILEHASH_ALG, $imgData); + $filename = "{$original_filename}-{$filehash}"; $fullpath = File_thumbnail::path($filename); // Write the file to disk. Throw Exception on failure if (!file_exists($fullpath) && file_put_contents($fullpath, $imgData) === false) { throw new ServerException(_('Could not write downloaded file to disk.')); } } catch (Exception $err) { - common_log(LOG_ERROR, "Went to write a thumbnail to disk in OembedPlugin::storeRemoteThumbnail but encountered error: {$err}"); + common_log(LOG_ERROR, "Went to write a thumbnail to disk in OembedPlugin::storeRemoteThumbnail " . + "but encountered error: {$err}"); return $err; } finally { unset($imgData); @@ -602,7 +611,8 @@ class OembedPlugin extends Plugin // Throws exception on failure. $thumbnail->updateWithKeys($orig); } catch (exception $err) { - common_log(LOG_ERROR, "Went to write a thumbnail entry to the database in OembedPlugin::storeRemoteThumbnail but encountered error: ".$err); + common_log(LOG_ERROR, "Went to write a thumbnail entry to the database in " . + "OembedPlugin::storeRemoteThumbnail but encountered error: ".$err); return $err; } return true; diff --git a/plugins/StoreRemoteMedia/StoreRemoteMediaPlugin.php b/plugins/StoreRemoteMedia/StoreRemoteMediaPlugin.php index e222f4ea60..a722f9535a 100644 --- a/plugins/StoreRemoteMedia/StoreRemoteMediaPlugin.php +++ b/plugins/StoreRemoteMedia/StoreRemoteMediaPlugin.php @@ -10,19 +10,21 @@ class StoreRemoteMediaPlugin extends Plugin // settings which can be set in config.php with addPlugin('Oembed', array('param'=>'value', ...)); // WARNING, these are _regexps_ (slashes added later). Always escape your dots and end your strings - public $domain_whitelist = array( // hostname => service provider - '^i\d*\.ytimg\.com$' => 'YouTube', - '^i\d*\.vimeocdn\.com$' => 'Vimeo', - ); - public $append_whitelist = array(); // fill this array as domain_whitelist to add more trusted sources - public $check_whitelist = false; // security/abuse precaution + public $domain_whitelist = [ + // hostname => service provider + '^i\d*\.ytimg\.com$' => 'YouTube', + '^i\d*\.vimeocdn\.com$' => 'Vimeo', + ]; - public $domain_blacklist = array(); + public $append_whitelist = []; // fill this array as domain_whitelist to add more trusted sources + public $check_whitelist = false; // security/abuse precaution + + public $domain_blacklist = []; public $check_blacklist = false; - public $max_image_bytes = 10485760; // 10MiB max image size by default + public $max_image_bytes = 10 * 1024 * 1024; // 10MiB max image size by default - protected $imgData = array(); + protected $imgData = []; // these should be declared protected everywhere public function initialize() @@ -32,38 +34,6 @@ class StoreRemoteMediaPlugin extends Plugin $this->domain_whitelist = array_merge($this->domain_whitelist, $this->append_whitelist); } - /** - * Save embedding information for a File, if applicable. - * - * Normally this event is called through File::saveNew() - * - * @param File $file The abount-to-be-inserted File object. - * - * @return boolean success - */ - public function onStartFileSaveNew(File &$file) - { - // save given URL as title if it's a media file this plugin understands - // which will make it shown in the AttachmentList widgets - - if (isset($file->title) && strlen($file->title)>0) { - // Title is already set - return true; - } - if (!isset($file->mimetype)) { - // Unknown mimetype, it's not our job to figure out what it is. - return true; - } - switch (common_get_mime_media($file->mimetype)) { - case 'image': - // Just to set something for now at least... - //$file->title = $file->mimetype; - break; - } - - return true; - } - public function onCreateFileImageThumbnailSource(File $file, &$imgPath, $media=null) { // If we are on a private node, we won't do any remote calls (just as a precaution until @@ -92,41 +62,61 @@ class StoreRemoteMediaPlugin extends Plugin return true; } + // Relative URL, something's off + if (empty(parse_url($remoteUrl, PHP_URL_HOST))) { + common_err("StoreRemoteMedia found a url without host (\"{$remoteUrl}\") for file with id = {$file->id}"); + return true; + } + try { - /* + $http = new HTTPClient(); - common_debug(sprintf('Performing HEAD request for remote file id==%u to avoid unnecessarily downloading too large files. URL: %s', $file->getID(), $remoteUrl)); + common_debug(sprintf('Performing HEAD request for remote file id==%u to avoid '. + 'unnecessarily downloading too large files. URL: %s', + $file->getID(), $remoteUrl)); + $head = $http->head($remoteUrl); $remoteUrl = $head->getEffectiveUrl(); // to avoid going through redirects again if (!$this->checkBlackList($remoteUrl)) { - common_log(LOG_WARN, sprintf('%s: Non-blacklisted URL %s redirected to blacklisted URL %s', __CLASS__, $file->getUrl(), $remoteUrl)); + common_log(LOG_WARN, sprintf('%s: Non-blacklisted URL %s redirected to blacklisted URL %s', + __CLASS__, $file->getUrl(), $remoteUrl)); return true; } $headers = $head->getHeader(); - $filesize = isset($headers['content-length']) ? $headers['content-length'] : null; - */ - $filesize = $file->getSize(); + $filesize = isset($headers['content-length']) ?: $file->getSize(); + if (empty($filesize)) { // file size not specified on remote server - common_debug(sprintf('%s: Ignoring remote media because we did not get a content length for file id==%u', __CLASS__, $file->getID())); + common_debug(sprintf('%s: Ignoring remote media because we did not get a ' . + 'content length for file id==%u', __CLASS__, $file->getID())); return true; } elseif ($filesize > $this->max_image_bytes) { - //FIXME: When we perhaps start fetching videos etc. we'll need to differentiate max_image_bytes from that... + //FIXME: When we perhaps start fetching videos etc. we'll need to + // differentiate max_image_bytes from that... + // file too big according to plugin configuration - common_debug(sprintf('%s: Skipping remote media because content length (%u) is larger than plugin configured max_image_bytes (%u) for file id==%u', __CLASS__, intval($filesize), $this->max_image_bytes, $file->getID())); + common_debug(sprintf('%s: Skipping remote media because content length (%u) ' . + 'is larger than plugin configured max_image_bytes (%u) ' . + 'for file id==%u', __CLASS__, intval($filesize), + $this->max_image_bytes, $file->getID())); return true; } elseif ($filesize > common_config('attachments', 'file_quota')) { // file too big according to site configuration - common_debug(sprintf('%s: Skipping remote media because content length (%u) is larger than file_quota (%u) for file id==%u', __CLASS__, intval($filesize), common_config('attachments', 'file_quota'), $file->getID())); + common_debug(sprintf('%s: Skipping remote media because content length (%u) ' . + 'is larger than file_quota (%u) for file id==%u', + __CLASS__, intval($filesize), + common_config('attachments', 'file_quota'), $file->getID())); return true; } // Then we download the file to memory and test whether it's actually an image file - common_debug(sprintf('Downloading remote file id==%u (should be size %u) with effective URL: %s', $file->getID(), $filesize, _ve($remoteUrl))); + common_debug(sprintf('Downloading remote file id=%u (should be size %u) ' . + 'with effective URL: %s', $file->getID(), $filesize, _ve($remoteUrl))); $imgData = HTTPClient::quickGet($remoteUrl); } catch (HTTP_Request2_ConnectionException $e) { - common_log(LOG_ERR, __CLASS__.': '._ve(get_class($e)).' on URL: '._ve($file->getUrl()).' threw exception: '.$e->getMessage()); + common_log(LOG_ERR, __CLASS__.': '._ve(get_class($e)).' on URL: ' . + _ve($file->getUrl()).' threw exception: '.$e->getMessage()); return true; } $info = @getimagesizefromstring($imgData); @@ -143,9 +133,16 @@ class StoreRemoteMediaPlugin extends Plugin //FIXME: Add some code so we don't have to store duplicate File rows for same hash files. } catch (NoResultException $e) { - $filename = $filehash . '.' . common_supported_mime_to_ext($info['mime']); + if (preg_match('/^.+; filename="(.+?)"$/', $headers['content-disposition'], $matches) === 1) { + $filename = MediaFile::encodeFilename($matches[1], $filehash); + } else { + common_log(LOG_ERR, "Couldn't determine filename for url: {$remoteUrl}"); + // throw new ServerError(_("Couldn't determine filename for url: {$remoteUrl}")); + } $fullpath = File::path($filename); + common_debug("StoreRemoteMedia retrieved file with id={$file->id} and will store in {$filename}"); + // Write the file to disk if it doesn't exist yet. Throw Exception on failure. if (!file_exists($fullpath) && file_put_contents($fullpath, $imgData) === false) { throw new ServerException(_('Could not write downloaded file to disk.')); @@ -160,9 +157,11 @@ class StoreRemoteMediaPlugin extends Plugin // Throws exception on failure. $file->updateWithKeys($orig); } + // Get rid of the file from memory unset($imgData); + // Output $imgPath = $file->getPath(); return false;