From 34059a8d3d76837e1b46f9f23dbb398859e4a99f Mon Sep 17 00:00:00 2001
From: Hugo Sales
Date: Tue, 27 Apr 2021 20:53:59 +0000
Subject: [PATCH] [ATTACHMENTS][EVENT] Add onHashFile event, which can be used to deduplicate files

Currently, we simply hash the contents of the file with sha256, but in
the future we can use something smarter, which could find visual
feature similarity between images
---
Reviewer notes (not part of the commit message):
 * src/Core/GSFile.php: the HashFile event is now dispatched with
   $sfile->getPathname(). The posted patch read from $file, but the
   hash_file() call it replaces and the neighbouring context lines all
   use $sfile, so $file would be undefined at that point.
 * The line structure of this patch was rebuilt from a copy whose
   whitespace had been collapsed. Indentation inside the hunks is a
   best-effort reconstruction; use `git apply --ignore-whitespace` if
   the context does not match the tree.

 components/Posting/Posting.php | 14 +++++++++++++-
 src/Core/GSFile.php            | 13 +++++++------
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/components/Posting/Posting.php b/components/Posting/Posting.php
index e791792ef6..d84d84011a 100644
--- a/components/Posting/Posting.php
+++ b/components/Posting/Posting.php
@@ -29,6 +29,7 @@ use App\Core\GSFile;
 use function App\Core\I18n\_m;
 use App\Core\Modules\Component;
 use App\Core\Security;
+use App\Entity\Attachment;
 use App\Entity\AttachmentToNote;
 use App\Entity\Note;
 use App\Util\Common;
@@ -124,7 +125,7 @@ END;
         $matched_urls = [];
         preg_match_all(self::URL_REGEX, $content, $matched_urls, PREG_SET_ORDER);
         foreach ($matched_urls as $match) {
-            $processed_attachments[] = GSFile::validateAndStoreURL($url);
+            $processed_attachments[] = GSFile::validateAndStoreURL($match[0]);
         }
 
         DB::persist($note);
@@ -138,4 +139,15 @@ END;
             DB::flush();
         }
     }
+
+    /**
+     * Get a unique representation of a file on disk
+     *
+     * This can be used in the future to deduplicate images by visual content
+     */
+    public static function onHashFile(string $filename, ?string &$out_hash)
+    {
+        $out_hash = hash_file(Attachment::FILEHASH_ALGO, $filename);
+        return Event::stop;
+    }
 }
diff --git a/src/Core/GSFile.php b/src/Core/GSFile.php
index 60928a8908..e4164b5fc9 100644
--- a/src/Core/GSFile.php
+++ b/src/Core/GSFile.php
@@ -43,9 +43,9 @@ class GSFile
                                                             bool $is_local = true,
                                                             int $actor_id = null): Attachment
     {
+        Event::handle('HashFile', [$sfile->getPathname(), &$hash]);
         // The following properly gets the mimetype with `file` or other
         // available methods, so should be safe
-        $hash = hash_file(Attachment::FILEHASH_ALGO, $sfile->getPathname());
         $mimetype = $sfile->getMimeType();
         Event::handle('AttachmentValidation', [&$sfile, &$mimetype]);
         $attachment = Attachment::create([
@@ -70,16 +70,17 @@ class GSFile
     public static function validateAndStoreURL(string $url): Attachment
     {
         if (Common::isValidHttpUrl($url)) {
-            HTTPClient::head($url);
+            $head = HTTPClient::head($url);
             $headers = $head->getHeaders();
             $headers = array_change_key_case($headers, CASE_LOWER);
             $attachment = Attachment::create([
-                'remote_url' => $match[0],
-                'remote_url_hash' => hash('sha256', $match[0]),
-                'mimetype' => $headers['content-type'],
+                'remote_url' => $url,
+                'remote_url_hash' => hash(Attachment::URLHASH_ALGO, $url),
+                'mimetype' => $headers['content-type'][0],
+                'is_local' => false,
             ]);
             DB::persist($attachment);
-            Event::handle('AttachmentStoreNew', [&$at]);
+            Event::handle('AttachmentStoreNew', [&$attachment]);
             return $attachment;
         } else {
             throw new \InvalidArgumentException();