[ATTACHMENTS][EVENT] Add onHashFile event, which can be used to deduplicate files
Currently, we simply hash the contents of the file with SHA-256, but in the future we could use something smarter, such as a method that finds visual feature similarity between images.
This commit is contained in:
parent
60a9085e56
commit
34059a8d3d
@ -29,6 +29,7 @@ use App\Core\GSFile;
|
|||||||
use function App\Core\I18n\_m;
|
use function App\Core\I18n\_m;
|
||||||
use App\Core\Modules\Component;
|
use App\Core\Modules\Component;
|
||||||
use App\Core\Security;
|
use App\Core\Security;
|
||||||
|
use App\Entity\Attachment;
|
||||||
use App\Entity\AttachmentToNote;
|
use App\Entity\AttachmentToNote;
|
||||||
use App\Entity\Note;
|
use App\Entity\Note;
|
||||||
use App\Util\Common;
|
use App\Util\Common;
|
||||||
@ -124,7 +125,7 @@ END;
|
|||||||
$matched_urls = [];
|
$matched_urls = [];
|
||||||
preg_match_all(self::URL_REGEX, $content, $matched_urls, PREG_SET_ORDER);
|
preg_match_all(self::URL_REGEX, $content, $matched_urls, PREG_SET_ORDER);
|
||||||
foreach ($matched_urls as $match) {
|
foreach ($matched_urls as $match) {
|
||||||
$processed_attachments[] = GSFile::validateAndStoreURL($url);
|
$processed_attachments[] = GSFile::validateAndStoreURL($match[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
DB::persist($note);
|
DB::persist($note);
|
||||||
@ -138,4 +139,15 @@ END;
|
|||||||
DB::flush();
|
DB::flush();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a unique representation of a file on disk
|
||||||
|
*
|
||||||
|
* This can be used in the future to deduplicate images by visual content
|
||||||
|
*
|
||||||
|
* Handler for the 'HashFile' event: hashes the file's contents with the
|
||||||
|
* algorithm named by Attachment::FILEHASH_ALGO and hands the digest back
|
||||||
|
* through the by-reference argument rather than the return value.
|
||||||
|
*
|
||||||
|
* @param string      $filename path of the file passed to hash_file()
|
||||||
|
* @param null|string $out_hash output parameter, overwritten with the result
|
||||||
|
*                              of hash_file(); NOTE(review): hash_file()
|
||||||
|
*                              returns false on failure and that value is
|
||||||
|
*                              stored unchecked, so callers may see false
|
||||||
|
*
|
||||||
|
* @return mixed always Event::stop; presumably this tells the dispatcher not
|
||||||
|
*               to run further 'HashFile' handlers (confirm against the
|
||||||
|
*               Event class, which is not visible here)
|
||||||
|
*/
|
||||||
|
public static function onHashFile(string $filename, ?string &$out_hash)
|
||||||
|
{
|
||||||
|
$out_hash = hash_file(Attachment::FILEHASH_ALGO, $filename);
|
||||||
|
return Event::stop;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -43,9 +43,9 @@ class GSFile
|
|||||||
bool $is_local = true,
|
bool $is_local = true,
|
||||||
int $actor_id = null): Attachment
|
int $actor_id = null): Attachment
|
||||||
{
|
{
|
||||||
|
Event::handle('HashFile', [$file->getPathname(), &$hash]);
|
||||||
// The following properly gets the mimetype with `file` or other
|
// The following properly gets the mimetype with `file` or other
|
||||||
// available methods, so should be safe
|
// available methods, so should be safe
|
||||||
$hash = hash_file(Attachment::FILEHASH_ALGO, $sfile->getPathname());
|
|
||||||
$mimetype = $sfile->getMimeType();
|
$mimetype = $sfile->getMimeType();
|
||||||
Event::handle('AttachmentValidation', [&$sfile, &$mimetype]);
|
Event::handle('AttachmentValidation', [&$sfile, &$mimetype]);
|
||||||
$attachment = Attachment::create([
|
$attachment = Attachment::create([
|
||||||
@ -70,16 +70,17 @@ class GSFile
|
|||||||
public static function validateAndStoreURL(string $url): Attachment
|
public static function validateAndStoreURL(string $url): Attachment
|
||||||
{
|
{
|
||||||
if (Common::isValidHttpUrl($url)) {
|
if (Common::isValidHttpUrl($url)) {
|
||||||
HTTPClient::head($url);
|
$head = HTTPClient::head($url);
|
||||||
$headers = $head->getHeaders();
|
$headers = $head->getHeaders();
|
||||||
$headers = array_change_key_case($headers, CASE_LOWER);
|
$headers = array_change_key_case($headers, CASE_LOWER);
|
||||||
$attachment = Attachment::create([
|
$attachment = Attachment::create([
|
||||||
'remote_url' => $match[0],
|
'remote_url' => $url,
|
||||||
'remote_url_hash' => hash('sha256', $match[0]),
|
'remote_url_hash' => hash(Attachment::URLHASH_ALGO, $url),
|
||||||
'mimetype' => $headers['content-type'],
|
'mimetype' => $headers['content-type'][0],
|
||||||
|
'is_local' => false,
|
||||||
]);
|
]);
|
||||||
DB::persist($attachment);
|
DB::persist($attachment);
|
||||||
Event::handle('AttachmentStoreNew', [&$at]);
|
Event::handle('AttachmentStoreNew', [&$attachment]);
|
||||||
return $attachment;
|
return $attachment;
|
||||||
} else {
|
} else {
|
||||||
throw new \InvalidArgumentException();
|
throw new \InvalidArgumentException();
|
||||||
|
Loading…
Reference in New Issue
Block a user