From 9c7b66984c46668f314f93337d28c62854b6d134 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 10 May 2010 16:18:29 -0700 Subject: [PATCH] Enhanced upload file type detection. If given an original filename, we'll attempt to detect type from the extension if we were unable to make a definitive match from content. Generic octet-stream, zip, and MS Office type are explicitly singled out for re-checks, which fixes OpenOffice and MS Office documents to come up with the proper types when misdetected. File extensions can also be added to the upload type whitelist; they'll be normalized to types for the actual comparison, so only known extensions will work. --- lib/mediafile.php | 54 +++++++++++++++++++++-- tests/MediaFileTest.php | 73 ++++++++++++++++++++------------ tests/sample-uploads/image.gif | Bin 0 -> 35 bytes tests/sample-uploads/image.jpeg | Bin 0 -> 306 bytes tests/sample-uploads/image.jpg | Bin 0 -> 306 bytes tests/sample-uploads/image.png | Bin 0 -> 159 bytes 6 files changed, 97 insertions(+), 30 deletions(-) create mode 100644 tests/sample-uploads/image.gif create mode 100644 tests/sample-uploads/image.jpeg create mode 100644 tests/sample-uploads/image.jpg create mode 100644 tests/sample-uploads/image.png diff --git a/lib/mediafile.php b/lib/mediafile.php index 10d90d0081..85d673d92a 100644 --- a/lib/mediafile.php +++ b/lib/mediafile.php @@ -180,7 +180,8 @@ class MediaFile return; } - $mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name']); + $mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name'], + $_FILES[$param]['name']); $filename = null; @@ -241,19 +242,41 @@ class MediaFile return new MediaFile($user, $filename, $mimetype); } - static function getUploadedFileType($f) { + /** + * Attempt to identify the content type of a given file. + * + * @param mixed $f file handle resource, or filesystem path as string + * @param string $originalFilename (optional) for extension-based detection + * @return string + * + * @fixme is this an internal or public method? It's called from GetFileAction + * @fixme this seems to tie a front-end error message in, kinda confusing + * @fixme this looks like it could return a PEAR_Error in some cases, if + * type can't be identified and $config['attachments']['supported'] is true + * + * @throws ClientException if type is known, but not supported for local uploads + */ + static function getUploadedFileType($f, $originalFilename=false) { require_once 'MIME/Type.php'; + require_once 'MIME/Type/Extension.php'; + $mte = new MIME_Type_Extension(); $cmd = &PEAR::getStaticProperty('MIME_Type', 'fileCmd'); $cmd = common_config('attachments', 'filecommand'); $filetype = null; + // If we couldn't get a clear type from the file extension, + // we'll go ahead and try checking the content. Content checks + // are unambiguous for most image files, but nearly useless + // for office document formats. + if (is_string($f)) { // assuming a filename $filetype = MIME_Type::autoDetect($f); + } else { // assuming a filehandle @@ -262,7 +285,32 @@ class MediaFile $filetype = MIME_Type::autoDetect($stream['uri']); } - if (common_config('attachments', 'supported') === true || in_array($filetype, common_config('attachments', 'supported'))) { + // The content-based sources for MIME_Type::autoDetect() + // are wildly unreliable for office-type documents. If we've + // gotten an unclear reponse back or just couldn't identify it, + // we'll try detecting a type from its extension... + $unclearTypes = array('application/octet-stream', + 'application/vnd.ms-office', + 'application/zip'); + + if ($originalFilename && (!$filetype || in_array($filetype, $unclearTypes))) { + $type = $mte->getMIMEType($originalFilename); + if (is_string($type)) { + $filetype = $type; + } + } + + $supported = common_config('attachments', 'supported'); + if (is_array($supported)) { + // Normalize extensions to mime types + foreach ($supported as $i => $entry) { + if (strpos($entry, '/') === false) { + common_log(LOG_INFO, "sample.$entry"); + $supported[$i] = $mte->getMIMEType("sample.$entry"); + } + } + } + if ($supported === true || in_array($filetype, $supported)) { return $filetype; } $media = MIME_Type::getMedia($filetype); diff --git a/tests/MediaFileTest.php b/tests/MediaFileTest.php index 6fe9956210..a76a4f45e6 100644 --- a/tests/MediaFileTest.php +++ b/tests/MediaFileTest.php @@ -34,43 +34,62 @@ class MediaFileTest extends PHPUnit_Framework_TestCase if (!file_exists($filename)) { throw new Exception("WTF? $filename test file missing"); } - $this->assertEquals($expectedType, MediaFile::getUploadedFileType($filename)); + + $type = MediaFile::getUploadedFileType($filename, basename($filename)); + $this->assertEquals($expectedType, $type); + } + + /** + * @dataProvider fileTypeCases + * + */ + public function testUploadedFileType($filename, $expectedType) + { + if (!file_exists($filename)) { + throw new Exception("WTF? $filename test file missing"); + } + $tmp = tmpfile(); + fwrite($tmp, file_get_contents($filename)); + + $type = MediaFile::getUploadedFileType($tmp, basename($filename)); + $this->assertEquals($expectedType, $type); } static public function fileTypeCases() { $base = dirname(__FILE__); $dir = "$base/sample-uploads"; - return array( - array("$dir/office.pdf", "application/pdf"), + $files = array( + "image.png" => "image/png", + "image.gif" => "image/gif", + "image.jpg" => "image/jpeg", + "image.jpeg" => "image/jpeg", + + "office.pdf" => "application/pdf", - array("$dir/wordproc.odt", "application/vnd.oasis.opendocument.text"), - array("$dir/wordproc.ott", "application/vnd.oasis.opendocument.text-template"), - array("$dir/wordproc.doc", "application/msword"), - array("$dir/wordproc.docx", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document"), - array("$dir/wordproc.rtf", "text/rtf"), + "wordproc.odt" => "application/vnd.oasis.opendocument.text", + "wordproc.ott" => "application/vnd.oasis.opendocument.text-template", + "wordproc.doc" => "application/msword", + "wordproc.docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "wordproc.rtf" => "text/rtf", - array("$dir/spreadsheet.ods", - "application/vnd.oasis.opendocument.spreadsheet"), - array("$dir/spreadsheet.ots", - "application/vnd.oasis.opendocument.spreadsheet-template"), - array("$dir/spreadsheet.xls", "application/vnd.ms-excel"), - array("$dir/spreadsheet.xlt", "application/vnd.ms-excel"), - array("$dir/spreadsheet.xlsx", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), + "spreadsheet.ods" => "application/vnd.oasis.opendocument.spreadsheet", + "spreadsheet.ots" => "application/vnd.oasis.opendocument.spreadsheet-template", + "spreadsheet.xls" => "application/vnd.ms-excel", + "spreadsheet.xlt" => "application/vnd.ms-excel", + "spreadsheet.xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - array("$dir/presentation.odp", - "application/vnd.oasis-opendocument.presentation"), - array("$dir/presentation.otp", - "application/vnd.oasis-opendocument.presentation-template"), - array("$dir/presentation.ppt", - "application/vnd.ms-powerpoint"), - array("$dir/presentation.pot", - "application/vnd.ms-powerpoint"), - array("$dir/presentation.pptx", - "application/vnd.openxmlformats-officedocument.presentationml.presentation"), + "presentation.odp" => "application/vnd.oasis.opendocument.presentation", + "presentation.otp" => "application/vnd.oasis.opendocument.presentation-template", + "presentation.ppt" => "application/vnd.ms-powerpoint", + "presentation.pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation", ); + + $dataset = array(); + foreach ($files as $file => $type) { + $dataset[] = array("$dir/$file", $type); + } + return $dataset; } } diff --git a/tests/sample-uploads/image.gif b/tests/sample-uploads/image.gif new file mode 100644 index 0000000000000000000000000000000000000000..b636f4b8df536b0a85e7cea1a6cf3f0bd3179b96 GIT binary patch literal 35 jcmZ?wbh9u|WMp7uXkcLY4+c66KmZb9U}AD%WUvMRyAlZ1 literal 0 HcmV?d00001 diff --git a/tests/sample-uploads/image.jpeg b/tests/sample-uploads/image.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..21fcb5aef4f885ba2de26b136bec29a3056fcc86 GIT binary patch literal 306 zcmb7;NeaS15Ji94X}cvMu}st;rlY}oxDcE`@Ct&0Zbk53p1_5NaM*Yf+t!`B@)gwL z@oTVWdq;IToD4xk7`uXfARcU2i`{D3*>861&S)~5+ZzUu!t*>KU6WFlRiKq4^8HG{ zQLV-ibKGb*5FmVdKX1cKZZ!RX^~RQB=&=X)#B literal 0 HcmV?d00001 diff --git a/tests/sample-uploads/image.jpg b/tests/sample-uploads/image.jpg new file mode 100644 index 0000000000000000000000000000000000000000..21fcb5aef4f885ba2de26b136bec29a3056fcc86 GIT binary patch literal 306 zcmb7;NeaS15Ji94X}cvMu}st;rlY}oxDcE`@Ct&0Zbk53p1_5NaM*Yf+t!`B@)gwL z@oTVWdq;IToD4xk7`uXfARcU2i`{D3*>861&S)~5+ZzUu!t*>KU6WFlRiKq4^8HG{ zQLV-ibKGb*5FmVdKX1cKZZ!RX^~RQB=&=X)#B literal 0 HcmV?d00001 diff --git a/tests/sample-uploads/image.png b/tests/sample-uploads/image.png new file mode 100644 index 0000000000000000000000000000000000000000..60cbcfd17f345e65baf596c61f53985977f5a89e GIT binary patch literal 159 zcmeAS@N?(olHy`uVBq!ia0vp^j3CUx1SBVv2j2s6ii6yp7}lMWc?slj7I;J!Gca%q zgD@k*tT_@uLG}_)Usv{9tlZ*U3Ogzu=mUi$OI#yLobz*YQ}ap~oQqNuOHxx5$}>wc x6x=<11Hv2m#DR)