Enhanced upload file type detection. If given an original filename, we'll attempt to detect the type from its extension when we are unable to make a definitive match from the content. The generic octet-stream, zip, and MS Office types are explicitly singled out for re-checks, so OpenOffice and MS Office documents now come up with the proper types when content-based detection misidentifies them.

File extensions can also be added to the upload type whitelist; they'll be normalized to types for the actual comparison, so only known extensions will work.
This commit is contained in:
Brion Vibber 2010-05-10 16:18:29 -07:00
parent 41d81b996f
commit 9c7b66984c
6 changed files with 97 additions and 30 deletions

View File

@ -180,7 +180,8 @@ class MediaFile
return; return;
} }
$mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name']); $mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name'],
$_FILES[$param]['name']);
$filename = null; $filename = null;
@ -241,19 +242,41 @@ class MediaFile
return new MediaFile($user, $filename, $mimetype); return new MediaFile($user, $filename, $mimetype);
} }
static function getUploadedFileType($f) { /**
* Attempt to identify the content type of a given file.
*
* @param mixed $f file handle resource, or filesystem path as string
* @param string $originalFilename (optional) for extension-based detection
* @return string
*
* @fixme is this an internal or public method? It's called from GetFileAction
* @fixme this seems to tie a front-end error message in, kinda confusing
* @fixme this looks like it could return a PEAR_Error in some cases, if
* type can't be identified and $config['attachments']['supported'] is true
*
* @throws ClientException if type is known, but not supported for local uploads
*/
static function getUploadedFileType($f, $originalFilename=false) {
require_once 'MIME/Type.php'; require_once 'MIME/Type.php';
require_once 'MIME/Type/Extension.php';
$mte = new MIME_Type_Extension();
$cmd = &PEAR::getStaticProperty('MIME_Type', 'fileCmd'); $cmd = &PEAR::getStaticProperty('MIME_Type', 'fileCmd');
$cmd = common_config('attachments', 'filecommand'); $cmd = common_config('attachments', 'filecommand');
$filetype = null; $filetype = null;
// If we couldn't get a clear type from the file extension,
// we'll go ahead and try checking the content. Content checks
// are unambiguous for most image files, but nearly useless
// for office document formats.
if (is_string($f)) { if (is_string($f)) {
// assuming a filename // assuming a filename
$filetype = MIME_Type::autoDetect($f); $filetype = MIME_Type::autoDetect($f);
} else { } else {
// assuming a filehandle // assuming a filehandle
@ -262,7 +285,32 @@ class MediaFile
$filetype = MIME_Type::autoDetect($stream['uri']); $filetype = MIME_Type::autoDetect($stream['uri']);
} }
if (common_config('attachments', 'supported') === true || in_array($filetype, common_config('attachments', 'supported'))) { // The content-based sources for MIME_Type::autoDetect()
// are wildly unreliable for office-type documents. If we've
// gotten an unclear response back or just couldn't identify it,
// we'll try detecting a type from its extension...
$unclearTypes = array('application/octet-stream',
'application/vnd.ms-office',
'application/zip');
if ($originalFilename && (!$filetype || in_array($filetype, $unclearTypes))) {
$type = $mte->getMIMEType($originalFilename);
if (is_string($type)) {
$filetype = $type;
}
}
$supported = common_config('attachments', 'supported');
if (is_array($supported)) {
// Normalize extensions to mime types
foreach ($supported as $i => $entry) {
if (strpos($entry, '/') === false) {
common_log(LOG_INFO, "sample.$entry");
$supported[$i] = $mte->getMIMEType("sample.$entry");
}
}
}
if ($supported === true || in_array($filetype, $supported)) {
return $filetype; return $filetype;
} }
$media = MIME_Type::getMedia($filetype); $media = MIME_Type::getMedia($filetype);

View File

@ -34,43 +34,62 @@ class MediaFileTest extends PHPUnit_Framework_TestCase
if (!file_exists($filename)) { if (!file_exists($filename)) {
throw new Exception("WTF? $filename test file missing"); throw new Exception("WTF? $filename test file missing");
} }
$this->assertEquals($expectedType, MediaFile::getUploadedFileType($filename));
$type = MediaFile::getUploadedFileType($filename, basename($filename));
$this->assertEquals($expectedType, $type);
}
/**
* @dataProvider fileTypeCases
*
*/
public function testUploadedFileType($filename, $expectedType)
{
if (!file_exists($filename)) {
throw new Exception("WTF? $filename test file missing");
}
$tmp = tmpfile();
fwrite($tmp, file_get_contents($filename));
$type = MediaFile::getUploadedFileType($tmp, basename($filename));
$this->assertEquals($expectedType, $type);
} }
static public function fileTypeCases() static public function fileTypeCases()
{ {
$base = dirname(__FILE__); $base = dirname(__FILE__);
$dir = "$base/sample-uploads"; $dir = "$base/sample-uploads";
return array( $files = array(
array("$dir/office.pdf", "application/pdf"), "image.png" => "image/png",
"image.gif" => "image/gif",
"image.jpg" => "image/jpeg",
"image.jpeg" => "image/jpeg",
"office.pdf" => "application/pdf",
array("$dir/wordproc.odt", "application/vnd.oasis.opendocument.text"), "wordproc.odt" => "application/vnd.oasis.opendocument.text",
array("$dir/wordproc.ott", "application/vnd.oasis.opendocument.text-template"), "wordproc.ott" => "application/vnd.oasis.opendocument.text-template",
array("$dir/wordproc.doc", "application/msword"), "wordproc.doc" => "application/msword",
array("$dir/wordproc.docx", "wordproc.docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"), "wordproc.rtf" => "text/rtf",
array("$dir/wordproc.rtf", "text/rtf"),
array("$dir/spreadsheet.ods", "spreadsheet.ods" => "application/vnd.oasis.opendocument.spreadsheet",
"application/vnd.oasis.opendocument.spreadsheet"), "spreadsheet.ots" => "application/vnd.oasis.opendocument.spreadsheet-template",
array("$dir/spreadsheet.ots", "spreadsheet.xls" => "application/vnd.ms-excel",
"application/vnd.oasis.opendocument.spreadsheet-template"), "spreadsheet.xlt" => "application/vnd.ms-excel",
array("$dir/spreadsheet.xls", "application/vnd.ms-excel"), "spreadsheet.xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
array("$dir/spreadsheet.xlt", "application/vnd.ms-excel"),
array("$dir/spreadsheet.xlsx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
array("$dir/presentation.odp", "presentation.odp" => "application/vnd.oasis.opendocument.presentation",
"application/vnd.oasis-opendocument.presentation"), "presentation.otp" => "application/vnd.oasis.opendocument.presentation-template",
array("$dir/presentation.otp", "presentation.ppt" => "application/vnd.ms-powerpoint",
"application/vnd.oasis-opendocument.presentation-template"), "presentation.pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation",
array("$dir/presentation.ppt",
"application/vnd.ms-powerpoint"),
array("$dir/presentation.pot",
"application/vnd.ms-powerpoint"),
array("$dir/presentation.pptx",
"application/vnd.openxmlformats-officedocument.presentationml.presentation"),
); );
$dataset = array();
foreach ($files as $file => $type) {
$dataset[] = array("$dir/$file", $type);
}
return $dataset;
} }
} }

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 306 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 306 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 159 B