Merge branch 'refactor-file-processNew' into 'nightly'

Refactor on File::processNew

The code was so involved there was even a comment asking for a refactor.

Now, File_redirection::where always returns a nice File_redirection
object instead of an array or string or nothing.  The object is
either one which already existed or else a new, unsaved object.

Instead of duplicating "does it exist" checks everywhere, do it in
File_redirection::where.  You either get what exists or something to save.

An unsaved File_redirection may be paired with an unsaved File.
You will want to save the File first (using ->saveFile()) and put the
id in File_redirection#file_id before saving.

See merge request !57
This commit is contained in:
mmn 2015-11-20 17:50:01 +00:00
commit b33b40db45
6 changed files with 96 additions and 143 deletions

View File

@ -94,26 +94,31 @@ class File extends Managed_DataObject
// We don't have the file's URL since before, so let's continue. // We don't have the file's URL since before, so let's continue.
} }
if (!Event::handle('StartFileSaveNew', array(&$redir_data, $given_url))) {
throw new ServerException('File not saved due to an aborted StartFileSaveNew event.');
}
$file = new File; $file = new File;
$file->urlhash = self::hashurl($given_url);
$file->url = $given_url; $file->url = $given_url;
if (!empty($redir_data['protected'])) $file->protected = $redir_data['protected']; if (!empty($redir_data['protected'])) $file->protected = $redir_data['protected'];
if (!empty($redir_data['title'])) $file->title = $redir_data['title']; if (!empty($redir_data['title'])) $file->title = $redir_data['title'];
if (!empty($redir_data['type'])) $file->mimetype = $redir_data['type']; if (!empty($redir_data['type'])) $file->mimetype = $redir_data['type'];
if (!empty($redir_data['size'])) $file->size = intval($redir_data['size']); if (!empty($redir_data['size'])) $file->size = intval($redir_data['size']);
if (isset($redir_data['time']) && $redir_data['time'] > 0) $file->date = intval($redir_data['time']); if (isset($redir_data['time']) && $redir_data['time'] > 0) $file->date = intval($redir_data['time']);
$file_id = $file->insert(); $file->saveFile();
return $file;
}
if ($file_id === false) { public function saveFile() {
$this->urlhash = self::hashurl($this->url);
if (!Event::handle('StartFileSaveNew', array(&$this))) {
throw new ServerException('File not saved due to an aborted StartFileSaveNew event.');
}
$this->id = $this->insert();
if ($this->id === false) {
throw new ServerException('File/URL metadata could not be saved to the database.'); throw new ServerException('File/URL metadata could not be saved to the database.');
} }
Event::handle('EndFileSaveNew', array($file, $redir_data, $given_url)); Event::handle('EndFileSaveNew', array($this));
return $file;
} }
/** /**
@ -124,7 +129,6 @@ class File extends Managed_DataObject
* - optionally save a file_to_post record * - optionally save a file_to_post record
* - return the File object with the full reference * - return the File object with the full reference
* *
* @fixme refactor this mess, it's gotten pretty scary.
* @param string $given_url the URL we're looking at * @param string $given_url the URL we're looking at
* @param Notice $notice (optional) * @param Notice $notice (optional)
* @param bool $followRedirects defaults to true * @param bool $followRedirects defaults to true
@ -143,69 +147,30 @@ class File extends Managed_DataObject
throw new ServerException('No canonical URL from given URL to process'); throw new ServerException('No canonical URL from given URL to process');
} }
$file = null; $redir = File_redirection::where($given_url);
$file = $redir->getFile();
try { // If we still don't have a File object, let's create one now!
$file = File::getByUrl($given_url); if (empty($file->id)) {
} catch (NoResultException $e) { if ($redir->url === $given_url || !$followRedirects) {
// First check if we have a lookup trace for this URL already // Save the File object based on our lookup trace
try { $file->saveFile();
$file_redir = File_redirection::getByUrl($given_url); } else {
$file = File::getKV('id', $file_redir->file_id); $file->saveFile();
if (!$file instanceof File) { $redir->file_id = $file->id;
// File did not exist, let's clean up the File_redirection entry $redir->insert();
$file_redir->delete();
}
} catch (NoResultException $e) {
// We just wanted to doublecheck whether a File_thumbnail we might've had
// actually referenced an existing File object.
} }
} }
// If we still don't have a File object, let's create one now! if (!$file instanceof File || empty($file->id)) {
if (!$file instanceof File) { // This should not happen
// @fixme for new URLs this also looks up non-redirect data throw new ServerException('URL processing failed without new File object');
// such as target content type, size, etc, which we need
// for File::saveNew(); so we call it even if not following
// new redirects.
$redir_data = File_redirection::where($given_url);
if (is_array($redir_data)) {
$redir_url = $redir_data['url'];
} elseif (is_string($redir_data)) {
$redir_url = $redir_data;
$redir_data = array();
} else {
// TRANS: Server exception thrown when a URL cannot be processed.
throw new ServerException(sprintf(_("Cannot process URL '%s'"), $given_url));
}
if ($redir_url === $given_url || !$followRedirects) {
// Save the File object based on our lookup trace
$file = File::saveNew($redir_data, $given_url);
} else {
// This seems kind of messed up... for now skipping this part
// if we're already under a redirect, so we don't go into
// horrible infinite loops if we've been given an unstable
// redirect (where the final destination of the first request
// doesn't match what we get when we ask for it again).
//
// Seen in the wild with clojure.org, which redirects through
// wikispaces for auth and appends session data in the URL params.
$file = self::processNew($redir_url, $notice, /*followRedirects*/false);
File_redirection::saveNew($redir_data, $file->id, $given_url);
}
if (!$file instanceof File) {
// This should only happen if File::saveNew somehow did not return a File object,
// though we have an assert for that in case the event there might've gone wrong.
// If anything else goes wrong, there should've been an exception thrown.
throw new ServerException('URL processing failed without new File object');
}
} }
if ($notice instanceof Notice) { if ($notice instanceof Notice) {
File_to_post::processNew($file, $notice); File_to_post::processNew($file, $notice);
} }
return $file; return $file;
} }

View File

@ -39,6 +39,8 @@ class File_redirection extends Managed_DataObject
/* the code above is auto generated do not remove the tag below */ /* the code above is auto generated do not remove the tag below */
###END_AUTOCODE ###END_AUTOCODE
protected $file; /* Cache the associated file sometimes */
public static function schemaDef() public static function schemaDef()
{ {
return array( return array(
@ -155,41 +157,53 @@ class File_redirection extends Managed_DataObject
* *
* @param string $in_url * @param string $in_url
* @param boolean $discover true to attempt dereferencing the redirect if we don't know it already * @param boolean $discover true to attempt dereferencing the redirect if we don't know it already
* @return mixed one of: * @return File_redirection
* string - target URL, if this is a direct link or a known redirect
* array - redirect info if this is an *unknown* redirect:
* associative array with the following elements:
* code: HTTP status code
* redirects: count of redirects followed
* url: URL string of final target
* type (optional): MIME type from Content-Type header
* size (optional): byte size from Content-Length header
* time (optional): timestamp from Last-Modified header
*/ */
static function where($in_url, $discover=true) { static function where($in_url, $discover=true) {
// let's see if we know this... $redir = new File_redirection();
$redir->url = $in_url;
$redir->urlhash = File::hashurl($redir->url);
$redir->redirections = 0;
try { try {
$a = File::getByUrl($in_url); $r = File_redirection::getByUrl($in_url);
// this is a direct link to $a->url if($r instanceof File_redirection) {
return $a->url; return $r;
}
} catch (NoResultException $e) { } catch (NoResultException $e) {
try { try {
$b = File_redirection::getByUrl($in_url); $f = File::getByUrl($in_url);
// this is a redirect to $b->file_id $redir->file_id = $f->id;
$a = File::getByID($b->file_id); $redir->file = $f;
return $a->url; return $redir;
} catch (NoResultException $e) { } catch (NoResultException $e) {
// Oh well, let's keep going // Oh well, let's keep going
} }
} }
if ($discover) { if ($discover) {
$ret = File_redirection::lookupWhere($in_url); $redir_info = File_redirection::lookupWhere($in_url);
return $ret; if(is_string($redir_info)) {
$redir_info = array('url' => $redir_info);
}
// Double check that we don't already have the resolved URL
$r = self::where($redir_info['url'], false);
if (!empty($r->file_id)) {
return $r;
}
} }
// No manual dereferencing; leave the unknown URL as is. $redir->httpcode = $redir_info['code'];
return $in_url; $redir->redirections = intval($redir_info['redirects']);
$redir->file = new File();
$redir->file->url = $redir_info ? $redir_info['url'] : $in_url;
$redir->file->mimetype = $redir_info['type'];
$redir->file->size = $redir_info['size'];
$redir->file->date = $redir_info['time'];
if($redir_info['protected']) $redir->file->protected = true;
return $redir;
} }
/** /**
@ -247,28 +261,12 @@ class File_redirection extends Managed_DataObject
$short_url = (string)$short_url; $short_url = (string)$short_url;
// store it // store it
$file = File::getKV('url', $long_url); $file = File::getKV('url', $long_url);
if ($file instanceof File) { if (!$file instanceof File) {
$file_id = $file->getID();
} else {
// Check if the target URL is itself a redirect... // Check if the target URL is itself a redirect...
$redir_data = File_redirection::where($long_url); $redir = File_redirection::where($long_url);
if (is_array($redir_data)) { $file = $redir->getFile();
// We haven't seen the target URL before. if (empty($file->id)) {
// Save file and embedding data about it! $file->saveFile();
$file = File::saveNew($redir_data, $long_url);
$file_id = $file->getID();
} else if (is_string($redir_data)) {
// The file is a known redirect target.
$file = File::getKV('url', $redir_data);
if (empty($file)) {
// @fixme should we save a new one?
// this case was triggering sometimes for redirects
// with unresolvable targets; found while fixing
// "can't linkify" bugs with shortened links to
// SSL sites with cert issues.
return null;
}
$file_id = $file->getID();
} }
} }
$file_redir = File_redirection::getKV('url', $short_url); $file_redir = File_redirection::getKV('url', $short_url);
@ -276,7 +274,7 @@ class File_redirection extends Managed_DataObject
$file_redir = new File_redirection; $file_redir = new File_redirection;
$file_redir->urlhash = File::hashurl($short_url); $file_redir->urlhash = File::hashurl($short_url);
$file_redir->url = $short_url; $file_redir->url = $short_url;
$file_redir->file_id = $file_id; $file_redir->file_id = $file->id;
$file_redir->insert(); $file_redir->insert();
} }
return $short_url; return $short_url;
@ -385,4 +383,12 @@ class File_redirection extends Managed_DataObject
echo "DONE.\n"; echo "DONE.\n";
echo "Resuming core schema upgrade..."; echo "Resuming core schema upgrade...";
} }
public function getFile() {
if(empty($this->file) && $this->file_id) {
$this->file = File::getKV('id', $this->file_id);
}
return $this->file;
}
} }

View File

@ -987,20 +987,9 @@ function common_linkify($url) {
$canon = "mailto:$url"; $canon = "mailto:$url";
$longurl = "mailto:$url"; $longurl = "mailto:$url";
} else { } else {
$canon = File_redirection::_canonUrl($url); $canon = File_redirection::_canonUrl($url);
$longurl_data = File_redirection::where($canon, common_config('attachments', 'process_links')); $longurl_data = File_redirection::where($canon, common_config('attachments', 'process_links'));
if (is_array($longurl_data)) { $longurl = $longurl_data->url;
$longurl = $longurl_data['url'];
} elseif (is_string($longurl_data)) {
$longurl = $longurl_data;
} else {
// Unable to reach the server to verify contents, etc
// Just pass the link on through for now.
common_log(LOG_ERR, "Can't linkify url '$url'");
$longurl = $url;
}
} }
$attrs = array('href' => $canon, 'title' => $longurl); $attrs = array('href' => $canon, 'title' => $longurl);

View File

@ -90,12 +90,10 @@ class OembedPlugin extends Plugin
* Normally this event is called through File::saveNew() * Normally this event is called through File::saveNew()
* *
* @param File $file The newly inserted File object. * @param File $file The newly inserted File object.
* @param array $redir_data lookup data eg from File_redirection::where()
* @param string $given_url
* *
* @return boolean success * @return boolean success
*/ */
public function onEndFileSaveNew(File $file, array $redir_data, $given_url) public function onEndFileSaveNew(File $file)
{ {
$fo = File_oembed::getKV('file_id', $file->id); $fo = File_oembed::getKV('file_id', $file->id);
if ($fo instanceof File_oembed) { if ($fo instanceof File_oembed) {
@ -103,15 +101,12 @@ class OembedPlugin extends Plugin
return true; return true;
} }
if (isset($redir_data['oembed']['json']) if (isset($file->mimetype)
&& !empty($redir_data['oembed']['json'])) { && (('text/html' === substr($file->mimetype, 0, 9)
File_oembed::saveNew($redir_data['oembed']['json'], $file->id); || 'application/xhtml+xml' === substr($file->mimetype, 0, 21)))) {
} elseif (isset($redir_data['type'])
&& (('text/html' === substr($redir_data['type'], 0, 9)
|| 'application/xhtml+xml' === substr($redir_data['type'], 0, 21)))) {
try { try {
$oembed_data = File_oembed::_getOembed($given_url); $oembed_data = File_oembed::_getOembed($file->url);
if ($oembed_data === false) { if ($oembed_data === false) {
throw new Exception('Did not get oEmbed data from URL'); throw new Exception('Did not get oEmbed data from URL');
} }

View File

@ -128,12 +128,12 @@ class File_oembed extends Managed_DataObject
if ($file instanceof File) { if ($file instanceof File) {
$file_oembed->mimetype = $file->mimetype; $file_oembed->mimetype = $file->mimetype;
} else { } else {
$file_redir = File_redirection::getKV('url', $given_url); $redir = File_redirection::where($given_url);
if (empty($file_redir)) { if (empty($redir->file_id)) {
$redir_data = File_redirection::where($given_url); $f = $redir->getFile();
$file_oembed->mimetype = $redir_data['type']; $file_oembed->mimetype = $f->mimetype;
} else { } else {
$file_id = $file_redir->file_id; $file_id = $redir->file_id;
} }
} }
} }

View File

@ -30,29 +30,27 @@ class StoreRemoteMediaPlugin extends Plugin
* *
* Normally this event is called through File::saveNew() * Normally this event is called through File::saveNew()
* *
* @param File $file The newly inserted File object. * @param File $file The abount-to-be-inserted File object.
* @param array $redir_data lookup data eg from File_redirection::where()
* @param string $given_url
* *
* @return boolean success * @return boolean success
*/ */
public function onStartFileSaveNew(array &$redir_data, $given_url) public function onStartFileSaveNew(File &$file)
{ {
// save given URL as title if it's a media file this plugin understands // save given URL as title if it's a media file this plugin understands
// which will make it shown in the AttachmentList widgets // which will make it shown in the AttachmentList widgets
if (isset($redir_data['title']) && strlen($redir_data['title']>0)) { if (isset($file->title) && strlen($file->title)>0) {
// Title is already set // Title is already set
return true; return true;
} }
if (!isset($redir_data['type'])) { if (!isset($file->mimetype)) {
// Unknown mimetype, it's not our job to figure out what it is. // Unknown mimetype, it's not our job to figure out what it is.
return true; return true;
} }
switch (common_get_mime_media($redir_data['type'])) { switch (common_get_mime_media($file->mimetype)) {
case 'image': case 'image':
// Just to set something for now at least... // Just to set something for now at least...
$redir_data['title'] = $given_url; $file->title = $file->mimetype;
break; break;
} }