gnu-social/classes/File.php
Miguel Dantas b224d93098 [MEDIA] ImageFile now extends MediaFile and validates images more aggressively.
Default supported files need to use consistent names. Bumped version to 1.20.0

ImageFile has been changed to extend MediaFile and rely on it to partially
validate files. This validation has been extended to not rely solely on
Fileinfo, as it is disabled on some places. Now it'll try to use the shell
command `file`, if Fileinfo isn't available.

ImageFile now converts every new upload to PNG, except JPEG and GIF, which
are kept, but still resized (to the same size), to remove possible scripts
embedded therein.

MediaFile::fromUpload will return an ImageFile if the uploaded file is an image
or a MediaFile otherwise.

MediaFile can be constructed with an id with value -1 to denote a temporary
object, which is not added to the DB. This is useful to create a temporary
object for representing images, so it can be used to rescale them.

The supported attachment array needs to be populated with the result of calling
`image_type_to_extension` for the appropriate image type, in the case of images.
This is important so all parts of the code see the same extension for each image
type (jpg vs jpeg).

Added documentation to classes/File.php and to lib/MediaFile and lib/ImageFile
2019-06-10 00:35:53 +01:00

812 lines
33 KiB
PHP

<?php
/**
* GNU social - a federating social network
*
* Abstraction for files
*
* LICENCE: This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @category Files
* @package GNUsocial
* @author Mikael Nordfeldth <mmn@hethane.se>
* @author Miguel Dantas <biodantas@gmail.com>
* @copyright 2008-2009, 2019 Free Software Foundation http://fsf.org
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
* @link https://www.gnu.org/software/social/
*/
defined('GNUSOCIAL') || die();
/**
* Table Definition for file
*/
class File extends Managed_DataObject
{
public $__table = 'file'; // table name
public $id; // int(4) primary_key not_null
public $urlhash; // varchar(64) unique_key
public $url; // text
public $filehash; // varchar(64) indexed
public $mimetype; // varchar(50)
public $size; // int(4)
public $title; // text()
public $date; // int(4)
public $protected; // int(4)
public $filename; // text()
public $width; // int(4)
public $height; // int(4)
public $modified; // timestamp() not_null default_CURRENT_TIMESTAMP
const URLHASH_ALG = 'sha256';
const FILEHASH_ALG = 'sha256';
public static function schemaDef()
{
return array(
'fields' => array(
'id' => array('type' => 'serial', 'not null' => true),
'urlhash' => array('type' => 'varchar', 'length' => 64, 'not null' => true, 'description' => 'sha256 of destination URL (url field)'),
'url' => array('type' => 'text', 'description' => 'destination URL after following possible redirections'),
'filehash' => array('type' => 'varchar', 'length' => 64, 'not null' => false, 'description' => 'sha256 of the file contents, only for locally stored files of course'),
'mimetype' => array('type' => 'varchar', 'length' => 50, 'description' => 'mime type of resource'),
'size' => array('type' => 'int', 'description' => 'size of resource when available'),
'title' => array('type' => 'text', 'description' => 'title of resource when available'),
'date' => array('type' => 'int', 'description' => 'date of resource according to http query'),
'protected' => array('type' => 'int', 'description' => 'true when URL is private (needs login)'),
'filename' => array('type' => 'text', 'description' => 'if file is stored locally (too) this is the filename'),
'width' => array('type' => 'int', 'description' => 'width in pixels, if it can be described as such and data is available'),
'height' => array('type' => 'int', 'description' => 'height in pixels, if it can be described as such and data is available'),
'modified' => array('type' => 'timestamp', 'not null' => true, 'description' => 'date this record was modified'),
),
'primary key' => array('id'),
'unique keys' => array(
'file_urlhash_key' => array('urlhash'),
),
'indexes' => array(
'file_filehash_idx' => array('filehash'),
),
);
}
public static function isProtected($url) {
$protected_urls_exps = array(
'https://www.facebook.com/login.php',
common_path('main/login')
);
foreach ($protected_urls_exps as $protected_url_exp) {
if (preg_match('!^'.preg_quote($protected_url_exp).'(.*)$!i', $url) === 1) {
return true;
}
}
return false;
}
/**
* Save a new file record.
*
* @param array $redir_data lookup data eg from File_redirection::where()
* @param string $given_url
* @return File
* @throws ServerException
*/
public static function saveNew(array $redir_data, $given_url)
{
$file = null;
try {
// I don't know why we have to keep doing this but we run a last check to avoid
// uniqueness bugs.
$file = File::getByUrl($given_url);
return $file;
} catch (NoResultException $e) {
// We don't have the file's URL since before, so let's continue.
}
// if the given url is an local attachment url and the id already exists, don't
// save a new file record. This should never happen, but let's make it foolproof
// FIXME: how about attachments servers?
$u = parse_url($given_url);
if (isset($u['host']) && $u['host'] === common_config('site', 'server')) {
$r = Router::get();
// Skip the / in the beginning or $r->map won't match
try {
$args = $r->map(mb_substr($u['path'], 1));
if ($args['action'] === 'attachment') {
try {
// $args['attachment'] should always be set if action===attachment, given our routing rules
$file = File::getByID($args['attachment']);
return $file;
} catch (EmptyPkeyValueException $e) {
// ...but $args['attachment'] can also be 0...
} catch (NoResultException $e) {
// apparently this link goes to us, but is _not_ an existing attachment (File) ID?
}
}
} catch (Exception $e) {
// Some other exception was thrown from $r->map, likely a
// ClientException (404) because of some malformed link to
// our own instance. It's still a valid URL however, so we
// won't abort anything... I noticed this when linking:
// https://social.umeahackerspace.se/mmn/foaf' (notice the
// apostrophe in the end, making it unrecognizable for our
// URL routing.
// That specific issue (the apostrophe being part of a link
// is something that may or may not have been fixed since,
// in lib/util.php in common_replace_urls_callback().
}
}
$file = new File;
$file->url = $given_url;
if (!empty($redir_data['protected'])) $file->protected = $redir_data['protected'];
if (!empty($redir_data['title'])) $file->title = $redir_data['title'];
if (!empty($redir_data['type'])) $file->mimetype = $redir_data['type'];
if (!empty($redir_data['size'])) $file->size = intval($redir_data['size']);
if (isset($redir_data['time']) && $redir_data['time'] > 0) $file->date = intval($redir_data['time']);
$file->saveFile();
return $file;
}
public function saveFile() {
$this->urlhash = self::hashurl($this->url);
if (!Event::handle('StartFileSaveNew', array(&$this))) {
throw new ServerException('File not saved due to an aborted StartFileSaveNew event.');
}
$this->id = $this->insert();
if ($this->id === false) {
throw new ServerException('File/URL metadata could not be saved to the database.');
}
Event::handle('EndFileSaveNew', array($this));
}
/**
* Go look at a URL and possibly save data about it if it's new:
* - follow redirect chains and store them in file_redirection
* - if a thumbnail is available, save it in file_thumbnail
* - save file record with basic info
* - optionally save a file_to_post record
* - return the File object with the full reference
*
* @param string $given_url the URL we're looking at
* @param Notice $notice (optional)
* @param bool $followRedirects defaults to true
*
* @return mixed File on success, -1 on some errors
*
* @throws ServerException on failure
*/
public static function processNew($given_url, Notice $notice=null, $followRedirects=true) {
if (empty($given_url)) {
throw new ServerException('No given URL to process');
}
$given_url = File_redirection::_canonUrl($given_url);
if (empty($given_url)) {
throw new ServerException('No canonical URL from given URL to process');
}
$redir = File_redirection::where($given_url);
try {
$file = $redir->getFile();
} catch (EmptyPkeyValueException $e) {
common_log(LOG_ERR, 'File_redirection::where gave object with empty file_id for given_url '._ve($given_url));
throw new ServerException('URL processing failed without new File object');
} catch (NoResultException $e) {
// This should not happen
common_log(LOG_ERR, 'File_redirection after discovery could still not return a File object.');
throw new ServerException('URL processing failed without new File object');
}
if ($notice instanceof Notice) {
File_to_post::processNew($file, $notice);
}
return $file;
}
public static function respectsQuota(Profile $scoped, $fileSize) {
if ($fileSize > common_config('attachments', 'file_quota')) {
// TRANS: Message used to be inserted as %2$s in the text "No file may
// TRANS: be larger than %1$d byte and the file you sent was %2$s.".
// TRANS: %1$d is the number of bytes of an uploaded file.
$fileSizeText = sprintf(_m('%1$d byte','%1$d bytes',$fileSize),$fileSize);
$fileQuota = common_config('attachments', 'file_quota');
// TRANS: Message given if an upload is larger than the configured maximum.
// TRANS: %1$d (used for plural) is the byte limit for uploads,
// TRANS: %2$s is the proper form of "n bytes". This is the only ways to have
// TRANS: gettext support multiple plurals in the same message, unfortunately...
throw new ClientException(
sprintf(_m('No file may be larger than %1$d byte and the file you sent was %2$s. Try to upload a smaller version.',
'No file may be larger than %1$d bytes and the file you sent was %2$s. Try to upload a smaller version.',
$fileQuota),
$fileQuota, $fileSizeText));
}
$file = new File;
$query = "select sum(size) as total from file join file_to_post on file_to_post.file_id = file.id join notice on file_to_post.post_id = notice.id where profile_id = {$scoped->id} and file.url like '%/notice/%/file'";
$file->query($query);
$file->fetch();
$total = $file->total + $fileSize;
if ($total > common_config('attachments', 'user_quota')) {
// TRANS: Message given if an upload would exceed user quota.
// TRANS: %d (number) is the user quota in bytes and is used for plural.
throw new ClientException(
sprintf(_m('A file this large would exceed your user quota of %d byte.',
'A file this large would exceed your user quota of %d bytes.',
common_config('attachments', 'user_quota')),
common_config('attachments', 'user_quota')));
}
$query .= ' AND EXTRACT(month FROM file.modified) = EXTRACT(month FROM now()) and EXTRACT(year FROM file.modified) = EXTRACT(year FROM now())';
$file->query($query);
$file->fetch();
$total = $file->total + $fileSize;
if ($total > common_config('attachments', 'monthly_quota')) {
// TRANS: Message given id an upload would exceed a user's monthly quota.
// TRANS: $d (number) is the monthly user quota in bytes and is used for plural.
throw new ClientException(
sprintf(_m('A file this large would exceed your monthly quota of %d byte.',
'A file this large would exceed your monthly quota of %d bytes.',
common_config('attachments', 'monthly_quota')),
common_config('attachments', 'monthly_quota')));
}
return true;
}
public function getFilename()
{
return self::tryFilename($this->filename);
}
public function getSize()
{
return intval($this->size);
}
// where should the file go?
static function filename(Profile $profile, $origname, $mimetype)
{
$ext = self::guessMimeExtension($mimetype, $origname);
// Normalize and make the original filename more URL friendly.
$origname = basename($origname, ".$ext");
if (class_exists('Normalizer')) {
// http://php.net/manual/en/class.normalizer.php
// http://www.unicode.org/reports/tr15/
$origname = Normalizer::normalize($origname, Normalizer::FORM_KC);
}
$origname = preg_replace('/[^A-Za-z0-9\.\_]/', '_', $origname);
$nickname = $profile->getNickname();
$datestamp = strftime('%Y%m%d', time());
do {
// generate new random strings until we don't run into a filename collision.
$random = strtolower(common_confirmation_code(16));
$filename = "$nickname-$datestamp-$origname-$random.$ext";
} while (file_exists(self::path($filename)));
return $filename;
}
/**
* @param $mimetype string The mimetype we've discovered for this file.
* @param $filename string An optional filename which we can use on failure.
* @return mixed|string
* @throws ClientException
*/
static function guessMimeExtension($mimetype, $filename=null)
{
try {
// first see if we know the extension for our mimetype
$ext = common_supported_mime_to_ext($mimetype);
// we do, so use it!
return $ext;
} catch (UnknownMimeExtensionException $e) {
// We don't know the extension for this mimetype, but let's guess.
// If we can't recognize the extension from the MIME, we try
// to guess based on filename, if one was supplied.
if (!is_null($filename) && preg_match('/^.+\.([A-Za-z0-9]+)$/', $filename, $matches)) {
// we matched on a file extension, so let's see if it means something.
$ext = mb_strtolower($matches[1]);
$blacklist = common_config('attachments', 'extblacklist');
// If we got an extension from $filename we want to check if it's in a blacklist
// so we avoid people uploading .php files etc.
if (array_key_exists($ext, $blacklist)) {
if (!is_string($blacklist[$ext])) {
// we don't have a safe replacement extension
throw new ClientException(_('Blacklisted file extension.'));
}
common_debug('Found replaced extension for filename '._ve($filename).': '._ve($ext));
// return a safe replacement extension ('php' => 'phps' for example)
return $blacklist[$ext];
}
// the attachment extension based on its filename was not blacklisted so it's ok to use it
return $ext;
}
} catch (Exception $e) {
common_log(LOG_INFO, 'Problem when figuring out extension for mimetype: '._ve($e));
}
// If nothing else has given us a result, try to extract it from
// the mimetype value (this turns .jpg to .jpeg for example...)
$matches = array();
// FIXME: try to build a regexp that will get jpeg from image/jpeg as well as json from application/jrd+json
if (!preg_match('/\/([a-z0-9]+)/', mb_strtolower($mimetype), $matches)) {
throw new Exception('Malformed mimetype: '.$mimetype);
}
return mb_strtolower($matches[1]);
}
/**
* Validation for as-saved base filenames
* @param $filename
* @return false|int
*/
static function validFilename($filename)
{
return preg_match('/^[A-Za-z0-9._-]+$/', $filename);
}
static function tryFilename($filename)
{
if (!self::validFilename($filename))
{
throw new InvalidFilenameException($filename);
}
// if successful, return the filename for easy if-statementing
return $filename;
}
/**
* @param $filename
* @return string
* @throws InvalidFilenameException
*/
static function path($filename)
{
self::tryFilename($filename);
$dir = common_config('attachments', 'dir');
if (!in_array($dir[mb_strlen($dir)-1], ['/', '\\'])) {
$dir .= DIRECTORY_SEPARATOR;
}
return $dir . $filename;
}
static function url($filename)
{
self::tryFilename($filename);
if (common_config('site','private')) {
return common_local_url('getfile',
array('filename' => $filename));
}
if (GNUsocial::useHTTPS()) {
$sslserver = common_config('attachments', 'sslserver');
if (empty($sslserver)) {
// XXX: this assumes that background dir == site dir + /file/
// not true if there's another server
if (is_string(common_config('site', 'sslserver')) &&
mb_strlen(common_config('site', 'sslserver')) > 0) {
$server = common_config('site', 'sslserver');
} else if (common_config('site', 'server')) {
$server = common_config('site', 'server');
}
$path = common_config('site', 'path') . '/file/';
} else {
$server = $sslserver;
$path = common_config('attachments', 'sslpath');
if (empty($path)) {
$path = common_config('attachments', 'path');
}
}
$protocol = 'https';
} else {
$path = common_config('attachments', 'path');
$server = common_config('attachments', 'server');
if (empty($server)) {
$server = common_config('site', 'server');
}
$ssl = common_config('attachments', 'ssl');
$protocol = ($ssl) ? 'https' : 'http';
}
if ($path[strlen($path)-1] != '/') {
$path .= '/';
}
if ($path[0] != '/') {
$path = '/'.$path;
}
return $protocol.'://'.$server.$path.$filename;
}
static $_enclosures = array();
function getEnclosure(){
if (isset(self::$_enclosures[$this->getID()])) {
return self::$_enclosures[$this->getID()];
}
$enclosure = (object) array();
foreach (array('title', 'url', 'date', 'modified', 'size', 'mimetype', 'width', 'height') as $key) {
if ($this->$key !== '') {
$enclosure->$key = $this->$key;
}
}
$needMoreMetadataMimetypes = array(null, 'application/xhtml+xml', 'text/html');
if (!isset($this->filename) && in_array(common_bare_mime($enclosure->mimetype), $needMoreMetadataMimetypes)) {
// This fetches enclosure metadata for non-local links with unset/HTML mimetypes,
// which may be enriched through oEmbed or similar (implemented as plugins)
Event::handle('FileEnclosureMetadata', array($this, &$enclosure));
}
if (empty($enclosure->mimetype)) {
// This means we either don't know what it is, so it can't
// be shown as an enclosure, or it is an HTML link which
// does not link to a resource with further metadata.
throw new ServerException('Unknown enclosure mimetype, not enough metadata');
}
self::$_enclosures[$this->getID()] = $enclosure;
return $enclosure;
}
public function hasThumbnail()
{
try {
$this->getThumbnail();
} catch (Exception $e) {
return false;
}
return true;
}
/**
* Get the attachment's thumbnail record, if any.
* Make sure you supply proper 'int' typed variables (or null).
*
* @param $width int Max width of thumbnail in pixels. (if null, use common_config values)
* @param $height int Max height of thumbnail in pixels. (if null, square-crop to $width)
* @param $crop bool Crop to the max-values' aspect ratio
* @param $force_still bool Don't allow fallback to showing original (such as animated GIF)
* @param $upscale mixed Whether or not to scale smaller images up to larger thumbnail sizes. (null = site default)
*
* @return File_thumbnail
*
* @throws UseFileAsThumbnailException if the file is considered an image itself and should be itself as thumbnail
* @throws UnsupportedMediaException if, despite trying, we can't understand how to make a thumbnail for this format
* @throws ServerException on various other errors
*/
public function getThumbnail($width=null, $height=null, $crop=false, $force_still=true, $upscale=null)
{
// Get some more information about this file through our ImageFile class
$image = ImageFile::fromFileObject($this);
if ($image->animated && !common_config('thumbnail', 'animated')) {
// null means "always use file as thumbnail"
// false means you get choice between frozen frame or original when calling getThumbnail
if (is_null(common_config('thumbnail', 'animated')) || !$force_still) {
try {
// remote files with animated GIFs as thumbnails will match this
return File_thumbnail::byFile($this);
} catch (NoResultException $e) {
// and if it's not a remote file, it'll be safe to use the locally stored File
throw new UseFileAsThumbnailException($this);
}
}
}
return $image->getFileThumbnail($width, $height, $crop,
!is_null($upscale) ? $upscale : common_config('thumbnail', 'upscale'));
}
public function getPath()
{
$filepath = self::path($this->filename);
if (!file_exists($filepath)) {
throw new FileNotFoundException($filepath);
}
return $filepath;
}
public function getAttachmentUrl()
{
return common_local_url('attachment', array('attachment'=>$this->getID()));
}
/**
* @param mixed $use_local true means require local, null means prefer local, false means use whatever is stored
* @return string
* @throws FileNotStoredLocallyException
*/
public function getUrl($use_local=null)
{
if ($use_local !== false) {
if (is_string($this->filename) || !empty($this->filename)) {
// A locally stored file, so let's generate a URL for our instance.
return self::url($this->getFilename());
}
if ($use_local) {
// if the file wasn't stored locally (has filename) and we require a local URL
throw new FileNotStoredLocallyException($this);
}
}
// No local filename available, return the URL we have stored
return $this->url;
}
static public function getByUrl($url)
{
$file = new File();
$file->urlhash = self::hashurl($url);
if (!$file->find(true)) {
throw new NoResultException($file);
}
return $file;
}
/**
* @param string $hashstr String of (preferrably lower case) hexadecimal characters, same as result of 'hash_file(...)'
* @return File
* @throws NoResultException
*/
static public function getByHash($hashstr)
{
$file = new File();
$file->filehash = strtolower($hashstr);
if (!$file->find(true)) {
throw new NoResultException($file);
}
return $file;
}
public function updateUrl($url)
{
$file = File::getKV('urlhash', self::hashurl($url));
if ($file instanceof File) {
throw new ServerException('URL already exists in DB');
}
$sql = 'UPDATE %1$s SET urlhash=%2$s, url=%3$s WHERE urlhash=%4$s;';
$result = $this->query(sprintf($sql, $this->tableName(),
$this->_quote((string)self::hashurl($url)),
$this->_quote((string)$url),
$this->_quote((string)$this->urlhash)));
if ($result === false) {
common_log_db_error($this, 'UPDATE', __FILE__);
throw new ServerException("Could not UPDATE {$this->tableName()}.url");
}
return $result;
}
/**
* Blow the cache of notices that link to this URL
*
* @param boolean $last Whether to blow the "last" cache too
*
* @return void
*/
function blowCache($last=false)
{
self::blow('file:notice-ids:%s', $this->id);
if ($last) {
self::blow('file:notice-ids:%s;last', $this->id);
}
self::blow('file:notice-count:%d', $this->id);
}
/**
* Stream of notices linking to this URL
*
* @param integer $offset Offset to show; default is 0
* @param integer $limit Limit of notices to show
* @param integer $since_id Since this notice
* @param integer $max_id Before this notice
*
* @return array ids of notices that link to this file
*/
function stream($offset=0, $limit=NOTICES_PER_PAGE, $since_id=0, $max_id=0)
{
// FIXME: Try to get the Profile::current() here in some other way to avoid mixing
// the current session user with possibly background/queue processing.
$stream = new FileNoticeStream($this, Profile::current());
return $stream->getNotices($offset, $limit, $since_id, $max_id);
}
function noticeCount()
{
$cacheKey = sprintf('file:notice-count:%d', $this->id);
$count = self::cacheGet($cacheKey);
if ($count === false) {
$f2p = new File_to_post();
$f2p->file_id = $this->id;
$count = $f2p->count();
self::cacheSet($cacheKey, $count);
}
return $count;
}
public function isLocal()
{
return !empty($this->filename);
}
public function delete($useWhere=false)
{
// Delete the file, if it exists locally
if (!empty($this->filename) && file_exists(self::path($this->filename))) {
$deleted = @unlink(self::path($this->filename));
if (!$deleted) {
common_log(LOG_ERR, sprintf('Could not unlink existing file: "%s"', self::path($this->filename)));
}
}
// Clear out related things in the database and filesystem, such as thumbnails
if (Event::handle('FileDeleteRelated', array($this))) {
$thumbs = new File_thumbnail();
$thumbs->file_id = $this->id;
if ($thumbs->find()) {
while ($thumbs->fetch()) {
$thumbs->delete();
}
}
$f2p = new File_to_post();
$f2p->file_id = $this->id;
if ($f2p->find()) {
while ($f2p->fetch()) {
$f2p->delete();
}
}
}
// And finally remove the entry from the database
return parent::delete($useWhere);
}
public function getTitle()
{
$title = $this->title ?: $this->filename;
return $title ?: null;
}
public function setTitle($title)
{
$orig = clone($this);
$this->title = mb_strlen($title) > 0 ? $title : null;
return $this->update($orig);
}
static public function hashurl($url)
{
if (empty($url)) {
throw new Exception('No URL provided to hash algorithm.');
}
return hash(self::URLHASH_ALG, $url);
}
static public function beforeSchemaUpdate()
{
$table = strtolower(get_called_class());
$schema = Schema::get();
$schemadef = $schema->getTableDef($table);
// 2015-02-19 We have to upgrade our table definitions to have the urlhash field populated
if (isset($schemadef['fields']['urlhash']) && isset($schemadef['unique keys']['file_urlhash_key'])) {
// We already have the urlhash field, so no need to migrate it.
return;
}
echo "\nFound old $table table, upgrading it to contain 'urlhash' field...";
$file = new File();
$file->query(sprintf('SELECT id, LEFT(url, 191) AS shortenedurl, COUNT(*) AS c FROM %1$s WHERE LENGTH(url)>191 GROUP BY shortenedurl HAVING c > 1', $schema->quoteIdentifier($table)));
print "\nFound {$file->N} URLs with too long entries in file table\n";
while ($file->fetch()) {
// We've got a URL that is too long for our future file table
// so we'll cut it. We could save the original URL, but there is
// no guarantee it is complete anyway since the previous max was 255 chars.
$dupfile = new File();
// First we find file entries that would be duplicates of this when shortened
// ... and we'll just throw the dupes out the window for now! It's already so borken.
$dupfile->query(sprintf('SELECT * FROM file WHERE LEFT(url, 191) = %1$s', $dupfile->_quote($file->shortenedurl)));
// Leave one of the URLs in the database by using ->find(true) (fetches first entry)
if ($dupfile->find(true)) {
print "\nShortening url entry for $table id: {$file->id} [";
$orig = clone($dupfile);
$origurl = $dupfile->url; // save for logging purposes
$dupfile->url = $file->shortenedurl; // make sure it's only 191 chars from now on
$dupfile->update($orig);
print "\nDeleting duplicate entries of too long URL on $table id: {$file->id} [";
// only start deleting with this fetch.
while($dupfile->fetch()) {
common_log(LOG_INFO, sprintf('Deleting duplicate File entry of %1$d: %2$d (original URL: %3$s collides with these first 191 characters: %4$s', $dupfile->id, $file->id, $origurl, $file->shortenedurl));
print ".";
$dupfile->delete();
}
print "]\n";
} else {
print "\nWarning! URL suddenly disappeared from database: {$file->url}\n";
}
}
echo "...and now all the non-duplicates which are longer than 191 characters...\n";
$file->query('UPDATE file SET url=LEFT(url, 191) WHERE LENGTH(url)>191');
echo "\n...now running hacky pre-schemaupdate change for $table:";
// We have to create a urlhash that is _not_ the primary key,
// transfer data and THEN run checkSchema
$schemadef['fields']['urlhash'] = array (
'type' => 'varchar',
'length' => 64,
'not null' => false, // this is because when adding column, all entries will _be_ NULL!
'description' => 'sha256 of destination URL (url field)',
);
$schemadef['fields']['url'] = array (
'type' => 'text',
'description' => 'destination URL after following possible redirections',
);
unset($schemadef['unique keys']);
$schema->ensureTable($table, $schemadef);
echo "DONE.\n";
$classname = ucfirst($table);
$tablefix = new $classname;
// urlhash is hash('sha256', $url) in the File table
echo "Updating urlhash fields in $table table...";
// Maybe very MySQL specific :(
$tablefix->query(sprintf('UPDATE %1$s SET %2$s=%3$s;',
$schema->quoteIdentifier($table),
'urlhash',
// The line below is "result of sha256 on column `url`"
'SHA2(url, 256)'));
echo "DONE.\n";
echo "Resuming core schema upgrade...";
}
}