2009-05-11 18:45:00 +01:00
< ? php
2019-09-11 06:15:16 +01:00
// This file is part of GNU social - https://www.gnu.org/software/social
//
// GNU social is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GNU social is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with GNU social. If not, see <http://www.gnu.org/licenses/>.
2019-06-07 14:08:27 +01:00
/**
* @ category Files
* @ package GNUsocial
* @ author Mikael Nordfeldth < mmn @ hethane . se >
* @ author Miguel Dantas < biodantas @ gmail . com >
* @ copyright 2008 - 2009 , 2019 Free Software Foundation http :// fsf . org
2019-09-11 06:15:16 +01:00
* @ license https :// www . gnu . org / licenses / agpl . html GNU AGPL v3 or later
2009-05-11 18:45:00 +01:00
*/
2019-06-07 14:08:27 +01:00
// Abort immediately unless the GNUSOCIAL constant has been defined before this file is loaded.
if (!defined('GNUSOCIAL')) {
    die();
}
2009-05-11 18:45:00 +01:00
/**
* Table Definition for file
*/
2011-08-22 22:52:02 +01:00
class File extends Managed_DataObject
2009-05-11 18:45:00 +01:00
{
// Column-backed properties of the `file` table; the per-column notes below
// repeat the descriptions given in schemaDef().
public $__table = 'file' ; // table name
2009-06-22 22:19:41 +01:00
public $id ; // int(4) primary_key not_null; serial row id
2015-02-17 17:55:12 +00:00
public $urlhash ; // varchar(64) unique_key; sha256 of destination URL (url field)
public $url ; // text; destination URL after following possible redirections
2015-02-24 20:11:25 +00:00
public $filehash ; // varchar(64) indexed; sha256 of the file contents, only for locally stored files
2009-06-22 23:48:31 +01:00
public $mimetype ; // varchar(50); mime type of resource
public $size ; // int(4); size of resource when available
2015-12-27 10:50:11 +00:00
public $title ; // text(); title of resource when available
2009-06-22 23:48:31 +01:00
public $date ; // int(4); date of resource according to http query
public $protected ; // int(4); true when URL is private (needs login)
2015-12-27 10:50:11 +00:00
public $filename ; // text(); if file is stored locally (too) this is the filename
2014-05-07 08:51:37 +01:00
public $width ; // int(4); width in pixels, when it can be described as such and is known
public $height ; // int(4); height in pixels, when it can be described as such and is known
2020-06-28 23:41:46 +01:00
public $modified ; // timestamp() not_null default_CURRENT_TIMESTAMP; date this record was modified
2009-05-11 18:45:00 +01:00
2015-02-17 17:55:12 +00:00
// Hash algorithm name for urlhash values (the schema describes urlhash as a sha256 of the URL).
const URLHASH_ALG = 'sha256' ;
2015-02-24 20:11:25 +00:00
// Hash algorithm name for filehash values (the schema describes filehash as a sha256 of the file contents).
const FILEHASH_ALG = 'sha256' ;
2015-02-17 17:55:12 +00:00
2011-08-22 22:52:02 +01:00
/**
 * Describe the `file` table: its columns, primary key, unique keys and indexes.
 *
 * @return array Schema definition with the entries 'fields', 'primary key',
 *               'unique keys' and 'indexes'
 */
public static function schemaDef()
{
    $fields = [
        'id' => ['type' => 'serial', 'not null' => true],
        'urlhash' => [
            'type' => 'varchar',
            'length' => 64,
            'description' => 'sha256 of destination URL (url field)',
        ],
        'url' => [
            'type' => 'text',
            'description' => 'destination URL after following possible redirections',
        ],
        'filehash' => [
            'type' => 'varchar',
            'length' => 64,
            'not null' => false,
            'description' => 'sha256 of the file contents, only for locally stored files of course',
        ],
        'mimetype' => [
            'type' => 'varchar',
            'length' => 50,
            'description' => 'mime type of resource',
        ],
        'size' => [
            'type' => 'int',
            'description' => 'size of resource when available',
        ],
        'title' => [
            'type' => 'text',
            'description' => 'title of resource when available',
        ],
        'date' => [
            'type' => 'int',
            'description' => 'date of resource according to http query',
        ],
        'protected' => [
            'type' => 'int',
            'description' => 'true when URL is private (needs login)',
        ],
        'filename' => [
            'type' => 'text',
            'description' => 'if file is stored locally (too) this is the filename',
        ],
        'width' => [
            'type' => 'int',
            'description' => 'width in pixels, if it can be described as such and data is available',
        ],
        'height' => [
            'type' => 'int',
            'description' => 'height in pixels, if it can be described as such and data is available',
        ],
        'modified' => [
            'type' => 'timestamp',
            'not null' => true,
            'description' => 'date this record was modified',
        ],
    ];

    return [
        'fields' => $fields,
        'primary key' => ['id'],
        'unique keys' => [
            'file_urlhash_key' => ['urlhash'],
        ],
        'indexes' => [
            'file_filehash_idx' => ['filehash'],
        ],
    ];
}
2019-06-09 23:26:48 +01:00
/**
 * Tell whether a URL starts (case-insensitively) with one of the known
 * login-page prefixes: Facebook's login page or this site's 'main/login' path.
 *
 * @param string $url URL to test
 * @return bool true when the URL begins with one of the prefixes
 */
public static function isProtected($url)
{
    $login_prefixes = [
        'https://www.facebook.com/login.php',
        common_path('main/login'),
    ];

    foreach ($login_prefixes as $prefix) {
        $pattern = '!^' . preg_quote($prefix) . '(.*)$!i';
        if (preg_match($pattern, $url) === 1) {
            return true;
        }
    }

    return false;
}
2010-03-10 21:39:42 +00:00
/**
 * Save a new file record, unless a suitable one already exists.
 *
 * An existing record is returned instead of creating a new one when the URL
 * is already known, or when the URL is routed to one of this instance's
 * attachment actions and refers to an existing File.
 *
 * @param array $redir_data lookup data eg from File_redirection::where();
 *                          the keys 'protected', 'title', 'type', 'size' and 'time' are read
 * @param string $given_url
 * @return File
 * @throws ServerException
 */
public static function saveNew(array $redir_data, $given_url)
{
    // Last check against the URL itself, to avoid uniqueness bugs.
    try {
        return File::getByUrl($given_url);
    } catch (NoResultException $e) {
        // We don't have the file's URL since before, so let's continue.
    }

    // If the given url is a local attachment url, don't save a new file record.
    $url_host = parse_url($given_url, PHP_URL_HOST);
    $url_path = parse_url($given_url, PHP_URL_PATH);
    $is_our_host = $url_host == common_config('site', 'server')
        || $url_host == common_config('attachments', 'server');

    if ($is_our_host) {
        $router = Router::get();
        $attachment_actions = [
            'attachment',
            'attachment_view',
            'attachment_download',
            'attachment_thumbnail',
        ];
        try {
            // Skip the / in the beginning or the router's map() won't match
            $args = $router->map(mb_substr($url_path, 1));
            if (in_array($args['action'], $attachment_actions, true)) {
                try {
                    if (array_key_exists('attachment', $args)) {
                        return File::getByID((int) $args['attachment']);
                    }
                    if (array_key_exists('filehash', $args)) {
                        $existing = File::getByHash($args['filehash']);
                        $existing->fetch();
                        return $existing;
                    }
                } catch (NoResultException $e) {
                    // The link goes to us, but is _not_ an existing attachment (File).
                }
            }
        } catch (Exception $e) {
            // Some other exception was thrown while routing, likely a
            // ClientException (404) because of some malformed link to our
            // own instance, e.g. https://social.umeahackerspace.se/mmn/foaf'
            // (note the trailing apostrophe). It is still a valid URL, so we
            // do not abort. The apostrophe issue may or may not have been
            // fixed since, in lib/util.php in common_replace_urls_callback().
        }
    }

    $file = new File;
    $file->url = $given_url;

    // Copy over whichever pieces of metadata the lookup provided.
    $copied_fields = [
        'protected' => 'protected',
        'title' => 'title',
        'type' => 'mimetype',
    ];
    foreach ($copied_fields as $data_key => $property) {
        if (!empty($redir_data[$data_key])) {
            $file->$property = $redir_data[$data_key];
        }
    }
    if (!empty($redir_data['size'])) {
        $file->size = (int) $redir_data['size'];
    }
    if (isset($redir_data['time']) && $redir_data['time'] > 0) {
        $file->date = (int) $redir_data['time'];
    }

    $file->saveFile();
    return $file;
}
2019-06-09 23:26:48 +01:00
/**
 * Insert this record into the database.
 *
 * Sets urlhash from the url (null when there is no url), fires the
 * StartFileSaveNew event before the insert and EndFileSaveNew after it.
 *
 * @throws ServerException when StartFileSaveNew aborts or the insert fails
 */
public function saveFile()
{
    if (is_null($this->url)) {
        $this->urlhash = null;
    } else {
        $this->urlhash = self::hashurl($this->url);
    }

    $may_proceed = Event::handle('StartFileSaveNew', [&$this]);
    if (!$may_proceed) {
        throw new ServerException('File not saved due to an aborted StartFileSaveNew event.');
    }

    $this->id = $this->insert();
    if ($this->id === false) {
        throw new ServerException('File/URL metadata could not be saved to the database.');
    }

    Event::handle('EndFileSaveNew', [$this]);
}
2010-05-25 21:09:21 +01:00
/**
 * Look up a URL and return the File record that belongs to it.
 *
 * The URL is canonicalised, resolved through File_redirection::where() and
 * the resulting File is fetched. When a notice is given, the File is also
 * handed to File_to_post::processNew() together with that notice.
 *
 * @param string $given_url the URL we're looking at
 * @param Notice|null $notice (optional)
 * @param bool $followRedirects defaults to true (not read by this method)
 *
 * @return File
 *
 * @throws ServerException when no URL is given, it cannot be canonicalised,
 *                         or no File object could be obtained for it
 */
public static function processNew($given_url, ?Notice $notice = null, bool $followRedirects = true)
{
    if (empty($given_url)) {
        throw new ServerException('No given URL to process');
    }

    $canonical_url = File_redirection::_canonUrl($given_url);
    if (empty($canonical_url)) {
        throw new ServerException('No canonical URL from given URL to process');
    }

    $redirection = File_redirection::where($canonical_url);
    try {
        $file = $redirection->getFile();
    } catch (EmptyPkeyValueException $e) {
        common_log(
            LOG_ERR,
            'File_redirection::where gave object with empty file_id for given_url ' . _ve($canonical_url)
        );
        throw new ServerException('URL processing failed without new File object');
    } catch (NoResultException $e) {
        // This should not happen
        common_log(LOG_ERR, 'File_redirection after discovery could still not return a File object.');
        throw new ServerException('URL processing failed without new File object');
    }

    if ($notice !== null) {
        File_to_post::processNew($file, $notice);
    }

    return $file;
}
2009-06-01 02:03:55 +01:00
2019-06-09 23:26:48 +01:00
/**
 * Check an upload of the given size against the configured quotas.
 *
 * Three limits from the 'attachments' configuration are checked in order:
 * 'file_quota' (single file), 'user_quota' (stored total plus this file)
 * and 'monthly_quota' (total for the current month plus this file).
 *
 * @param Profile $scoped profile whose existing files are summed up
 * @param int $fileSize size in bytes of the file about to be stored
 * @return bool true when no quota is exceeded
 * @throws ClientException when any of the quotas would be exceeded
 */
public static function respectsQuota(Profile $scoped, $fileSize)
{
    if ($fileSize > common_config('attachments', 'file_quota')) {
        // TRANS: Message used to be inserted as %2$s in the text "No file may
        // TRANS: be larger than %1$d byte and the file you sent was %2$s.".
        // TRANS: %1$d is the number of bytes of an uploaded file.
        $fileSizeText = sprintf(_m('%1$d byte', '%1$d bytes', $fileSize), $fileSize);

        $fileQuota = common_config('attachments', 'file_quota');
        // TRANS: Message given if an upload is larger than the configured maximum.
        // TRANS: %1$d (used for plural) is the byte limit for uploads,
        // TRANS: %2$s is the proper form of "n bytes". This is the only ways to have
        // TRANS: gettext support multiple plurals in the same message, unfortunately...
        $too_large = _m(
            'No file may be larger than %1$d byte and the file you sent was %2$s. Try to upload a smaller version.',
            'No file may be larger than %1$d bytes and the file you sent was %2$s. Try to upload a smaller version.',
            $fileQuota
        );
        throw new ClientException(sprintf($too_large, $fileQuota, $fileSizeText));
    }

    // Sum up the sizes of the files attached to this profile's notices.
    $usage = new File;
    $query = "SELECT sum(size) AS total
        FROM file
        INNER JOIN file_to_post
        ON file_to_post.file_id = file.id
        INNER JOIN notice
        ON file_to_post.post_id = notice.id
        WHERE profile_id = {$scoped->id} AND
        filename IS NULL AND
        file.url IS NOT NULL";
    $usage->query($query);
    $usage->fetch();

    $total_with_upload = $usage->total + $fileSize;
    if ($total_with_upload > common_config('attachments', 'user_quota')) {
        // TRANS: Message given if an upload would exceed user quota.
        // TRANS: %d (number) is the user quota in bytes and is used for plural.
        $over_user_quota = _m(
            'A file this large would exceed your user quota of %d byte.',
            'A file this large would exceed your user quota of %d bytes.',
            common_config('attachments', 'user_quota')
        );
        throw new ClientException(
            sprintf($over_user_quota, common_config('attachments', 'user_quota'))
        );
    }

    // Same sum again, restricted to records modified in the current month.
    $query .= ' AND EXTRACT(MONTH FROM file.modified) = EXTRACT(MONTH FROM CURRENT_DATE)'
        . ' AND EXTRACT(YEAR FROM file.modified) = EXTRACT(YEAR FROM CURRENT_DATE)';
    $usage->query($query);
    $usage->fetch();

    $month_with_upload = $usage->total + $fileSize;
    if ($month_with_upload > common_config('attachments', 'monthly_quota')) {
        // TRANS: Message given if an upload would exceed a user's monthly quota.
        // TRANS: %d (number) is the monthly user quota in bytes and is used for plural.
        $over_monthly_quota = _m(
            'A file this large would exceed your monthly quota of %d byte.',
            'A file this large would exceed your monthly quota of %d bytes.',
            common_config('attachments', 'monthly_quota')
        );
        throw new ClientException(
            sprintf($over_monthly_quota, common_config('attachments', 'monthly_quota'))
        );
    }

    return true;
}
2009-06-22 23:48:31 +01:00
2015-05-30 14:41:04 +01:00
/**
 * Return this record's filename after it has passed tryFilename().
 *
 * @return string
 * @throws InvalidFilenameException when the stored filename does not validate
 */
public function getFilename()
{
    $stored_name = $this->filename;
    return self::tryFilename($stored_name);
}
2019-10-19 01:16:37 +01:00
/**
 * Return the stored size as an integer.
 *
 * @return int
 */
public function getSize(): int
{
    return intval($this->size);
}
2009-06-22 23:48:31 +01:00
/**
 * Build a new, collision-free filename for an upload.
 *
 * The result has the form "nickname-YYYYMMDD-origname-random.ext", where
 * origname is the original basename with every character other than
 * A-Z, a-z, 0-9, "." and "_" replaced by "_".
 *
 * @param Profile $profile uploader; their nickname prefixes the filename
 * @param string $origname filename as supplied with the upload
 * @param string $mimetype mimetype used to pick the extension
 * @return string a filename for which no file exists yet at self::path()
 * @throws ClientException
 * @throws InvalidFilenameException
 * @throws ServerException
 */
public static function filename(Profile $profile, $origname, $mimetype)
{
    $ext = self::guessMimeExtension($mimetype, $origname);

    // Normalize and make the original filename more URL friendly.
    $origname = basename($origname, ".{$ext}");
    if (class_exists('Normalizer')) {
        // http://php.net/manual/en/class.normalizer.php
        // http://www.unicode.org/reports/tr15/
        $origname = Normalizer::normalize($origname, Normalizer::FORM_KC);
    }
    $origname = preg_replace('/[^A-Za-z0-9\.\_]/', '_', $origname);

    $nickname = $profile->getNickname();
    // date() yields the same YYYYMMDD stamp as strftime('%Y%m%d'), which is
    // deprecated as of PHP 8.1.
    $datestamp = date('Ymd');
    do {
        // generate new random strings until we don't run into a filename collision.
        $random = strtolower(common_confirmation_code(16));
        $filename = "{$nickname}-{$datestamp}-{$origname}-{$random}.{$ext}";
    } while (file_exists(self::path($filename)));

    return $filename;
}
2009-06-22 23:48:31 +01:00
2019-06-16 00:33:12 +01:00
/**
 * Extract a usable extension from a filename, honouring the
 * 'extblacklist' setting of the 'attachments' configuration.
 *
 * @param string $filename
 * @return null|string|bool null when the filename has no extension,
 *                          false when the extension is blacklisted without a string replacement,
 *                          the replacement string from 'extblacklist' when there is one,
 *                          otherwise the lower-cased extension itself
 * @throws ServerException
 */
public static function getSafeExtension(string $filename)
{
    if (preg_match('/^.+?\.([A-Za-z0-9]+)$/', $filename, $matches) !== 1) {
        // No extension
        return null;
    }

    // we matched on a file extension, so let's see if it means something.
    $ext = mb_strtolower($matches[1]);
    $blacklist = common_config('attachments', 'extblacklist');

    if (!array_key_exists($ext, $blacklist)) {
        // the extension was not blacklisted so it's ok to use it
        return $ext;
    }

    // Blacklisted: either blocked outright or mapped to a safe replacement
    // extension ('php' => 'phps' for example).
    $replacement = $blacklist[$ext];
    return is_string($replacement) ? $replacement : false;
}
2016-02-25 21:15:54 +00:00
/**
 * Work out a file extension for a mimetype.
 *
 * Tried in order: the extension known for the mimetype; the (safe)
 * extension of the supplied filename when the mimetype is unknown; finally
 * the last token of the mimetype itself, e.g. "jpeg" from image/jpeg and
 * "json" from application/jrd+json.
 *
 * @param $mimetype string The mimetype we've discovered for this file.
 * @param $filename string An optional filename which we can use on failure.
 * @return string|null null only when the mimetype is unknown and the
 *                     supplied filename has no extension
 * @throws ClientException when the filename's extension is blacklisted
 * @throws ServerException
 * @throws Exception when no extension can be extracted from the mimetype
 */
public static function guessMimeExtension($mimetype, $filename = null)
{
    try {
        // first see if we know the extension for our mimetype; if so, use it!
        return common_supported_mime_to_ext($mimetype);
    } catch (UnknownMimeExtensionException $e) {
        // We don't know the extension for this mimetype, so we try to
        // guess based on filename, if one was supplied.
        if (!is_null($filename)) {
            $ext = self::getSafeExtension($filename);
            if ($ext === false) {
                // we don't have a safe replacement extension
                throw new ClientException(_m('Blacklisted file extension.'));
            }
            return $ext;
        }
    } catch (Exception $e) {
        common_log(LOG_INFO, 'Problem when figuring out extension for mimetype: ' . _ve($e));
    }

    // If nothing else has given us a result, try to extract it from
    // the mimetype value (this turns .jpg to .jpeg for example...)
    // Parameters such as "; charset=utf-8" are dropped first so they
    // cannot be mistaken for the subtype.
    $bare_mimetype = mb_strtolower(trim(explode(';', (string) $mimetype, 2)[0]));

    // Collect every token that follows a separator and keep the LAST one,
    // so application/jrd+json gives "json" rather than "jrd".
    $matches = [];
    if (!preg_match_all('/[\/+,\-.]([a-z0-9]+)/', $bare_mimetype, $matches)) {
        throw new Exception("Malformed mimetype: {$mimetype}");
    }

    return end($matches[1]);
}
2010-02-01 16:48:31 +00:00
/**
 * Validation for as-saved base filenames.
 *
 * A valid filename consists only of the characters A-Z, a-z, 0-9, ".",
 * "_" and "-", and is not empty.
 *
 * @param mixed $filename
 * @return false|int 1 when valid, 0 when not, false when the match itself failed
 */
public static function validFilename($filename)
{
    // The D modifier makes "$" match only at the very end of the subject;
    // without it a filename with a trailing newline would pass validation.
    return preg_match('/^[A-Za-z0-9._-]+$/D', $filename);
}
2020-06-27 22:39:09 +01:00
/**
 * Return the filename unchanged when validFilename() accepts it.
 *
 * @param mixed $filename
 * @return string the same filename, for easy if-statementing
 * @throws InvalidFilenameException when validFilename() rejects it
 */
public static function tryFilename($filename): string
{
    if (self::validFilename($filename)) {
        return $filename;
    }

    throw new InvalidFilenameException($filename);
}
2010-02-01 16:48:31 +00:00
/**
 * Construct a path by joining a directory and a filename.
 *
 * A directory separator is appended to the directory unless it already
 * ends in "/" or "\".
 *
 * @param mixed $filename Will be tested by tryFilename
 * @param string|null $dir The 'dir' setting of the 'attachments' configuration by default
 * @param bool $test_filename false skips the tryFilename check
 * @return string
 * @throws InvalidFilenameException
 * @throws ServerException
 */
public static function path($filename, ?string $dir = null, bool $test_filename = true)
{
    if ($test_filename) {
        self::tryFilename($filename);
    }

    $dir = $dir ?? common_config('attachments', 'dir');

    // Look at the last BYTE of the directory. Indexing the string with a
    // character count (mb_strlen) picks the wrong byte when the directory
    // contains multibyte characters, and fails on an empty string.
    $last_char = substr((string) $dir, -1);
    if ($last_char !== '/' && $last_char !== '\\') {
        $dir .= DIRECTORY_SEPARATOR;
    }

    return $dir . $filename;
}
2009-06-22 23:48:31 +01:00
2020-09-21 21:54:23 +01:00
/**
 * Build a URL for a file living in the attachments location.
 * Don't use for attachments, only for assets.
 *
 * On a private site the 'getfile' action URL is returned. Otherwise the
 * URL is assembled from the 'attachments' / 'site' configuration, using
 * https when GNUsocial::useHTTPS() or the attachments 'ssl' setting say so.
 *
 * @param $filename
 * @return mixed|string
 * @throws InvalidFilenameException
 * @throws ServerException
 */
public static function url($filename)
{
    self::tryFilename($filename);

    if (common_config('site', 'private')) {
        return common_local_url('getfile', ['filename' => $filename]);
    }

    // Stays null when no server is configured at all, so the final
    // concatenation never reads an undefined variable.
    $server = null;

    if (GNUsocial::useHTTPS()) {
        $protocol = 'https';
        $sslserver = common_config('attachments', 'sslserver');

        if (empty($sslserver)) {
            // XXX: this assumes that background dir == site dir + /file/
            // not true if there's another server
            $site_sslserver = common_config('site', 'sslserver');
            if (is_string($site_sslserver) && mb_strlen($site_sslserver) > 0) {
                $server = $site_sslserver;
            } elseif (common_config('site', 'server')) {
                $server = common_config('site', 'server');
            }
            $path = common_config('site', 'path') . '/file/';
        } else {
            $server = $sslserver;
            $path = common_config('attachments', 'sslpath');
            if (empty($path)) {
                $path = common_config('attachments', 'path');
            }
        }
    } else {
        $path = common_config('attachments', 'path');
        $server = common_config('attachments', 'server');
        if (empty($server)) {
            $server = common_config('site', 'server');
        }

        $protocol = common_config('attachments', 'ssl') ? 'https' : 'http';
    }

    // Make sure the path both starts and ends with a slash. substr() copes
    // with an empty path, where direct string-offset access raises a warning.
    $path = (string) $path;
    if (substr($path, -1) !== '/') {
        $path .= '/';
    }
    if (substr($path, 0, 1) !== '/') {
        $path = '/' . $path;
    }

    return $protocol . '://' . $server . $path . $filename;
}
2009-07-14 18:33:40 +01:00
2019-06-09 23:26:48 +01:00
// Enclosure objects built by getEnclosure(), keyed by File ID.
public static $_enclosures = [];

/**
 * Build (or fetch from the static cache) an object holding this file's
 * enclosure metadata.
 *
 * The fields title, url, date, modified, size, mimetype, width and height
 * are copied over unless they are the empty string. When there is a URL
 * and the bare mimetype is one of null, application/xhtml+xml or
 * text/html, the FileEnclosureMetadata event is fired so that handlers
 * can enrich the object.
 *
 * @return object enclosure with the copied (and possibly enriched) fields
 */
public function getEnclosure()
{
    $cache_key = $this->getID();
    if (isset(self::$_enclosures[$cache_key])) {
        return self::$_enclosures[$cache_key];
    }

    $enclosure = new stdClass();
    $copied_fields = ['title', 'url', 'date', 'modified', 'size', 'mimetype', 'width', 'height'];
    foreach ($copied_fields as $field) {
        if ($this->$field === '') {
            continue;
        }
        $enclosure->$field = $this->$field;
    }

    $needMoreMetadataMimetypes = [null, 'application/xhtml+xml', 'text/html'];
    $wants_more_metadata = isset($enclosure->url)
        && in_array(common_bare_mime($enclosure->mimetype), $needMoreMetadataMimetypes);

    if ($wants_more_metadata) {
        // This fetches enclosure metadata for non-local links with unset/HTML mimetypes,
        // which may be enriched through oEmbed or similar (implemented as plugins)
        Event::handle('FileEnclosureMetadata', [$this, &$enclosure]);
    }

    if (empty($enclosure->mimetype)) {
        // This means we either don't know what it is, so it can't
        // be shown as an enclosure, or it is an HTML link which
        // does not link to a resource with further metadata.
        // It's not really an error that must be shown or handled,
        // so it is only logged at debug level.
        common_debug('Unknown enclosure mimetype, not enough metadata');
    }

    self::$_enclosures[$this->getID()] = $enclosure;
    return $enclosure;
}
2010-03-03 00:30:09 +00:00
2016-01-07 16:35:37 +00:00
/**
 * Tell whether getThumbnail() succeeds for this file.
 *
 * @return bool false when getThumbnail() throws any Exception, true otherwise
 */
public function hasThumbnail()
{
    try {
        $this->getThumbnail();
        return true;
    } catch (Exception $e) {
        return false;
    }
}
2010-11-09 01:22:01 +00:00
/**
 * Get the attachment's thumbnail record, if any, or generate one.
 * The work is delegated to File_thumbnail::fromFileObject().
 *
 * @param int|null $width Max width of thumbnail in pixels. (if null, use common_config values)
 * @param int|null $height Max height of thumbnail in pixels. (if null, square-crop to $width)
 * @param bool $crop Crop to the max-values' aspect ratio
 * @param bool $force_still Don't allow fallback to showing original (such as animated GIF)
 * @param bool|null $upscale Whether or not to scale smaller images up to larger thumbnail sizes. (null = site default)
 *
 * @return File_thumbnail
 *
 * @throws ClientException
 * @throws FileNotFoundException
 * @throws FileNotStoredLocallyException
 * @throws InvalidFilenameException
 * @throws NoResultException
 * @throws ServerException on various other errors
 * @throws UnsupportedMediaException if, despite trying, we can't understand how to make a thumbnail for this format
 * @throws UseFileAsThumbnailException if the file is considered an image itself and should be itself as thumbnail
 */
public function getThumbnail(
    ?int $width = null,
    ?int $height = null,
    bool $crop = false,
    bool $force_still = true,
    ?bool $upscale = null
): File_thumbnail {
    $thumbnail = File_thumbnail::fromFileObject(
        $this,
        $width,
        $height,
        $crop,
        $force_still,
        $upscale
    );

    return $thumbnail;
}
2014-04-16 18:14:26 +01:00
/**
 * Return the filesystem path of this record's file.
 *
 * @return string path built by self::path() from the stored filename
 * @throws FileNotFoundException when nothing exists at that path
 * @throws InvalidFilenameException
 * @throws ServerException
 */
public function getPath()
{
    $filepath = self::path($this->filename);

    if (file_exists($filepath)) {
        return $filepath;
    }

    throw new FileNotFoundException($filepath);
}
2015-01-12 18:22:10 +00:00
2019-06-29 20:10:20 +01:00
/**
 * Returns the path to either a file, or its thumbnail if the file doesn't exist.
 * This is useful in case the original file is deleted, or, as is the case for Embed
 * thumbnails, we only have a thumbnail and not a file.
 *
 * A thumbnail passed in takes precedence over everything else.
 *
 * @param File_thumbnail|null $thumbnail
 * @return string Path
 * @throws FileNotFoundException when a filename is stored but no file exists at its path
 * @throws FileNotStoredLocallyException when there is neither a filename nor a thumbnail
 * @throws InvalidFilenameException
 * @throws ServerException
 */
public function getFileOrThumbnailPath(?File_thumbnail $thumbnail = null): string
{
    if (!empty($thumbnail)) {
        return $thumbnail->getPath();
    }

    if (empty($this->filename)) {
        // No local file: fall back to a stored thumbnail, if there is one.
        try {
            return File_thumbnail::byFile($this, true)->getPath();
        } catch (NoResultException $e) {
            // File not stored locally
            throw new FileNotStoredLocallyException($this);
        }
    }

    $filepath = self::path($this->filename);
    if (!file_exists($filepath)) {
        throw new FileNotFoundException($filepath);
    }

    return $filepath;
}
/**
 * Return the mime type of the thumbnail if we have it, or, if not, of the File.
 *
 * With no thumbnail given and a filename stored, the record's own mimetype
 * is returned. Otherwise the type is read from the thumbnail image itself
 * (the given one, or the one found through File_thumbnail::byFile()).
 *
 * @param File_thumbnail|null $thumbnail
 * @return string
 * @throws FileNotFoundException
 * @throws NoResultException
 * @throws ServerException
 * @throws UnsupportedMediaException when the thumbnail cannot be read as an image
 * @throws Exception
 */
public function getFileOrThumbnailMimetype(?File_thumbnail $thumbnail = null): string
{
    $has_thumbnail = !empty($thumbnail);

    if (!$has_thumbnail && !empty($this->filename)) {
        return $this->mimetype;
    }

    $filepath = $has_thumbnail
        ? $thumbnail->getPath()
        : File_thumbnail::byFile($this, true)->getPath();

    $info = @getimagesize($filepath);
    if ($info === false) {
        throw new UnsupportedMediaException(_m("Thumbnail is not an image."));
    }

    return $info['mime'];
}
/**
 * Return the size of the thumbnail if we have it, or, if not, of the File.
 *
 * A given thumbnail is measured on disk; otherwise a record with a
 * filename reports getSize(); failing both, the thumbnail found through
 * File_thumbnail::byFile() is measured on disk.
 *
 * @param File_thumbnail|null $thumbnail
 * @return int
 * @throws FileNotFoundException
 * @throws NoResultException
 * @throws ServerException
 */
public function getFileOrThumbnailSize(?File_thumbnail $thumbnail = null): int
{
    if (!empty($thumbnail)) {
        return filesize($thumbnail->getPath());
    }

    if (!empty($this->filename)) {
        return $this->getSize();
    }

    $stored_thumbnail = File_thumbnail::byFile($this);
    return filesize($stored_thumbnail->getPath());
}
2016-05-01 10:26:28 +01:00
/**
 * Return the local URL of the 'attachment' action for this file's ID.
 */
public function getAttachmentUrl()
{
    $args = ['attachment' => $this->getID()];
    return common_local_url('attachment', $args);
}
2019-06-25 23:20:17 +01:00
/**
 * Return the local URL of the 'attachment_download' action for this file's hash.
 */
public function getAttachmentDownloadUrl()
{
    $args = ['filehash' => $this->filehash];
    return common_local_url('attachment_download', $args);
}
2019-06-26 01:54:55 +01:00
/**
 * Ask common_local_url() for the 'attachment_view' route of this File,
 * identified by its filehash.
 */
public function getAttachmentViewUrl()
{
    $routeArgs = ['filehash' => $this->filehash];

    return common_local_url('attachment_view', $routeArgs);
}
2016-07-06 23:44:50 +01:00
/**
 * Resolve a URL for this File.
 *
 * @param bool|null $use_local true means require local, null means prefer
 *                             original, false means use whatever is stored
 * @return string|null the stored url property, or the attachment view URL
 *                     when the url property is empty and $use_local is not false
 * @throws FileNotStoredLocallyException when $use_local is true but the File
 *                                       has a non-empty url property
 */
public function getUrl(?bool $use_local = null): ?string
{
    // Caller explicitly accepts whatever is stored.
    if ($use_local === false) {
        return $this->url;
    }

    // A locally stored file, so let's generate a URL for our instance.
    if (empty($this->url)) {
        return $this->getAttachmentViewUrl();
    }

    // The file isn't ours, but a local URL was required anyway.
    if ($use_local) {
        throw new FileNotStoredLocallyException($this);
    }

    // The original file's URL.
    return $this->url;
}
2014-04-16 18:14:26 +01:00
2019-06-09 23:26:48 +01:00
/**
 * Look up the File whose urlhash equals the hash of the given URL.
 *
 * @param mixed $url value passed to hashurl(), which rejects empty input
 * @return File the first matching row, fetched via find(true)
 * @throws NoResultException when find(true) reports no match
 */
public static function getByUrl($url)
{
    $candidate = new File();
    $candidate->urlhash = self::hashurl($url);

    if ($candidate->find(true)) {
        return $candidate;
    }

    throw new NoResultException($candidate);
}
2015-02-24 20:11:25 +00:00
/**
 * Start a lookup of File rows by their filehash.
 *
 * @param string $hashstr String of (preferably lower case) hexadecimal characters,
 *                        same as result of 'hash_file(...)'; it is lower-cased here
 * @return File
 * @throws NoResultException when find() reports no match
 */
public static function getByHash($hashstr)
{
    $candidate = new File();
    $candidate->filehash = strtolower($hashstr);

    // find() is called without the fetch flag here (getByUrl() passes true).
    // NOTE(review): presumably callers are expected to fetch() the row(s)
    // themselves - confirm against the call sites.
    if ($candidate->find()) {
        return $candidate;
    }

    throw new NoResultException($candidate);
}
2015-01-12 18:22:10 +00:00
/**
 * Re-key this File's database row to a different URL.
 *
 * The row is matched on the object's current urlhash; urlhash, url and
 * modified are rewritten with a direct UPDATE statement. The properties of
 * this object are not reassigned by this method.
 *
 * @param mixed $url the new URL (cast to string for the query)
 * @return mixed the value returned by query()
 * @throws ServerException when a File with the new urlhash already exists,
 *                         or when query() returns false
 */
public function updateUrl($url)
{
    // Refuse to continue if another row is already keyed by the new URL's hash.
    $existing = File::getKV('urlhash', self::hashurl($url));
    if ($existing instanceof File) {
        throw new ServerException('URL already exists in DB');
    }

    $statement = sprintf(
        <<<'END'
        UPDATE %1$s
        SET urlhash = %2$s, url = %3$s, modified = CURRENT_TIMESTAMP
        WHERE urlhash = %4$s;
        END,
        $this->tableName(),
        $this->_quote((string) self::hashurl($url)),
        $this->_quote((string) $url),
        $this->_quote((string) $this->urlhash)
    );

    $result = $this->query($statement);
    if ($result !== false) {
        return $result;
    }

    common_log_db_error($this, 'UPDATE', __FILE__);
    throw new ServerException("Could not UPDATE {$this->tableName()}.url");
}
2010-12-28 20:57:31 +00:00
/**
 * Blow the cache entries of notices that link to this File.
 *
 * Always blows the notice-ids and notice-count keys for this File's id;
 * the ";last" variant of the notice-ids key is blown only on request.
 *
 * @param boolean $last Whether to blow the "last" cache too
 *
 * @return void
 */
public function blowCache($last = false)
{
    $fileId = $this->id;

    self::blow('file:notice-ids:%s', $fileId);
    if ($last) {
        self::blow('file:notice-ids:%s;last', $fileId);
    }
    self::blow('file:notice-count:%d', $fileId);
}
/**
 * Stream of notices linking to this File.
 *
 * Delegates to FileNoticeStream::getNotices() with the arguments unchanged.
 *
 * @param integer $offset   Offset to show; default is 0
 * @param integer $limit    Limit of notices to show
 * @param integer $since_id Since this notice
 * @param integer $max_id   Before this notice
 *
 * @return mixed whatever getNotices() returns (earlier documentation
 *               described it as the ids of linking notices - TODO confirm)
 */
public function stream($offset = 0, $limit = NOTICES_PER_PAGE, $since_id = 0, $max_id = 0)
{
    // FIXME: Try to get the Profile::current() here in some other way to avoid mixing
    // the current session user with possibly background/queue processing.
    return (new FileNoticeStream($this, Profile::current()))
        ->getNotices($offset, $limit, $since_id, $max_id);
}
2019-06-09 23:26:48 +01:00
/**
 * Number of File_to_post rows that reference this File, cached per file id.
 *
 * A cache value of false is treated as a miss; on a miss the count is
 * taken from File_to_post and written back to the cache.
 *
 * @return mixed the cached value, or the result of File_to_post::count()
 */
public function noticeCount()
{
    $key = sprintf('file:notice-count:%d', $this->id);

    $cached = self::cacheGet($key);
    if ($cached !== false) {
        return $cached;
    }

    $links = new File_to_post();
    $links->file_id = $this->id;
    $fresh = $links->count();
    self::cacheSet($key, $fresh);

    return $fresh;
}
2014-05-12 13:33:41 +01:00
2021-02-16 18:30:21 +00:00
/**
 * A local file: it has a filename and no url.
 */
public function isLocal(): bool
{
    return !empty($this->filename) && empty($this->url);
}
2014-05-12 14:16:41 +01:00
2021-02-16 18:30:21 +00:00
/**
 * A remote file that wasn't fetched (not even a thumbnail of it):
 * it has a url but no filename.
 */
public function isNonFetchedRemoteFile(): bool
{
    return empty($this->filename) && !empty($this->url);
}
/**
 * A fetched remote file (maybe just a thumbnail of it):
 * it has both a url and a filename.
 */
public function isFetchedRemoteFile(): bool
{
    return !(empty($this->url) || empty($this->filename));
}
/**
 * A redirect: it has neither a filename nor a url.
 */
public function isRedirect(): bool
{
    if (!empty($this->url)) {
        return false;
    }

    return empty($this->filename);
}
/**
 * Is in a remote location, i.e. the File has no filename.
 */
public function isStoredRemotely(): bool
{
    $hasFilename = !empty($this->filename);

    return !$hasFilename;
}
2020-09-21 21:54:23 +01:00
/**
 * Delete the file from the filesystem, if it exists locally.
 *
 * Nothing happens when the File has no filename or the resolved path does
 * not exist. A failed removal is logged at LOG_ERR and is not raised.
 */
public function unlink()
{
    if (empty($this->filename)) {
        return;
    }

    $path = self::path($this->filename);
    if (!file_exists($path)) {
        return;
    }

    // Warnings from the filesystem call are suppressed; failure is logged instead.
    if (!@unlink($path)) {
        common_log(LOG_ERR, sprintf('Could not unlink existing file: "%s"', $path));
    }
}
/**
 * Delete this File: the local file first, then rows in related tables,
 * then the File row itself.
 *
 * The list of related classes starts with File_redirection, File_thumbnail
 * and File_to_post and is passed by reference to the 'FileDeleteRelated'
 * event, so handlers can alter it before the rows are removed.
 *
 * @param mixed $useWhere forwarded unchanged to the parent delete()
 * @return mixed the result of the parent delete()
 */
public function delete($useWhere = false)
{
    // Delete the file, if it exists locally
    $this->unlink();

    // Clear out related things in the database and filesystem, such as thumbnails
    $related = ['File_redirection', 'File_thumbnail', 'File_to_post'];
    Event::handle('FileDeleteRelated', [$this, &$related]);

    foreach ($related as $relatedClass) {
        $row = new $relatedClass();
        $row->file_id = $this->id;
        if (!$row->find()) {
            continue;
        }
        while ($row->fetch()) {
            $row->delete();
        }
    }

    // And finally remove the entry from the database
    return parent::delete($useWhere);
}
2014-08-05 10:30:45 +01:00
/**
 * Title to show for this File.
 *
 * Uses the stored title when it is truthy; otherwise falls back to
 * MediaFile::getDisplayName() for this File.
 *
 * @return mixed the title or display name, or null when both are falsy
 */
public function getTitle()
{
    if ($this->title) {
        return $this->title;
    }

    $displayName = MediaFile::getDisplayName($this);
    if ($displayName) {
        return $displayName;
    }

    return null;
}
2015-02-17 17:55:12 +00:00
2016-07-23 20:00:57 +01:00
/**
 * Store a new title and persist it through update().
 *
 * A title of zero length (per mb_strlen) is stored as null.
 *
 * @param mixed $title the new title
 * @return mixed the result of update() against a clone taken before the change
 */
public function setTitle($title)
{
    $before = clone $this;

    if (mb_strlen($title) > 0) {
        $this->title = $title;
    } else {
        $this->title = null;
    }

    return $this->update($before);
}
2019-06-09 23:26:48 +01:00
/**
 * Hash a URL with the algorithm named by self::URLHASH_ALG.
 *
 * @param mixed $url the URL to hash
 * @return string the value returned by hash()
 * @throws Exception when $url is empty
 */
public static function hashurl($url)
{
    if (!empty($url)) {
        return hash(self::URLHASH_ALG, $url);
    }

    throw new Exception('No URL provided to hash algorithm.');
}
2015-02-19 17:59:28 +00:00
2019-06-09 23:26:48 +01:00
/**
 * Pre-schema-update migration that introduces and populates 'urlhash'.
 *
 * Returns immediately when the table definition already has both the
 * 'urlhash' field and the 'file_urlhash_key' unique key. Otherwise it:
 *   1. finds rows whose url is longer than 191 characters and whose
 *      shortened form is reported as duplicated, shortens one matching row
 *      and deletes the further rows fetched from the same lookup;
 *   2. truncates every remaining url longer than 191 characters;
 *   3. adds a nullable 'urlhash' column, redefines 'url' as text and drops
 *      the unique keys from the definition handed to ensureTable();
 *   4. fills urlhash for every row with a SHA-256 of url computed by the
 *      database (PostgreSQL and MySQL expressions are provided).
 *
 * Progress is written to standard output with echo/print.
 *
 * @return void
 * @throws ServerException when the configured DB type is neither 'pgsql' nor 'mysql'
 */
public static function beforeSchemaUpdate()
{
    $table = strtolower(get_called_class());
    $schema = Schema::get();
    $schemadef = $schema->getTableDef($table);

    // 2015-02-19 We have to upgrade our table definitions to have the urlhash field populated
    if (isset($schemadef['fields']['urlhash']) && isset($schemadef['unique keys']['file_urlhash_key'])) {
        // We already have the urlhash field, so no need to migrate it.
        return;
    }
    echo "\nFound old $table table, upgrading it to contain 'urlhash' field...";

    $file = new File();
    $file->query(sprintf(
        'SELECT id, LEFT(url, 191) AS shortenedurl, COUNT(*) FROM %1$s ' .
        'WHERE LENGTH(url) > 191 GROUP BY id, shortenedurl HAVING COUNT(*) > 1',
        common_database_tablename($table)
    ));
    print "\nFound {$file->N} URLs with too long entries in file table\n";
    while ($file->fetch()) {
        // We've got a URL that is too long for our future file table
        // so we'll cut it. We could save the original URL, but there is
        // no guarantee it is complete anyway since the previous max was 255 chars.
        $dupfile = new File();
        // First we find file entries that would be duplicates of this when shortened
        // ... and we'll just throw the dupes out the window for now! It's already so borken.
        $dupfile->query(sprintf(
            'SELECT * FROM file WHERE LEFT(url, 191) = %1$s',
            $dupfile->_quote($file->shortenedurl)
        ));
        // Leave one of the URLs in the database by using ->find(true) (fetches first entry)
        if ($dupfile->find(true)) {
            print "\nShortening url entry for $table id: {$file->id} [";
            $orig = clone($dupfile);
            $origurl = $dupfile->url; // save for logging purposes
            $dupfile->url = $file->shortenedurl; // make sure it's only 191 chars from now on
            $dupfile->update($orig);
            print "\nDeleting duplicate entries of too long URL on $table id: {$file->id} [";
            // only start deleting with this fetch.
            while ($dupfile->fetch()) {
                common_log(LOG_INFO, sprintf('Deleting duplicate File entry of %1$d: %2$d (original URL: %3$s collides with these first 191 characters: %4$s', $dupfile->id, $file->id, $origurl, $file->shortenedurl));
                print ".";
                $dupfile->delete();
            }
            print "]\n";
        } else {
            // The driving SELECT only returns id, shortenedurl and COUNT(*), so
            // $file->url is never populated here; report the shortened URL instead.
            print "\nWarning! URL suddenly disappeared from database: {$file->shortenedurl}\n";
        }
    }
    echo "...and now all the non-duplicates which are longer than 191 characters...\n";
    $file->query('UPDATE file SET url = LEFT(url, 191) WHERE LENGTH(url) > 191');

    echo "\n...now running hacky pre-schemaupdate change for $table:";
    // We have to create a urlhash that is _not_ the primary key,
    // transfer data and THEN run checkSchema
    $schemadef['fields']['urlhash'] = [
        'type' => 'varchar',
        'length' => 64,
        'not null' => false, // this is because when adding column, all entries will _be_ NULL!
        'description' => 'sha256 of destination URL (url field)',
    ];
    $schemadef['fields']['url'] = [
        'type' => 'text',
        'description' => 'destination URL after following possible redirections',
    ];
    unset($schemadef['unique keys']);
    $schema->ensureTable($table, $schemadef);
    echo "DONE.\n";

    $classname = ucfirst($table);
    $tablefix = new $classname;
    // urlhash is hash('sha256', $url) in the File table
    echo "Updating urlhash fields in $table table...";
    switch (common_config('db', 'type')) {
        case 'pgsql':
            $url_sha256 = 'encode(sha256(CAST("url" AS bytea)), \'hex\')';
            break;
        case 'mysql':
            $url_sha256 = 'sha2(`url`, 256)';
            break;
        default:
            throw new ServerException('Unknown DB type selected.');
    }
    $tablefix->query(sprintf(
        'UPDATE %1$s SET urlhash = %2$s, modified = CURRENT_TIMESTAMP;',
        $tablefix->escapedTableName(),
        $url_sha256
    ));
    echo "DONE.\n";
    echo "Resuming core schema upgrade...";
}
2016-02-02 23:22:18 +00:00
}