<?php

if (!defined('GNUSOCIAL')) { exit(1); }

// FIXME: To support remote video/whatever files, this plugin needs reworking.

/**
 * Plugin for downloading remotely attached files to the local server.
 *
 * Hooks the CreateFileImageThumbnailSource event: when a File has no local
 * filename but does have a remote URL, the remote image is fetched (subject
 * to the optional whitelist/blacklist and the size limits below), written to
 * the local file store, and the File row is updated to point at the stored copy.
 */
class StoreRemoteMediaPlugin extends Plugin
{
    const PLUGIN_VERSION = '2.0.0';

    // settings which can be set in config.php with
    // addPlugin('StoreRemoteMedia', array('param'=>'value', ...));
    // WARNING, these are _regexps_ (slashes added later). Always escape your dots and end your strings
    public $domain_whitelist = [
        // hostname             => service provider
        '^i\d*\.ytimg\.com$'    => 'YouTube',
        '^i\d*\.vimeocdn\.com$' => 'Vimeo',
    ];

    public $append_whitelist = [];    // fill this array as domain_whitelist to add more trusted sources
    public $check_whitelist  = false; // security/abuse precaution

    public $domain_blacklist = [];    // same format as domain_whitelist: regexp => label
    public $check_blacklist = false;

    public $max_image_bytes = 10 * 1024 * 1024;  // 10MiB max image size by default

    protected $imgData = [];

    // these should be declared protected everywhere
    /**
     * Merges the user supplied $append_whitelist into $domain_whitelist.
     *
     * @return void
     */
    public function initialize()
    {
        parent::initialize();

        $this->domain_whitelist = array_merge($this->domain_whitelist, $this->append_whitelist);
    }

    /**
     * Fetches a remote image for a File that has no local copy and stores it locally.
     *
     * @param File        $file    File record whose thumbnail source is requested
     * @param string      $imgPath Output: set to the local path of the stored image on success
     * @param string|null $media   Media type; anything other than 'image' is ignored
     *
     * @return bool true when this handler did nothing (private site, non-image media,
     *              file already local, URL rejected, size limits exceeded, or the
     *              connection failed); false once $imgPath has been filled in
     *
     * @throws UnsupportedMediaException if the downloaded data is not a usable image
     * @throws ServerException           if the downloaded data cannot be written to disk
     */
    public function onCreateFileImageThumbnailSource(File $file, &$imgPath, $media = null)
    {
        // If we are on a private node, we won't do any remote calls (just as a precaution until
        // we can configure this from config.php for the private nodes)
        if (common_config('site', 'private')) {
            return true;
        }

        if ($media !== 'image') {
            return true;
        }

        // If there is a local filename, it is either a local file already or has already been downloaded.
        if (!empty($file->filename)) {
            return true;
        }

        $remoteUrl = $file->getUrl();

        if (empty($remoteUrl)) {
            return true;
        }

        if (!$this->checkWhiteList($remoteUrl) ||
            !$this->checkBlackList($remoteUrl)) {
            return true;
        }

        // Relative URL, something's off
        if (empty(parse_url($remoteUrl, PHP_URL_HOST))) {
            common_err("StoreRemoteMedia found a url without host (\"{$remoteUrl}\") for file with id = {$file->id}");
            return true;
        }

        try {
            $http = new HTTPClient();
            common_debug(sprintf('Performing HEAD request for remote file id==%u to avoid '.
                                 'unnecessarily downloading too large files. URL: %s',
                                 $file->getID(), $remoteUrl));

            // Keep the pre-redirect URL around for the error message below.
            $url = $remoteUrl;
            $head = $http->head($remoteUrl);
            $remoteUrl = $head->getEffectiveUrl();   // to avoid going through redirects again

            if (empty($remoteUrl)) {
                common_log(LOG_ERR, "URL after redirects is somehow empty, for URL {$url}");
                return true;
            }

            // The redirect target has to pass the blacklist as well.
            if (!$this->checkBlackList($remoteUrl)) {
                // LOG_WARNING is the PHP syslog constant; LOG_WARN is not defined by PHP.
                common_log(LOG_WARNING, sprintf('%s: Non-blacklisted URL %s redirected to blacklisted URL %s',
                                                __CLASS__, $file->getUrl(), $remoteUrl));
                return true;
            }

            $headers = $head->getHeader();
            $headers = array_change_key_case($headers, CASE_LOWER);

            // Use the advertised Content-Length when present, otherwise the size stored on the
            // File record. (The previous `isset(...) ?: ...` stored boolean true here, which made
            // both size-limit comparisons below always evaluate to false.)
            $filesize = isset($headers['content-length'])
                ? intval($headers['content-length'])
                : $file->getSize();

            if (empty($filesize)) {
                // file size not specified on remote server
                common_debug(sprintf('%s: Ignoring remote media because we did not get a ' .
                                     'content length for file id==%u', __CLASS__, $file->getID()));
                return true;
            } elseif ($filesize > $this->max_image_bytes) {
                //FIXME: When we perhaps start fetching videos etc. we'll need to
                // differentiate max_image_bytes from that...

                // file too big according to plugin configuration
                common_debug(sprintf('%s: Skipping remote media because content length (%u) ' .
                                     'is larger than plugin configured max_image_bytes (%u) ' .
                                     'for file id==%u', __CLASS__, intval($filesize),
                                     $this->max_image_bytes, $file->getID()));
                return true;
            } elseif ($filesize > common_config('attachments', 'file_quota')) {
                // file too big according to site configuration
                common_debug(sprintf('%s: Skipping remote media because content length (%u) ' .
                                     'is larger than file_quota (%u) for file id==%u',
                                     __CLASS__, intval($filesize),
                                     common_config('attachments', 'file_quota'), $file->getID()));
                return true;
            }

            // Then we download the file to memory and test whether it's actually an image file
            common_debug(sprintf('Downloading remote file id=%u (should be size %u) ' .
                                 'with effective URL: %s', $file->getID(), $filesize, _ve($remoteUrl)));
            $imgData = HTTPClient::quickGet($remoteUrl);
        } catch (HTTP_Request2_ConnectionException $e) {
            common_log(LOG_ERR, __CLASS__.': '._ve(get_class($e)).' on URL: ' .
                       _ve($file->getUrl()).' threw exception: '.$e->getMessage());
            return true;
        }

        // Warnings are suppressed on purpose: failure is reported through the false return value.
        $info = @getimagesizefromstring($imgData);
        if ($info === false) {
            throw new UnsupportedMediaException(_('Remote file format was not identified as an image.'), $remoteUrl);
        } elseif (!$info[0] || !$info[1]) {
            throw new UnsupportedMediaException(_('Image file had impossible geometry (0 width or height)'));
        }

        $filehash = hash(File::FILEHASH_ALG, $imgData);
        try {
            // Exception will be thrown before $file is set to anything, so old $file value will be kept
            $file = File::getByHash($filehash);

            //FIXME: Add some code so we don't have to store duplicate File rows for same hash files.
        } catch (NoResultException $e) {
            // No stored file with this hash yet: write the data out and update our File record.
            $original_name = HTTPClient::get_filename($remoteUrl, $headers);
            $filename = MediaFile::encodeFilename($original_name, $filehash);
            $fullpath = File::path($filename);

            common_debug("StoreRemoteMedia retrieved url {$remoteUrl} for file with id={$file->id} " .
                         "and will store in {$fullpath}");

            // Write the file to disk if it doesn't exist yet. Throw Exception on failure.
            if ((!file_exists($fullpath) || substr($fullpath, 0, strlen(INSTALLDIR)) != INSTALLDIR) &&
                file_put_contents($fullpath, $imgData) === false) {
                throw new ServerException(_('Could not write downloaded file to disk.'));
            }

            // Updated our database for the file record
            $orig = clone($file);
            $file->filehash = $filehash;
            $file->filename = $filename;
            $file->width = $info[0];    // array indexes documented on php.net:
            $file->height = $info[1];   // https://php.net/manual/en/function.getimagesize.php
            // Throws exception on failure.
            $file->updateWithKeys($orig);
        }

        // Get rid of the file from memory
        unset($imgData);

        // Output
        $imgPath = $file->getPath();

        return false;
    }

    /**
     * @param string $url       URL whose host is tested against $domain_blacklist
     *
     * @return boolean          true if given url passes blacklist check
     */
    protected function checkBlackList($url)
    {
        if (!$this->check_blacklist) {
            return true;
        }

        return !$this->hostMatchesAny($url, $this->domain_blacklist);
    }

    /**
     * @param string $url       URL whose host is tested against $domain_whitelist
     *
     * @return boolean          true if given url passes whitelist check
     */
    protected function checkWhiteList($url)
    {
        if (!$this->check_whitelist) {
            return true;
        }

        return $this->hostMatchesAny($url, $this->domain_whitelist);
    }

    /**
     * Tests the host part of a URL against the regexp keys of a domain list.
     *
     * @param string $url      URL to take the host from
     * @param array  $patterns regexp (without delimiters) => label, as in $domain_whitelist
     *
     * @return boolean         true if at least one regexp matches the host
     */
    private function hostMatchesAny($url, array $patterns)
    {
        // parse_url() yields null/false when there is no host; match against '' in that case.
        $host = (string) parse_url($url, PHP_URL_HOST);

        foreach ($patterns as $regex => $provider) {
            // The "/" delimiters are added here, so the configured regexps must not contain them unescaped.
            if (preg_match("/$regex/", $host)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Appends this plugin's name, version, author, homepage and description to $versions.
     *
     * @param array $versions list of plugin descriptions, extended in place
     *
     * @return boolean always true
     */
    public function onPluginVersion(array &$versions)
    {
        $versions[] = [
            'name' => 'StoreRemoteMedia',
            'version' => self::PLUGIN_VERSION,
            'author' => 'Mikael Nordfeldth',
            'homepage' => 'https://gnu.io/',
            'description' =>
            // TRANS: Plugin description.
            _m('Plugin for downloading remotely attached files to local server.'),
        ];
        return true;
    }
}