forked from GNUsocial/gnu-social
		
	
		
			
				
	
	
		
			497 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			497 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| 
 | |
| // {{{ License
 | |
| // This file is part of GNU social - https://www.gnu.org/software/social
 | |
| //
 | |
| // GNU social is free software: you can redistribute it and/or modify
 | |
| // it under the terms of the GNU Affero General Public License as published by
 | |
| // the Free Software Foundation, either version 3 of the License, or
 | |
| // (at your option) any later version.
 | |
| //
 | |
| // GNU social is distributed in the hope that it will be useful,
 | |
| // but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| // GNU Affero General Public License for more details.
 | |
| //
 | |
| // You should have received a copy of the GNU Affero General Public License
 | |
| // along with GNU social.  If not, see <http://www.gnu.org/licenses/>.
 | |
| // }}}
 | |
| 
 | |
| /**
 | |
|  * OEmbed and OpenGraph implementation for GNU social
 | |
|  *
 | |
|  * @package   GNUsocial
 | |
|  *
 | |
|  * @author    Mikael Nordfeldth
 | |
|  * @author    Stephen Paul Weber
 | |
|  * @author    hannes
 | |
|  * @author    Mikael Nordfeldth
 | |
|  * @author    Miguel Dantas
 | |
|  * @author    Diogo Peralta Cordeiro <mail@diogo.site>
 | |
|  * @author    Hugo Sales <hugo@hsal.es>
 | |
|  *
 | |
|  * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
 | |
|  * @license   https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
 | |
|  */
 | |
| 
 | |
| namespace Plugin\Embed;
 | |
| 
 | |
| use App\Core\Cache;
 | |
| use App\Core\DB\DB;
 | |
| use App\Core\Event;
 | |
| use App\Core\GSFile;
 | |
| use App\Core\HTTPClient;
 | |
| use App\Core\Log;
 | |
| use App\Core\Modules\Plugin;
 | |
| use App\Core\Router\RouteLoader;
 | |
| use App\Core\Router\Router;
 | |
| use App\Entity\Attachment;
 | |
| use App\Entity\AttachmentThumbnail;
 | |
| use App\Util\Common;
 | |
| use App\Util\Exception\DuplicateFoundException;
 | |
| use App\Util\Exception\NotFoundException;
 | |
| use App\Util\Formatting;
 | |
| use App\Util\TemporaryFile;
 | |
| use Embed\Embed as LibEmbed;
 | |
| use Exception;
 | |
| use Symfony\Component\HttpFoundation\Request;
 | |
| 
 | |
| /**
 | |
|  * Base class for the Embed plugin that does most of the heavy lifting to get
 | |
|  * and display representations for remote content.
 | |
|  *
 | |
|  * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
 | |
|  * @license   https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
 | |
|  */
 | |
| class Embed extends Plugin
 | |
| {
 | |
|     /**
 | |
|      *  Settings which can be set in social.local.yaml
 | |
|      *  WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
 | |
|      */
 | |
|     public $domain_allowlist = [
 | |
|         // hostname => service provider
 | |
|         '.*' => '', // Default to allowing any host
 | |
|     ];
 | |
| 
 | |
|     /**
 | |
|      * This code executes when GNU social creates the page routing, and we hook
 | |
|      * on this event to add our action handler for Embed.
 | |
|      *
 | |
|      * @param $m URLMapper the router that was initialized.
 | |
|      *
 | |
|      * @return bool true if successful, the exception object if it isn't.
 | |
|      * @throws Exception
 | |
|      *
 | |
|      */
 | |
|     public function onAddRoute(RouteLoader $m): bool
 | |
|     {
 | |
|         $m->connect('oembed', 'main/oembed', Controller\Embed::class);
 | |
|         $m->connect('embed', 'main/embed', Controller\Embed::class);
 | |
|         return Event::next;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Insert oembed and opengraph tags in all HTML head elements
 | |
|      */
 | |
|     public function onShowHeadElements(Request $request, array &$result)
 | |
|     {
 | |
|         $matches = [];
 | |
|         preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches);
 | |
|         switch ($matches[1]) {
 | |
|             case 'attachment':
 | |
|                 $url = "{$matches[1]}/{$matches[2]}";
 | |
|                 break;
 | |
|         }
 | |
| 
 | |
|         if (isset($url)) {
 | |
|             foreach (['xml', 'json'] as $format) {
 | |
|                 $result[] = [
 | |
|                     'link' => [
 | |
|                         'rel' => 'alternate',
 | |
|                         'type' => "application/{$format}+oembed",
 | |
|                         'href' => Router::url('embed', ['format' => $format, 'url' => $url]),
 | |
|                         'title' => 'oEmbed',
 | |
|                     ],];
 | |
|             }
 | |
|         }
 | |
|         return Event::next;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Save embedding information for an Attachment, if applicable.
 | |
|      *
 | |
|      * Normally this event is called through File::saveNew()
 | |
|      *
 | |
|      * @param Attachment $attachment The newly inserted Attachment object.
 | |
|      *
 | |
|      * @return bool success
 | |
|      */
 | |
|     public function onAttachmentStoreNew(Attachment $attachment): bool
 | |
|     {
 | |
|         try {
 | |
|             DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);
 | |
|         } catch (NotFoundException) {
 | |
|             if ($attachment->hasRemoteUrl() && $attachment->hasMimetype()) {
 | |
|                 $mimetype = $attachment->getMimetype();
 | |
|                 if (Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml')) {
 | |
|                     try {
 | |
|                         $embed_data = $this->getEmbed($attachment->getRemoteUrl(), $attachment);
 | |
|                         $embed_data['attachment_id'] = $attachment->getId();
 | |
|                         DB::persist(Entity\AttachmentEmbed::create($embed_data));
 | |
|                         DB::flush();
 | |
|                     } catch (Exception $e) {
 | |
|                         Log::warning($e);
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|         } catch (DuplicateFoundException) {
 | |
|             Log::warning("Strangely, an attachment_embed object exists for new file {$attachment->getID()}");
 | |
|         }
 | |
|         return Event::next;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Replace enclosure representation of an attachment with the data from embed
 | |
|      */
 | |
|     public function onAttachmentFileInfo(int $attachment_id, ?array &$enclosure)
 | |
|     {
 | |
|         try {
 | |
|             $embed = DB::findOneBy('attachment_embed', ['attachment_id' => $attachment_id]);
 | |
|         } catch (NotFoundException) {
 | |
|             return Event::next;
 | |
|         }
 | |
| 
 | |
|         // We know about this attachment, so we 'own' it, but know
 | |
|         // that it doesn't have an image
 | |
|         if (!$embed->isImage()) {
 | |
|             $enclosure = null;
 | |
|             return Event::stop;
 | |
|         }
 | |
| 
 | |
|         $enclosure = [
 | |
|             'filepath' => $embed->getFilepath(),
 | |
|             'mimetype' => $embed->getMimetype(),
 | |
|             'title' => $embed->getTitle(),
 | |
|             'width' => $embed->getWidth(),
 | |
|             'height' => $embed->getHeight(),
 | |
|             'url' => $embed->getMediaUrl(),
 | |
|         ];
 | |
| 
 | |
|         return Event::stop;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Show this attachment enhanced with the corresponing Embed data, if available
 | |
|      */
 | |
|     public function onShowAttachment(Attachment $attachment, array &$res)
 | |
|     {
 | |
|         try {
 | |
|             $embed = Cache::get('attachment-embed-' . $attachment->getId(),
 | |
|                 fn() => DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]));
 | |
|         } catch (DuplicateFoundException $e) {
 | |
|             Log::waring($e);
 | |
|             return Event::next;
 | |
|         } catch (NotFoundException) {
 | |
|             return Event::next;
 | |
|         }
 | |
|         if (is_null($embed) && empty($embed->getAuthorName()) && empty($embed->getProvider())) {
 | |
|             Log::debug('Embed doesn\'t have a representation for the attachment #' . $attachment->getId());
 | |
|             return Event::next;
 | |
|         }
 | |
| 
 | |
|         $attributes = $embed->getImageHTMLAttributes(['class' => 'u-photo embed']);
 | |
| 
 | |
|         $res[] = Formatting::twigRenderString(<<<END
 | |
| <article class="h-entry embed">
 | |
|     <header>
 | |
|         {% if attributes != false %}
 | |
|             <img class="u-photo embed" width="{{attributes['width']}}" height="{{attributes['height']}}" src="{{attributes['src']}}" />
 | |
|         {% endif %}
 | |
|         <h5 class="p-name embed">
 | |
|              <a class="u-url" href="{{attachment.getRemoteUrl()}}">{{embed.getTitle() | escape}}</a>
 | |
|         </h5>
 | |
|         <div class="p-author embed">
 | |
|              {% if embed.getAuthorName() is not null %}
 | |
|                   <div class="fn vcard author">
 | |
|                       {% if embed.getAuthorUrl() is null %}
 | |
|                            <p>{{embed.getAuthorName()}}</p>
 | |
|                       {% else %}
 | |
|                            <a href="{{embed.getAuthorUrl()}}" class="url">{{embed.getAuthorName()}}</a>
 | |
|                       {% endif %}
 | |
|                   </div>
 | |
|              {% endif %}
 | |
|              {% if embed.getProvider() is not null %}
 | |
|                   <div class="fn vcard">
 | |
|                       {% if embed.getProviderUrl() is null %}
 | |
|                           <p>{{embed.getProvider()}}</p>
 | |
|                       {% else %}
 | |
|                           <a href="{{embed.getProviderUrl()}}" class="url">{{embed.getProvider()}}</a>
 | |
|                       {% endif %}
 | |
|                   </div>
 | |
|              {% endif %}
 | |
|         </div>
 | |
|     </header>
 | |
|     <div class="p-summary embed">
 | |
|         {{ embed.getHtml() | escape }}
 | |
|     </div>
 | |
| </article>
 | |
| END, ['embed' => $embed, 'attributes' => $attributes, 'attachment' => $attachment]);
 | |
| 
 | |
|         return Event::stop;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * @return bool false on no check made, provider name on success
 | |
|      * @return string|false on no check made, provider name on success
 | |
|      *
 | |
|      * @throws ServerException if check is made but fails
 | |
|      *
 | |
|      */
 | |
|     protected function checkAllowlist(string $url): string|bool
 | |
|     {
 | |
|         if ($this->check_allowlist ?? false) {
 | |
|             return false;   // indicates "no check made"
 | |
|         }
 | |
| 
 | |
|         $host = parse_url($url, PHP_URL_HOST);
 | |
|         foreach ($this->domain_allowlist as $regex => $provider) {
 | |
|             if (preg_match("/{$regex}/", $host)) {
 | |
|                 return $provider;    // we trust this source, return provider name
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         throw new ServerException(_m('Domain not in remote thumbnail source allowlist: {host}', ['host' => $host]));
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Check the file size of a remote file using a HEAD request and checking
 | |
|      * the content-length variable returned.  This isn't 100% foolproof but is
 | |
|      * reliable enough for our purposes.
 | |
|      *
 | |
|      * @param string $url
 | |
|      * @param array|null $headers - if we already made a request
 | |
|      *
 | |
|      * @return int|null the file size if it succeeds, false otherwise.
 | |
|      */
 | |
|     private function getRemoteFileSize(string $url, ?array $headers = null): ?int
 | |
|     {
 | |
|         try {
 | |
|             if ($headers === null) {
 | |
|                 if (!Common::isValidHttpUrl($url)) {
 | |
|                     Log::error('Invalid URL in Embed::getRemoteFileSize()');
 | |
|                     return false;
 | |
|                 }
 | |
|                 $head = HTTPClient::head($url);
 | |
|                 $headers = $head->getHeaders();
 | |
|                 $headers = array_change_key_case($headers, CASE_LOWER);
 | |
|             }
 | |
|             return $headers['content-length'][0] ?? false;
 | |
|         } catch (Exception $e) {
 | |
|             Loog::error($e);
 | |
|             return false;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * A private helper function that uses a HEAD request to check the mime type
 | |
|      * of a remote URL to see it it's an image.
 | |
|      *
 | |
|      * @param mixed $url
 | |
|      * @param null|mixed $headers
 | |
|      *
 | |
|      * @return bool true if the remote URL is an image, or false otherwise.
 | |
|      */
 | |
|     private function isRemoteImage(string $url, ?array $headers = null): bool
 | |
|     {
 | |
|         try {
 | |
|             if ($headers === null) {
 | |
|                 if (!Common::isValidHttpUrl($url)) {
 | |
|                     Log::error('Invalid URL in Embed::getRemoteFileSize()');
 | |
|                     return false;
 | |
|                 }
 | |
|                 $head = HTTPClient::head($url);
 | |
|                 $headers = $head->getHeaders();
 | |
|                 $headers = array_change_key_case($headers, CASE_LOWER);
 | |
|             }
 | |
|             return !empty($headers['content-type']) && GSFile::mimetypeMajor($headers['content-type'][0]) === 'image';
 | |
|         } catch (Exception $e) {
 | |
|             Log::error($e);
 | |
|             return false;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Validate that $imgData is a valid image, place it in it's folder and resize
 | |
|      *
 | |
|      * @param $imgData - The image data to validate
 | |
|      * @param null|array $headers - The headers possible previous request to $url
 | |
|      */
 | |
|     protected function validateAndWriteImage($imgData, ?array $headers = null): array
 | |
|     {
 | |
|         $file = new TemporaryFile();
 | |
|         $file->write($imgData);
 | |
| 
 | |
|         Event::handle('HashFile', [$file->getRealPath(), &$hash]);
 | |
|         $filepath = Common::config('storage', 'dir') . "embed/{$hash}" . Common::config('thumbnail', 'extension');
 | |
|         $width = Common::config('plugin_embed', 'width');
 | |
|         $height = Common::config('plugin_embed', 'height');
 | |
|         $smart_crop = Common::config('plugin_embed', 'smart_crop');
 | |
| 
 | |
|         Event::handle('ResizeImagePath', [$file->getRealPath(), $filepath, &$width, &$height, $smart_crop, &$mimetype]);
 | |
| 
 | |
|         unset($file);
 | |
| 
 | |
|         if (!is_null($headers) && array_key_exists('content-disposition', $headers) && preg_match('/^.+; filename="(.+?)"$/', $headers['content-disposition'][0], $matches) === 1) {
 | |
|             $original_name = $matches[1];
 | |
|         }
 | |
| 
 | |
|         return [$filepath, $width, $height, $original_name ?? null, $mimetype];
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Create and store a thumbnail representation of a remote image
 | |
|      *
 | |
|      * @param Attachment $attachment
 | |
|      * @param string $media_url URL for the actual media representation
 | |
|      * @return array|bool
 | |
|      */
 | |
|     protected function storeRemoteThumbnail(Attachment $attachment, string $media_url): array|bool
 | |
|     {
 | |
|         if ($attachment->hasFilename() && file_exists($attachment->getPath())) {
 | |
|             throw new AlreadyFulfilledException(_m('A thumbnail seems to already exist for remote file with id=={id}', ['id' => $attachment->getId()]));
 | |
|         }
 | |
| 
 | |
|         if (Formatting::startsWith($media_url, 'file://')) {
 | |
|             $filename = Formatting::removePrefix($media_url, 'file://');
 | |
|             $info = getimagesize($filename);
 | |
|             $filename = basename($filename);
 | |
|             $width = $info[0];
 | |
|             $height = $info[1];
 | |
|         } else {
 | |
|             $this->checkAllowlist($media_url);
 | |
|             $head = HTTPClient::head($media_url);
 | |
|             $headers = $head->getHeaders();
 | |
|             $headers = array_change_key_case($headers, CASE_LOWER);
 | |
| 
 | |
|             try {
 | |
|                 $is_image = $this->isRemoteImage($media_url, $headers);
 | |
|                 if ($is_image == true) {
 | |
|                     $file_size = $this->getRemoteFileSize($media_url, $headers);
 | |
|                     $max_size = Common::config('attachments', 'file_quota');
 | |
|                     if (($file_size != false) && ($file_size > $max_size)) {
 | |
|                         throw new \Exception("Wanted to store remote thumbnail of size {$file_size} but the upload limit is {$max_size} so we aborted.");
 | |
|                     }
 | |
|                 } else {
 | |
|                     return false;
 | |
|                 }
 | |
|             } catch (Exception $err) {
 | |
|                 Log::debug('Could not determine size of remote image, aborted local storage.');
 | |
|                 throw $err;
 | |
|             }
 | |
| 
 | |
|             // First we download the file to memory and test whether it's actually an image file
 | |
|             Log::debug('Downloading remote thumbnail for file id==' . $attachment->getId() . " with thumbnail URL: {$media_url}");
 | |
| 
 | |
|             try {
 | |
|                 $imgData = HTTPClient::get($media_url)->getContent();
 | |
|                 if (isset($imgData)) {
 | |
|                     [$filepath, $width, $height, $original_name, $mimetype] = $this->validateAndWriteImage($imgData, $headers);
 | |
|                 } else {
 | |
|                     throw new UnsupportedMediaException(_m('HTTPClient returned an empty result'));
 | |
|                 }
 | |
|             } catch (UnsupportedMediaException $e) {
 | |
|                 // Couldn't find anything that looks like an image, nothing to do
 | |
|                 Log::debug($e);
 | |
|                 return false;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         DB::persist(AttachmentThumbnail::create(['attachment_id' => $attachment->getId(), 'width' => $width, 'height' => $height]));
 | |
|         DB::flush();
 | |
| 
 | |
|         return [$filepath, $width, $height, $original_name, $mimetype];
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Perform an oEmbed or OpenGraph lookup for the given $url.
 | |
|      *
 | |
|      * Some known hosts are allowlisted with API endpoints where we
 | |
|      * know they exist but autodiscovery data isn't available.
 | |
|      *
 | |
|      * Throws exceptions on failure.
 | |
|      *
 | |
|      * @param string $url
 | |
|      * @param Attachment $attachment
 | |
|      * @return array
 | |
|      */
 | |
|     public function getEmbed(string $url, Attachment $attachment): array
 | |
|     {
 | |
|         Log::info('Checking for remote URL metadata for ' . $url);
 | |
| 
 | |
|         try {
 | |
|             Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");
 | |
|             $embed = new LibEmbed();
 | |
|             $info = $embed->get($url);
 | |
|             $metadata['title'] = $info->title;
 | |
|             $metadata['html'] = $info->description;
 | |
|             $metadata['author_name'] = $info->authorName;
 | |
|             $metadata['author_url'] = $info->authorUrl;
 | |
|             $metadata['provider_name'] = $info->providerName;
 | |
|             $metadata['provider_url'] = $info->providerUrl;
 | |
| 
 | |
|             if (!is_null($info->image)) {
 | |
|                 $image_url = (string)$info->image;
 | |
| 
 | |
|                 if (Formatting::startsWith($image_url, 'data')) {
 | |
|                     // Inline image
 | |
|                     $imgData = base64_decode(substr($info->image, stripos($info->image, 'base64,') + 7));
 | |
|                     [$filepath, $width, $height, $original_name, $mimetype] = $this->validateAndWriteImage($imgData);
 | |
|                 } else {
 | |
|                     [$filepath, $width, $height, $original_name, $mimetype] = $this->storeRemoteThumbnail($attachment, $image_url);
 | |
|                 }
 | |
|                 $metadata['width'] = $width;
 | |
|                 $metadata['height'] = $height;
 | |
|                 $metadata['mimetype'] = $mimetype;
 | |
|                 $metadata['media_url'] = $image_url;
 | |
|                 $metadata['filename'] = Formatting::removePrefix($filepath, Common::config('storage', 'dir'));
 | |
|             }
 | |
|         } catch (Exception $e) {
 | |
|             Log::info("Failed to find Embed data for {$url} with 'oscarotero/Embed', got exception: " . $e);
 | |
|         }
 | |
| 
 | |
|         $metadata = self::normalize($metadata);
 | |
|         $attachment->setTitle($metadata['title']);
 | |
|         return $metadata;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Normalize fetched info.
 | |
|      */
 | |
|     public static function normalize(array $data): array
 | |
|     {
 | |
|         if (isset($metadata['url'])) {
 | |
|             // sometimes sites serve the path, not the full URL, for images
 | |
|             // let's "be liberal in what you accept from others"!
 | |
|             // add protocol and host if the thumbnail_url starts with /
 | |
|             if ($metadata['url'][0] == '/') {
 | |
|                 $thumbnail_url_parsed = parse_url($metadata['url']);
 | |
|                 $metadata['url'] = "{$thumbnail_url_parsed['scheme']}://{$thumbnail_url_parsed['host']}{$metadata['url']}";
 | |
|             }
 | |
| 
 | |
|             // Some wordpress opengraph implementations sometimes return a white blank image
 | |
|             // no need for us to save that!
 | |
|             if ($metadata['url'] == 'https://s0.wp.com/i/blank.jpg') {
 | |
|                 $metadata['url'] = null;
 | |
|             }
 | |
| 
 | |
|             if (!isset($data['width'])) {
 | |
|                 $data['width'] = Common::config('plugin_embed', 'width');
 | |
|                 $data['height'] = Common::config('plugin_embed', 'height');
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         return $data;
 | |
|     }
 | |
| }
 |