. // }}}

/**
 * OEmbed and OpenGraph implementation for GNU social
 *
 * @package   GNUsocial
 *
 * @author    Mikael Nordfeldth
 * @author    Stephen Paul Weber
 * @author    hannes
 * @author    Mikael Nordfeldth
 * @author    Miguel Dantas
 * @author    Hugo Sales
 * @author    Diogo Peralta Cordeiro
 * @copyright 2014-2022 Free Software Foundation, Inc http://www.fsf.org
 * @license   https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
 */

namespace Plugin\Embed;

use App\Core\Cache;
use App\Core\DB;
use App\Core\Event;
use App\Core\GSFile;
use App\Core\HTTPClient;
use function App\Core\I18n\_m;
use App\Core\Log;
use App\Core\Modules\Plugin;
use App\Core\Router;
use App\Entity\Note;
use App\Util\Common;
use App\Util\Exception\ClientException;
use App\Util\Exception\DuplicateFoundException;
use App\Util\Exception\NotFoundException;
use App\Util\Exception\ServerException;
use App\Util\Formatting;
use App\Util\TemporaryFile;
use Component\Attachment\Entity\Attachment;
use Component\Link\Entity\Link;
use Embed\Embed as LibEmbed;
use Exception;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface;

/**
 * Base class for the Embed plugin that does most of the heavy lifting to get
 * and display representations for remote content.
 *
 * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
 * @license   https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
 */
class Embed extends Plugin
{
    /**
     * @return string the version string of this plugin
     */
    public function version(): string
    {
        return '3.0.1';
    }

    /**
     * Settings which can be set in social.local.yaml
     * WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
     */
    public bool $check_whitelist = false;
    public bool $check_blacklist = false;
    public array $domain_whitelist = [
        // hostname
        '.*', // Default to allowing any host
    ];
    public array $domain_blacklist = [];
    // Whether to maintain a copy of the original media or only a thumbnail of it
    public bool $store_image = true;
    public ?int $thumbnail_width;
    public ?int $thumbnail_height;
    public ?int $max_size;
    public ?bool $smart_crop;

    // TODO: storeThumbs setting

    /**
     * Largest thumbnail (in the unit used by the configuration) this plugin
     * will download: the smaller of the plugin's own `max_file_size` and the
     * global attachments `file_quota`.
     */
    private function getMaxFileSize(): int
    {
        return min(Common::config('plugin_embed', 'max_file_size'), Common::config('attachments', 'file_quota'));
    }

    /**
     * This code executes when GNU social creates the page routing, and we hook
     * on this event to add our action handler for Embed.
     *
     * @param Router $m the router that was initialized
     *
     * @throws Exception
     */
    public function onAddRoute(Router $m): bool
    {
        $m->connect('oembed', 'main/oembed', Controller\OEmbed::class);
        return Event::next;
    }

    /**
     * Insert oEmbed discovery `<link>` tags (XML and JSON) into the HTML head
     * of attachment pages.
     *
     * @param Request $request the current request, used only for its path
     * @param array   $result  head elements collected so far; links are appended
     */
    public function onShowHeadElements(Request $request, array &$result): bool
    {
        $matches = [];
        preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches);

        // The pattern does not match a bare "/" path, so the capture groups
        // may be absent altogether
        $url = match ($matches[1] ?? null) {
            'attachment' => "{$matches[1]}/{$matches[2]}",
            default      => null,
        };

        // Only advertise oEmbed endpoints when there is a URL to describe
        if (!\is_null($url)) {
            foreach (['xml', 'json'] as $format) {
                $result[] = [
                    'link' => [
                        'rel'   => 'alternate',
                        'type'  => "application/{$format}+oembed",
                        'href'  => Router::url('oembed', ['format' => $format, 'url' => $url]),
                        'title' => 'oEmbed',
                    ],
                ];
            }
        }

        return Event::next;
    }

    /**
     * Show this attachment enhanced with the corresponding Embed data, if available
     *
     * @param array $vars expects the keys 'link' and 'note'
     * @param array $res  rendered views collected so far; ours is appended
     *
     * @return bool Event::stop when a view was rendered, Event::next otherwise
     */
    public function onViewLink(array $vars, array &$res): bool
    {
        $link = $vars['link'];
        try {
            $embed = Cache::get(
                'attachment-embed-' . $link->getId(),
                fn () => DB::findOneBy('attachment_embed', ['link_id' => $link->getId()]),
            );
        } catch (DuplicateFoundException $e) {
            Log::warning($e->getMessage());
            return Event::next;
        } catch (NotFoundException) {
            Log::debug("Embed doesn't have a representation for the link id={$link->getId()}. Must have been stored before the plugin was enabled.");
            return Event::next;
        }

        $attributes = $embed->getImageHTMLAttributes();

        $res[] = Formatting::twigRenderFile(
            'embed/embedView.html.twig',
            ['embed' => $embed, 'attributes' => $attributes, 'link' => $link, 'note' => $vars['note']],
        );

        return Event::stop;
    }

    /**
     * Save embedding information for an Attachment, if applicable.
     *
     * Only HTML/XHTML links are handled. Links that already have an embed, or
     * that some other plugin already tied to an attachment, are left alone.
     *
     * @return bool Event::stop when an embed was stored, Event::next otherwise
     *
     * @throws DuplicateFoundException
     */
    public function onNewLinkFromNote(Link $link, Note $note): bool
    {
        // Only handle text mime
        $mimetype = $link->getMimetype();
        if (\is_null($mimetype) || !(Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml'))) {
            return Event::next;
        }

        // Ignore if already handled
        $attachment_embed = DB::find('attachment_embed', ['link_id' => $link->getId()]);
        if (!\is_null($attachment_embed)) {
            return Event::next;
        }

        // If an attachment already exist, do not create an Embed for it. Some other plugin must have done things
        $attachment_to_link = DB::find('attachment_to_link', ['link_id' => $link->getId()]);
        if (!\is_null($attachment_to_link)) {
            $attachment_id = $attachment_to_link->getAttachmentId();
            try {
                $attachment = DB::findOneBy('attachment', ['id' => $attachment_id]);
                $attachment->livesIncrementAndGet();
                return Event::next;
            } catch (DuplicateFoundException|NotFoundException $e) {
                // The relation points to something unusable; log it and carry
                // on creating our own representation below
                Log::error($e->getMessage(), context: [$e]);
            }
        }

        // Create an Embed representation for this URL
        $embed_data            = $this->getEmbedLibMetadata($link->getUrl());
        $embed_data['link_id'] = $link->getId();

        // The normalised metadata may carry no thumbnail at all; don't hand a
        // null to downloadThumbnail(), which only accepts strings
        $thumbnail_url = $embed_data['thumbnail_url'] ?? null;
        $img_data      = \is_null($thumbnail_url) ? null : $this->downloadThumbnail($thumbnail_url);

        // Strict checks on purpose: with loose comparison `false == null`, and
        // a merely "not acceptable" thumbnail would lose its URL as well
        if ($img_data === null || $img_data === '') {
            // URL isn't usable
            $embed_data['thumbnail_url'] = null;
        }

        $attachment = null;
        if (\is_string($img_data) && $img_data !== '') {
            // String is valid image data
            $temp_file = new TemporaryFile();
            $temp_file->write($img_data);
            try {
                $attachment = GSFile::storeFileAsAttachment($temp_file);
            } catch (ClientException) {
                // Storing failed; fall back to a file-less attachment below
            }
        }

        if (\is_null($attachment)) {
            // No usable thumbnail (missing, rejected or not storable)
            DB::persist($attachment = Attachment::create(['mimetype' => $link->getMimetype()]));
            Event::handle('AttachmentStoreNew', [&$attachment]);
        }

        $embed_data['attachment_id'] = $attachment->getId();
        DB::persist(Entity\AttachmentEmbed::create($embed_data));
        DB::flush();

        return Event::stop;
    }

    /**
     * Check the host of $url against the configured white and black lists.
     * A list is only consulted when its `check_*` setting is enabled.
     *
     * @return bool true if allowed by the lists, false otherwise
     */
    private function allowedLink(string $url): bool
    {
        if (!$this->check_whitelist && !$this->check_blacklist) {
            return true;
        }

        // parse_url() yields null/false when there is no host; treat as empty
        $host = (string) parse_url($url, \PHP_URL_HOST);

        // Whitelist: don't trust by default. Blacklist: assume it passed by default
        $passed_whitelist = !$this->check_whitelist || self::hostMatchesAny($this->domain_whitelist, $host);
        $passed_blacklist = !$this->check_blacklist || !self::hostMatchesAny($this->domain_blacklist, $host);

        return $passed_whitelist && $passed_blacklist;
    }

    /**
     * Tell whether $host matches at least one of the given regexps.
     *
     * Both list shapes are accepted: a plain list of regexps (like the default
     * `domain_whitelist`) and a map of `regexp => provider`.
     *
     * @param array $patterns regexps without delimiters, as values or as string keys
     */
    private static function hostMatchesAny(array $patterns, string $host): bool
    {
        foreach ($patterns as $key => $value) {
            $regex = \is_string($key) ? $key : (string) $value;
            if (preg_match("/{$regex}/", $host) === 1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Look up embed metadata for the given $url with the 'oscarotero/Embed'
     * library and map it onto the plugin's metadata array (title, description,
     * author_name, author_url, provider_name, provider_url, thumbnail_url).
     *
     * Exceptions raised by the library are not caught here.
     */
    private function getEmbedLibMetadata(string $url): array
    {
        Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");

        $embed = new LibEmbed();
        $info  = $embed->get($url);

        $metadata                = [];
        $metadata['title']       = $info->title;
        $metadata['description'] = $info->description;
        $metadata['author_name'] = $info->authorName;

        // Site root of the page, used when the page names no author URL
        $url_parts = parse_url($url);
        $root_url  = (\is_array($url_parts) && isset($url_parts['scheme'], $url_parts['host']))
            ? "{$url_parts['scheme']}://{$url_parts['host']}"
            : $url;

        $metadata['author_url']    = $info->authorUrl ? (string) $info->authorUrl : $root_url;
        $metadata['provider_name'] = $info->providerName;
        // Fall back to a URL (the author's), never to the author's name
        $metadata['provider_url'] = (string) ($info->providerUrl ?? $metadata['author_url']);

        // Prefer the page image and use the favicon as a last resort
        $metadata['thumbnail_url'] = !\is_null($info->image)
            ? (string) $info->image
            : (string) $info->favicon;

        return self::normalizeEmbedLibMetadata($metadata);
    }

    /**
     * Normalize fetched info.
     *
     * After this call 'thumbnail_url' is either null or a non-empty string.
     */
    private static function normalizeEmbedLibMetadata(array $metadata): array
    {
        $thumbnail_url = $metadata['thumbnail_url'] ?? null;

        if (\is_null($thumbnail_url) || $thumbnail_url === '') {
            $metadata['thumbnail_url'] = null;
            return $metadata;
        }

        $provider_url = (string) ($metadata['provider_url'] ?? '');

        if (str_starts_with($thumbnail_url, '//')) {
            // Protocol-relative URL: it already names a host, only the scheme is missing
            $scheme        = parse_url($provider_url, \PHP_URL_SCHEME) ?: 'https';
            $thumbnail_url = "{$scheme}:{$thumbnail_url}";
        } elseif (str_starts_with($thumbnail_url, '/')) {
            // sometimes sites serve the path, not the full URL, for images
            // let's "be liberal in what you accept from others"!
            // add protocol and host if the thumbnail_url starts with /
            $thumbnail_url = rtrim($provider_url, '/') . $thumbnail_url;
        }

        // Some wordpress opengraph implementations sometimes return a white blank image
        // no need for us to save that!
        if ($thumbnail_url === 'https://s0.wp.com/i/blank.jpg') {
            $thumbnail_url = null;
        }

        $metadata['thumbnail_url'] = $thumbnail_url;
        return $metadata;
    }

    /**
     * Private helper that:
     * - checks if given URL is valid and is in fact an image (basic test), returns null if not;
     * - checks if respects file quota and whitelist/blacklist, returns false if not;
     * - downloads the thumbnail, returns a string if successful.
     *
     * HTTP failures while probing or downloading are logged and reported as null.
     *
     * @param string $url URL to the remote thumbnail
     */
    private function downloadThumbnail(string $url): bool|string|null
    {
        // Is this a valid URL?
        if (!Common::isValidHttpUrl($url)) {
            Log::debug("Invalid URL ({$url}) in Embed->downloadThumbnail.");
            return null;
        }

        // Is this URL trusted?
        if (!$this->allowedLink($url)) {
            Log::info("Blocked URL ({$url}) in Embed->downloadThumbnail.");
            return false;
        }

        // Validate if the URL really does point to a remote image
        try {
            $headers = HTTPClient::head($url)->getHeaders();
        } catch (ClientExceptionInterface|RedirectionExceptionInterface|ServerExceptionInterface|TransportExceptionInterface $e) {
            Log::debug('Embed->downloadThumbnail@HTTPHead->getHeaders: ' . $e->getMessage(), [$e]);
            return null;
        }

        if (empty($headers['content-type']) || GSFile::mimetypeMajor($headers['content-type'][0]) !== 'image') {
            Log::debug("URL ({$url}) doesn't point to an image (content-type: "
                . (!empty($headers['content-type'][0]) ? $headers['content-type'][0] : 'not available')
                . ') in Embed->downloadThumbnail.');
            return null;
        }

        // Does it respect the file quota?
        $file_size = isset($headers['content-length'][0]) ? (int) $headers['content-length'][0] : null;
        $max_size  = $this->getMaxFileSize();
        if (\is_null($file_size) || $file_size > $max_size) {
            Log::debug("Went to download remote thumbnail of size {$file_size} but the plugin's filesize limit is {$max_size} so we aborted in Embed->downloadThumbnail.");
            return false;
        }

        // Download and return the file
        Log::debug("Downloading remote thumbnail from URL: {$url} in Embed->downloadThumbnail.");
        try {
            $content = HTTPClient::get($url)->getContent();
        } catch (ClientExceptionInterface|RedirectionExceptionInterface|ServerExceptionInterface|TransportExceptionInterface $e) {
            Log::debug('Embed->downloadThumbnail@HTTPGet->getContent: ' . $e->getMessage(), [$e]);
            return null;
        }

        // The announced content-length is only a promise; enforce the limit on
        // what was actually received
        $received_size = \strlen($content);
        if ($received_size > $max_size) {
            Log::debug("Downloaded remote thumbnail of size {$received_size} but the plugin's filesize limit is {$max_size} so we discarded it in Embed->downloadThumbnail.");
            return false;
        }

        return $content;
    }

    /**
     * Offer the title stored in the embed of this attachment as its title.
     *
     * @param null|string $title set to the embed's title when an embed exists
     *
     * @return bool Event::stop when an embed was found, Event::next otherwise
     */
    public function onAttachmentGetBestTitle(Attachment $attachment, Note $note, ?string &$title): bool
    {
        try {
            $embed = DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);
            $title = $embed->getTitle();
            return Event::stop;
        } catch (NotFoundException) {
            // No embed for this attachment, let other handlers have a go
        } catch (DuplicateFoundException $e) {
            // Same lookup failure onViewLink already guards against
            Log::warning($e->getMessage());
        }
        return Event::next;
    }

    /**
     * Event raised when GNU social polls the plugin for information about it.
     * Adds this plugin's version information to $versions array
     *
     * @param array $versions inherited from parent
     *
     * @throws ServerException
     *
     * @return bool true hook value
     */
    public function onPluginVersion(array &$versions): bool
    {
        $versions[] = [
            'name'     => 'Embed',
            'version'  => $this->version(),
            'author'   => 'Mikael Nordfeldth, Hugo Sales, Diogo Peralta Cordeiro',
            'homepage' => GNUSOCIAL_PROJECT_URL,
            // TRANS: Plugin description.
            'description' => _m('Plugin for using and representing oEmbed, OpenGraph and other data.'),
        ];
        return Event::next;
    }
}