forked from GNUsocial/gnu-social
500 lines
19 KiB
PHP
500 lines
19 KiB
PHP
<?php
|
|
|
|
// {{{ License
|
|
// This file is part of GNU social - https://www.gnu.org/software/social
|
|
//
|
|
// GNU social is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// GNU social is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with GNU social. If not, see <http://www.gnu.org/licenses/>.
|
|
// }}}
|
|
|
|
/**
|
|
* OEmbed and OpenGraph implementation for GNU social
|
|
*
|
|
* @package GNUsocial
|
|
*
|
|
* @author Mikael Nordfeldth
|
|
* @author Stephen Paul Weber
|
|
* @author hannes
|
|
* @author Mikael Nordfeldth
|
|
* @author Miguel Dantas
|
|
* @author Diogo Peralta Cordeiro <mail@diogo.site>
|
|
* @authir Hugo Sales <hugo@hsal.es>
|
|
*
|
|
* @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
|
|
* @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
|
|
*/
|
|
|
|
namespace Plugin\Embed;
|
|
|
|
use App\Core\Cache;
|
|
use App\Core\DB\DB;
|
|
use App\Core\Event;
|
|
use App\Core\GSFile;
|
|
use App\Core\HTTPClient;
|
|
use App\Core\Log;
|
|
use App\Core\Modules\Plugin;
|
|
use App\Core\Router\RouteLoader;
|
|
use App\Core\Router\Router;
|
|
use App\Core\Security;
|
|
use App\Entity\Attachment;
|
|
use App\Entity\AttachmentThumbnail;
|
|
use App\Util\Common;
|
|
use App\Util\Exception\DuplicateFoundException;
|
|
use App\Util\Exception\NotFoundException;
|
|
use App\Util\Formatting;
|
|
use App\Util\TemporaryFile;
|
|
use Embed\Embed as LibEmbed;
|
|
use Symfony\Component\HttpFoundation\Request;
|
|
|
|
/**
|
|
* Base class for the Embed plugin that does most of the heavy lifting to get
|
|
* and display representations for remote content.
|
|
*
|
|
* @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
|
|
* @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
|
|
*/
|
|
class Embed extends Plugin
|
|
{
|
|
/**
|
|
* Settings which can be set in social.local.yaml
|
|
* WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
|
|
*/
|
|
public $domain_allowlist = [
|
|
// hostname => service provider
|
|
'.*' => '', // Default to allowing any host
|
|
];
|
|
|
|
/**
|
|
* This code executes when GNU social creates the page routing, and we hook
|
|
* on this event to add our action handler for Embed.
|
|
*
|
|
* @param $m URLMapper the router that was initialized.
|
|
*
|
|
* @throws Exception
|
|
*
|
|
* @return void true if successful, the exception object if it isn't.
|
|
*/
|
|
public function onAddRoute(RouteLoader $m)
|
|
{
|
|
$m->connect('oembed', 'main/oembed', Controller\Embed::class);
|
|
$m->connect('embed', 'main/embed', Controller\Embed::class);
|
|
return Event::next;
|
|
}
|
|
|
|
/**
|
|
* Insert oembed and opengraph tags in all HTML head elements
|
|
*/
|
|
public function onShowHeadElements(Request $request, array &$result)
|
|
{
|
|
$matches = [];
|
|
preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches);
|
|
switch ($matches[1]) {
|
|
case 'attachment':
|
|
$url = "{$matches[1]}/{$matches[2]}";
|
|
break;
|
|
}
|
|
|
|
if (isset($url)) {
|
|
foreach (['xml', 'json'] as $format) {
|
|
$result[] = [
|
|
'link' => [
|
|
'rel' => 'alternate',
|
|
'type' => "application/{$format}+oembed",
|
|
'href' => Router::url('embed', ['format' => $format, 'url' => $url]),
|
|
'title' => 'oEmbed',
|
|
], ];
|
|
}
|
|
}
|
|
return Event::next;
|
|
}
|
|
|
|
/**
|
|
* Save embedding information for an Attachment, if applicable.
|
|
*
|
|
* Normally this event is called through File::saveNew()
|
|
*
|
|
* @param Attachment $attachment The newly inserted Attachment object.
|
|
*
|
|
* @return bool success
|
|
*/
|
|
public function onAttachmentStoreNew(Attachment $attachment)
|
|
{
|
|
try {
|
|
DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);
|
|
} catch (NotFoundException) {
|
|
} catch (DuplicateFoundException) {
|
|
Log::warning("Strangely, an attachment_embed object exists for new file {$attachment->getID()}");
|
|
return Event::next;
|
|
}
|
|
|
|
if ($attachment->hasRemoteUrl() && $attachment->hasMimetype()) {
|
|
$mimetype = $attachment->getMimetype();
|
|
if (Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml')) {
|
|
try {
|
|
$embed_data = $this->getEmbed($attachment->getRemoteUrl(), $attachment);
|
|
$embed_data['attachment_id'] = $attachment->getId();
|
|
DB::persist(Entity\AttachmentEmbed::create($embed_data));
|
|
DB::flush();
|
|
} catch (Exception $e) {
|
|
Log::warning($e);
|
|
return Event::next;
|
|
}
|
|
}
|
|
}
|
|
return Event::next;
|
|
}
|
|
|
|
/**
|
|
* Replace enclosure representation of an attachment with the data from embed
|
|
*/
|
|
public function onAttachmentFileInfo(int $attachment_id, ?array &$enclosure)
|
|
{
|
|
try {
|
|
$embed = DB::findOneBy('attachment_embed', ['attachment_id' => $attachment_id]);
|
|
} catch (NotFoundException) {
|
|
return Event::next;
|
|
}
|
|
|
|
// We know about this attachment, so we 'own' it, but know
|
|
// that it doesn't have an image
|
|
if (!$embed->isImage()) {
|
|
$enclosure = null;
|
|
return Event::stop;
|
|
}
|
|
|
|
$enclosure = [
|
|
'filepath' => $embed->getFilepath(),
|
|
'mimetype' => $embed->getMimetype(),
|
|
'title' => $embed->getTitle(),
|
|
'width' => $embed->getWidth(),
|
|
'height' => $embed->getHeight(),
|
|
'url' => $embed->getUrl(),
|
|
];
|
|
|
|
return Event::stop;
|
|
}
|
|
|
|
/**
|
|
* Show this attachment enhanced with the corresponing Embed data, if available
|
|
*/
|
|
public function onShowAttachment(Attachment $attachment, array &$res)
|
|
{
|
|
try {
|
|
$embed = Cache::get('attachment-embed-' . $attachment->getId(),
|
|
fn () => DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]));
|
|
} catch (DuplicateFoundException $e) {
|
|
Log::waring($e);
|
|
return Event::next;
|
|
} catch (NotFoundException) {
|
|
return Event::next;
|
|
}
|
|
if (is_null($embed) && empty($embed->getAuthorName()) && empty($embed->getProvider())) {
|
|
Log::debug('Embed doesn\'t have a representation for the attachment #' . $attachment->getId());
|
|
return Event::next;
|
|
}
|
|
|
|
$width = Common::config('thumbnail', 'width');
|
|
$height = Common::config('thumbnail', 'height');
|
|
$smart_crop = Common::config('thumbnail', 'smart_crop');
|
|
$attributes = $embed->getImageHTMLAttributes(['class' => 'u-photo embed']);
|
|
|
|
$res[] = Formatting::twigRender(<<<END
|
|
<article class="h-entry embed">
|
|
<header>
|
|
{% if attributes != false %}
|
|
<img class="u-photo embed" width="{{attributes['width']}}" height="{{attributes['height']}}" src="{{attributes['src']}}" />
|
|
{% endif %}
|
|
<h5 class="p-name embed">
|
|
<a class="u-url" href="{{embed.getUrl()}}">{{embed.getTitle() | escape}}</a>
|
|
</h5>
|
|
<div class="p-author embed">
|
|
{% if embed.getAuthorName() is not null %}
|
|
<div class="fn vcard author">
|
|
{% if embed.getAuthorUrl() is null %}
|
|
<p>{{embed.getAuthorName()}}</p>
|
|
{% else %}
|
|
<a href="{{embed.getAuthorUrl()}}" class="url">{{embed.getAuthorName()}}</a>
|
|
{% endif %}
|
|
</div>
|
|
{% endif %}
|
|
{% if embed.getProvider() is not null %}
|
|
<div class="fn vcard">
|
|
{% if embed.getProviderUrl() is null %}
|
|
<p>{{embed.getProvider()}}</p>
|
|
{% else %}
|
|
<a href="{{embed.getProviderUrl()}}" class="url">{{embed.getProvider()}}</a>
|
|
{% endif %}
|
|
</div>
|
|
{% endif %}
|
|
</div>
|
|
</header>
|
|
<div class="p-summary embed">
|
|
{{ embed.getHtml() | escape }}
|
|
</div>
|
|
</article>
|
|
END, ['embed' => $embed, 'attributes' => $attributes]);
|
|
|
|
return Event::stop;
|
|
}
|
|
|
|
/**
|
|
* @throws ServerException if check is made but fails
|
|
*
|
|
* @return bool false on no check made, provider name on success
|
|
*/
|
|
protected function checkAllowlist(string $url)
|
|
{
|
|
if ($this->check_allowlist ?? false) {
|
|
return false; // indicates "no check made"
|
|
}
|
|
|
|
$host = parse_url($url, PHP_URL_HOST);
|
|
foreach ($this->domain_allowlist as $regex => $provider) {
|
|
if (preg_match("/{$regex}/", $host)) {
|
|
return $provider; // we trust this source, return provider name
|
|
}
|
|
}
|
|
|
|
throw new ServerException(_m('Domain not in remote thumbnail source allowlist: {host}', ['host' => $host]));
|
|
}
|
|
|
|
/**
|
|
* Check the file size of a remote file using a HEAD request and checking
|
|
* the content-length variable returned. This isn't 100% foolproof but is
|
|
* reliable enough for our purposes.
|
|
*
|
|
* @param string $url
|
|
* @param array $headers - if we already made a request
|
|
*
|
|
* @return bool|string the file size if it succeeds, false otherwise.
|
|
*/
|
|
private function getRemoteFileSize(string $url, ?array $headers = null): ?int
|
|
{
|
|
try {
|
|
if ($headers === null) {
|
|
if (!Common::isValidHttpUrl($url)) {
|
|
Log::error('Invalid URL in Embed::getRemoteFileSize()');
|
|
return false;
|
|
}
|
|
$head = HTTPClient::head($url);
|
|
$headers = $head->getHeaders();
|
|
$headers = array_change_key_case($headers, CASE_LOWER);
|
|
}
|
|
return $headers['content-length'][0] ?? false;
|
|
} catch (Exception $e) {
|
|
Loog::error($e);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* A private helper function that uses a HEAD request to check the mime type
|
|
* of a remote URL to see it it's an image.
|
|
*
|
|
* @param mixed $url
|
|
* @param null|mixed $headers
|
|
*
|
|
* @return bool true if the remote URL is an image, or false otherwise.
|
|
*/
|
|
private function isRemoteImage(string $url, ?array $headers = null): ?int
|
|
{
|
|
try {
|
|
if ($headers === null) {
|
|
if (!Common::isValidHttpUrl($url)) {
|
|
Log::error('Invalid URL in Embed::getRemoteFileSize()');
|
|
return false;
|
|
}
|
|
$head = HTTPClient::head($url);
|
|
$headers = $head->getHeaders();
|
|
$headers = array_change_key_case($headers, CASE_LOWER);
|
|
}
|
|
return !empty($headers['content-type']) && GSFile::mimetypeMajor($headers['content-type'][0]) === 'image';
|
|
} catch (Exception $e) {
|
|
Loog::error($e);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Validate that $imgData is a valid image, place it in it's folder and resize
|
|
*
|
|
* @param $imgData - The image data to validate
|
|
* @param null|string $url - The url where the image came from, to fetch metadata
|
|
* @param null|array $headers - The headers possible previous request to $url
|
|
*/
|
|
protected function validateAndWriteImage($imgData, string $url, array $headers): array
|
|
{
|
|
$file = new TemporaryFile();
|
|
$file->write($imgData);
|
|
|
|
Event::handle('HashFile', [$file->getPathname(), &$hash]);
|
|
$filepath = Common::config('storage', 'dir') . "embed/{$hash}" . Common::config('thumbnail', 'extension');
|
|
$width = Common::config('thumbnail', 'width');
|
|
$height = Common::config('thumbnail', 'height');
|
|
$smart_crop = Common::config('thumbnail', 'smart_crop');
|
|
Event::handle('ResizeImagePath', [$file->getPathname(), $filepath, $width, $height, $smart_crop, &$mimetype]);
|
|
|
|
unset($file);
|
|
|
|
if (array_key_exists('content-disposition', $headers) && preg_match('/^.+; filename="(.+?)"$/', $headers['content-disposition'][0], $matches) === 1) {
|
|
$original_name = $matches[1];
|
|
}
|
|
|
|
return [$filepath, $width, $height, $original_name ?? null, $mimetype];
|
|
}
|
|
|
|
/**
|
|
* Create and store a thumbnail representation of a remote image
|
|
*/
|
|
protected function storeRemoteThumbnail(Attachment $attachment): array | bool
|
|
{
|
|
if ($attachment->hasFilename() && file_exists($attachment->getPath())) {
|
|
throw new AlreadyFulfilledException(_m('A thumbnail seems to already exist for remote file with id=={id}', ['id' => $attachment->getId()]));
|
|
}
|
|
|
|
$url = $attachment->getRemoteUrl();
|
|
|
|
if (Formatting::startsWith($url, 'file://')) {
|
|
$filename = Formatting::removePrefix($url, 'file://');
|
|
$info = getimagesize($filename);
|
|
$filename = basename($filename);
|
|
$width = $info[0];
|
|
$height = $info[1];
|
|
} else {
|
|
$this->checkAllowlist($url);
|
|
$head = HTTPClient::head($url);
|
|
$headers = $head->getHeaders();
|
|
$headers = array_change_key_case($headers, CASE_LOWER);
|
|
|
|
try {
|
|
$is_image = $this->isRemoteImage($url, $headers);
|
|
if ($is_image == true) {
|
|
$file_size = $this->getRemoteFileSize($url, $headers);
|
|
$max_size = Common::config('attachments', 'file_quota');
|
|
if (($file_size != false) && ($file_size > $max_size)) {
|
|
throw new \Exception("Wanted to store remote thumbnail of size {$file_size} but the upload limit is {$max_size} so we aborted.");
|
|
}
|
|
} else {
|
|
return false;
|
|
}
|
|
} catch (Exception $err) {
|
|
Log::debug('Could not determine size of remote image, aborted local storage.');
|
|
throw $err;
|
|
}
|
|
|
|
// First we download the file to memory and test whether it's actually an image file
|
|
Log::debug('Downloading remote thumbnail for file id==' . $attachment->getId() . " with thumbnail URL: {$url}");
|
|
try {
|
|
$imgData = HTTPClient::get($url)->getContent();
|
|
if (isset($imgData)) {
|
|
[$filepath, $width, $height, $original_name, $mimetype] = $this->validateAndWriteImage($imgData, $url, $headers);
|
|
} else {
|
|
throw new UnsupportedMediaException(_m('HTTPClient returned an empty result'));
|
|
}
|
|
} catch (UnsupportedMediaException $e) {
|
|
// Couldn't find anything that looks like an image, nothing to do
|
|
Log::debug($e);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
DB::persist(AttachmentThumbnail::create(['attachment_id' => $attachment->getId(), 'width' => $width, 'height' => $height]));
|
|
DB::flush();
|
|
|
|
return [$filepath, $width, $height, $original_name, $mimetype];
|
|
}
|
|
|
|
/**
|
|
* Perform an oEmbed or OpenGraph lookup for the given $url.
|
|
*
|
|
* Some known hosts are allowlisted with API endpoints where we
|
|
* know they exist but autodiscovery data isn't available.
|
|
*
|
|
* Throws exceptions on failure.
|
|
*
|
|
* @param string $url
|
|
*
|
|
* @throws EmbedHelper_BadHtmlException
|
|
* @throws HTTP_Request2_Exception
|
|
*
|
|
* @return object
|
|
*/
|
|
public function getEmbed(string $url, Attachment $attachment): array
|
|
{
|
|
Log::info('Checking for remote URL metadata for ' . $url);
|
|
|
|
try {
|
|
Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");
|
|
$embed = new LibEmbed();
|
|
$info = $embed->get($url);
|
|
$metadata['title'] = $info->title;
|
|
$metadata['html'] = Security::sanitize($info->description);
|
|
$metadata['url'] = $info->url;
|
|
$metadata['author_name'] = $info->authorName;
|
|
$metadata['author_url'] = $info->authorUrl;
|
|
$metadata['provider_name'] = $info->providerName;
|
|
$metadata['provider_url'] = $info->providerUrl;
|
|
|
|
if (!is_null($info->image)) {
|
|
if (Formatting::startsWith($info->image, 'data')) {
|
|
// Inline image
|
|
$imgData = base64_decode(substr($info->image, stripos($info->image, 'base64,') + 7));
|
|
[$filepath, $width, $height, $original_name, $mimetype] = $this->validateAndWriteImage($imgData);
|
|
} else {
|
|
$attachment->setRemoteUrl((string) $info->image);
|
|
[$filepath, $width, $height, $original_name, $mimetype] = $this->storeRemoteThumbnail($attachment);
|
|
}
|
|
$metadata['width'] = $width;
|
|
$metadata['height'] = $height;
|
|
$metadata['mimetype'] = $mimetype;
|
|
$metadata['filename'] = Formatting::removePrefix($filepath, Common::config('storage', 'dir'));
|
|
}
|
|
} catch (Exception $e) {
|
|
Log::info("Failed to find Embed data for {$url} with 'oscarotero/Embed', got exception: " . get_class($e));
|
|
}
|
|
|
|
$metadata = self::normalize($metadata);
|
|
$attachment->setTitle($metadata['title']);
|
|
return $metadata;
|
|
}
|
|
|
|
/**
|
|
* Normalize fetched info.
|
|
*/
|
|
public static function normalize(array $data): array
|
|
{
|
|
if (isset($metadata['url'])) {
|
|
// sometimes sites serve the path, not the full URL, for images
|
|
// let's "be liberal in what you accept from others"!
|
|
// add protocol and host if the thumbnail_url starts with /
|
|
if ($metadata['url'][0] == '/') {
|
|
$thumbnail_url_parsed = parse_url($metadata['url']);
|
|
$metadata['url'] = "{$thumbnail_url_parsed['scheme']}://{$thumbnail_url_parsed['host']}{$metadata['url']}";
|
|
}
|
|
|
|
// Some wordpress opengraph implementations sometimes return a white blank image
|
|
// no need for us to save that!
|
|
if ($metadata['url'] == 'https://s0.wp.com/i/blank.jpg') {
|
|
$metadata['url'] = null;
|
|
}
|
|
|
|
if (!isset($data['width'])) {
|
|
$data['width'] = Common::config('thumbnail', 'width');
|
|
$data['height'] = Common::config('thumbnail', 'height');
|
|
}
|
|
}
|
|
|
|
return $data;
|
|
}
|
|
}
|