[Embed] Add Embed plugin and initial cleanup

This commit is contained in:
Hugo Sales 2021-04-25 21:23:46 +00:00
parent 2d91095260
commit 3d8c348cb8
Signed by: someonewithpc
GPG Key ID: 7D0C7EAFC9D835A0
71 changed files with 2186 additions and 480 deletions

View File

@ -1,4 +1,5 @@
<?php
// {{{ License
// This file is part of GNU social - https://www.gnu.org/software/social
//
// GNU social is free software: you can redistribute it and/or modify
@ -13,9 +14,10 @@
//
// You should have received a copy of the GNU Affero General Public License
// along with GNU social. If not, see <http://www.gnu.org/licenses/>.
// }}}
/**
* OembedPlugin implementation for GNU social
* Embed plugin implementation for GNU social
*
* @package GNUsocial
*
@ -23,27 +25,28 @@
* @author Mikael Nordfeldth <mmn@hethane.se>
* @author hannes
* @author Diogo Cordeiro <diogo@fc.up.pt>
* @author Hugo Sales <hugo@hsal.es>
* @copyright 2019 Free Software Foundation, Inc http://www.fsf.org
* @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
*/
namespace Plguin\Embed\actions;
namespace Plugin\Embed\Controller;
use App\Core\Controller;
use Symfony\Component\HttpFoundation\Request;
/**
* Oembed provider implementation
* Embed provider implementation
*
* This class handles all /main/oembed(.xml|.json)/ requests.
*
* @copyright 2019 Free Software Foundation, Inc http://www.fsf.org
* @copyright 2019, 2021 Free Software Foundation, Inc http://www.fsf.org
* @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
*/
class OEmbedAction extends Action
class Embed extends Controller
{
/** Placeholder */
protected function handle()
protected function handle(Request $request)
{
parent::handle();
$url = $this->trimmed('url');
$tls = parse_url($url, PHP_URL_SCHEME) == 'https';
$root_url = common_root_url($tls);

View File

@ -36,7 +36,19 @@
namespace Plugin\Embed;
use App\Core\Cache;
use App\Core\DB\DB;
use App\Core\Event;
use App\Core\HTTPClient;
use App\Core\Log;
use App\Core\Modules\Plugin;
use App\Core\Router\RouteLoader;
use App\Core\Router\Router;
use App\Entity\Attachment;
use App\Util\Exception\DuplicateFoundException;
use App\Util\Exception\NotFoundException;
use Plugin\Embed\Entity\FileEmbed;
use Symfony\Component\HttpFoundation\Request;
/**
* Base class for the Embed plugin that does most of the heavy lifting to get
@ -47,64 +59,15 @@ use App\Core\Modules\Plugin;
*/
class Embed extends Plugin
{
const PLUGIN_VERSION = '2.1.0';
// settings which can be set in config.php with addPlugin('Embed', ['param'=>'value', ...]);
// WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
public $domain_whitelist = [
/**
* Settings which can be set in social.local.yaml
* WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
*/
public $domain_allowlist = [
// hostname => service provider
'^i\d*\.ytimg\.com$' => 'YouTube',
'^i\d*\.vimeocdn\.com$' => 'Vimeo',
];
public $append_whitelist = []; // fill this array as domain_whitelist to add more trusted sources
public $check_whitelist = false; // security/abuse precaution
public $thumbnail_width = 128;
public $thumbnail_height = 128;
public $crop = true;
public $max_size;
protected $imgData = [];
/**
* Initialize the Embed plugin and set up the environment it needs for it.
* Returns true if it initialized properly, the exception object if it
* doesn't.
*/
public function initialize()
{
parent::initialize();
$this->domain_whitelist = array_merge($this->domain_whitelist, $this->append_whitelist);
// Load global configuration if specific not provided
$this->thumbnail_width = $this->thumbnail_width ?? common_config('thumbnail', 'width');
$this->thumbnail_height = $this->thumbnail_height ?? common_config('thumbnail', 'height');
$this->max_size = $this->max_size ?? common_config('attachments', 'file_quota');
$this->crop = $this->crop ?? common_config('thumbnail', 'crop');
}
/**
* The code executed on GNU social checking the database schema, which in
* this case is to make sure we have the plugin table we need.
*
* @return bool true if it ran successfully, the exception object if it doesn't.
*/
public function onCheckSchema()
{
$this->onEndUpgrade(); // Ensure rename
$schema = Schema::get();
$schema->ensureTable('file_embed', File_embed::schemaDef());
return true;
}
public function onEndUpgrade()
{
$schema = Schema::get();
return $schema->renameTable('file_oembed', 'file_embed');
}
/**
* This code executes when GNU social creates the page routing, and we hook
@ -116,9 +79,11 @@ class Embed extends Plugin
*
* @return void true if successful, the exception object if it isn't.
*/
public function onRouterInitialized(URLMapper $m)
public function onAddRoute(RouteLoader $m)
{
$m->connect('main/oembed', ['action' => 'oembed']);
$m->connect('oembed', 'main/oembed', Controller\Embed::class);
$m->connect('embed', 'main/embed', Controller\Embed::class);
return Event::next;
}
/**
@ -182,126 +147,83 @@ class Embed extends Plugin
return true;
}
/** Placeholder */
public function onEndShowHeadElements(Action $action)
/**
* Insert oembed and opengraph tags in all HTML head elements
*/
public function onShowHeadElements(Request $request, array $result)
{
switch ($action->getActionName()) {
case 'attachment':
$url = common_local_url('attachment', ['attachment' => $action->attachment->getID()]);
break;
case 'shownotice':
if (!$action->notice->isLocal()) {
return true;
}
try {
$url = $action->notice->getUrl();
} catch (InvalidUrlException $e) {
// The notice is probably a share or similar, which don't
// have a representational URL of their own.
return true;
}
break;
$matches = [];
preg_match(',/?([^/]+)/?.*,', $request->getPathInfo(), $matches);
switch ($matches[1]) {
case 'attachment':
$url = "{$matches[1]}/{$matches[2]}";
break;
}
if (isset($url)) {
foreach (['xml', 'json'] as $format) {
$action->element(
'link',
[
$result[] = [
'link' => [
'rel' => 'alternate',
'type' => "application/{$format}+oembed",
'href' => common_local_url('oembed', [], ['format' => $format, 'url' => $url]),
'href' => Router::url('embed', ['format' => $format, 'url' => $url]),
'title' => 'oEmbed',
]
);
], ];
}
}
return true;
return Event::next;
}
/** Placeholder */
public function onEndShowStylesheets(Action $action)
/**
* Save embedding information for an Attachment, if applicable.
*
* Normally this event is called through File::saveNew()
*
* @param Attachment $attachment The newly inserted Attachment object.
*
* @return bool success
*/
public function onAttachmentStoreNew(Attachment $attachment)
{
$action->cssLink($this->path('css/embed.css'));
try {
DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);
} catch (NotFoundException) {
} catch (DuplicateFoundException) {
Log::warning("Strangely, an attachment_embed object exists for new file {$attachment->getID()}");
return Event::next;
}
if (!is_null($attachment->getRemoteUrl()) || (!is_null($mimetype = $attachment->getMimetype()) && (('text/html' === substr($mimetype, 0, 9) || 'application/xhtml+xml' === substr($mimetype, 0, 21))))) {
try {
$embed_data = EmbedHelper::getEmbed($attachment->getRemoteUrl());
dd($embed_data);
if ($embed_data === false) {
throw new Exception("Did not get Embed data from URL {$attachment->url}");
}
$attachment->setTitle($embed_data['title']);
} catch (Exception $e) {
Log::warning($e);
return true;
}
FileEmbed::saveNew($embed_data, $attachment->getId());
}
return true;
}
/**
* Save embedding information for a File, if applicable.
* Replace enclosure representation of an attachment with the data from embed
*
* Normally this event is called through File::saveNew()
*
* @param File $file The newly inserted File object.
*
* @return bool success
* @param mixed $enclosure
*/
public function onEndFileSaveNew(File $file)
{
$fe = File_embed::getKV('file_id', $file->getID());
if ($fe instanceof File_embed) {
common_log(LOG_WARNING, "Strangely, a File_embed object exists for new file {$file->getID()}", __FILE__);
return true;
}
if (isset($file->mimetype)
&& (('text/html' === substr($file->mimetype, 0, 9) || 'application/xhtml+xml' === substr($file->mimetype, 0, 21)))) {
try {
$embed_data = File_embed::getEmbed($file->url);
if ($embed_data === false) {
throw new Exception("Did not get Embed data from URL {$file->url}");
}
$file->setTitle($embed_data->title);
} catch (Exception $e) {
common_log(LOG_WARNING, sprintf(
__METHOD__ . ': %s thrown when getting embed data: %s',
get_class($e),
_ve($e->getMessage())
));
return true;
}
File_embed::saveNew($embed_data, $file->getID());
}
return true;
}
/** Placeholder */
public function onEndShowAttachmentLink(HTMLOutputter $out, File $file)
{
$embed = File_embed::getKV('file_id', $file->getID());
if (empty($embed->author_name) && empty($embed->provider)) {
return true;
}
$out->elementStart('div', ['id' => 'oembed_info', 'class' => 'e-content']);
foreach (['author_name' => ['class' => ' author', 'url' => 'author_url'],
'provider' => ['class' => '', 'url' => 'provider_url'], ]
as $field => $options) {
if (!empty($embed->{$field})) {
$out->elementStart('div', 'fn vcard' . $options['class']);
if (empty($embed->{$options['url']})) {
$out->text($embed->{$field});
} else {
$out->element(
'a',
['href' => $embed->{$options['url']},
'class' => 'url', ],
$embed->{$field}
);
}
}
}
$out->elementEnd('div');
return false;
}
/** Placeholder */
public function onFileEnclosureMetadata(File $file, &$enclosure)
public function onFileEnclosureMetadata(Attachment $attachment, &$enclosure)
{
// Never treat generic HTML links as an enclosure type!
// But if we have embed info, we'll consider it golden.
$embed = File_embed::getKV('file_id', $file->getID());
if (!$embed instanceof File_embed || !in_array($embed->type, ['photo', 'video'])) {
return true;
try {
$embed = DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);
} catch (NotFoundException) {
return Event::next;
}
foreach (['mimetype', 'url', 'title', 'modified', 'width', 'height'] as $key) {
@ -313,187 +235,80 @@ class Embed extends Plugin
}
/** Placeholder */
public function onStartShowAttachmentRepresentation(HTMLOutputter $out, File $file)
public function onShowAttachment(Attachment $attachment, array &$res)
{
try {
$embed = File_embed::getByFile($file);
} catch (NoResultException $e) {
return true;
$embed = Cache::get('attachment-embed-' . $attachment->getId(),
fn () => DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]));
} catch (DuplicateFoundException $e) {
Log::waring($e);
return Event::next;
} catch (NotFoundException) {
return Event::next;
}
if (is_null($embed) && empty($embed->getAuthorName()) && empty($embed->getProvider())) {
return Event::next;
}
// Show thumbnail as usual if it's a photo.
if ($embed->type === 'photo') {
return true;
}
$thumbnail = AttachmentThumbnail::getOrCreate(attachment: $attachment, width: $width, height: $height, crop: $smart_crop);
$attributes = $thumbnail->getHTMLAttributes(['class' => 'u-photo embed']);
$out->elementStart('article', ['class' => 'h-entry embed']);
$out->elementStart('header');
try {
$thumb = $file->getThumbnail($this->thumbnail_width, $this->thumbnail_height);
$out->element('img', $thumb->getHtmlAttrs(['class' => 'u-photo embed']));
unset($thumb);
} catch (FileNotFoundException $e) {
// Nothing to show
} catch (Exception $e) {
$out->element('div', ['class' => 'error'], $e->getMessage());
}
$out->elementStart('h5', ['class' => 'p-name embed']);
$out->element('a', ['class' => 'u-url', 'href' => $file->getUrl()], common_strip_html($embed->title));
$out->elementEnd('h5');
$out->elementStart('div', ['class' => 'p-author embed']);
if (!empty($embed->author_name)) {
// TRANS: text before the author name of embed attachment representation
// FIXME: The whole "By x from y" should be i18n because of different language constructions.
$out->text(_('By '));
$attrs = ['class' => 'h-card p-author'];
if (!empty($embed->author_url)) {
$attrs['href'] = $embed->author_url;
$tag = 'a';
} else {
$tag = 'span';
}
$out->element($tag, $attrs, $embed->author_name);
}
if (!empty($embed->provider)) {
// TRANS: text between the embed author name and provider url
// FIXME: The whole "By x from y" should be i18n because of different language constructions.
$out->text(_(' from '));
$attrs = ['class' => 'h-card'];
if (!empty($embed->provider_url)) {
$attrs['href'] = $embed->provider_url;
$tag = 'a';
} else {
$tag = 'span';
}
$out->element($tag, $attrs, $embed->provider);
}
$out->elementEnd('div');
$out->elementEnd('header');
$out->elementStart('div', ['class' => 'p-summary embed']);
$out->raw(common_purify($embed->html));
$out->elementEnd('div');
$out->elementStart('footer');
$out->elementEnd('footer');
$out->elementEnd('article');
$res[] = Formatting::twigRender(<<<END
<article class="h-entry embed">
<header>
<img class="u-photo embed" width="{{attributes['width']}}" height="{{attributes['height']}}" src="{{attributes['src']}}" />
<h5 class="p-name embed">
<a class="u-url" href="{{attachment.getUrl()}}">{{embed.getTitle() | escape}}</a>
</h5>
<div class="p-author embed">
{% if embed.getAuthorName() is not null %}
<div class="fn vcard author">
{% if embed.getAuthorUrl() is null %}
<p>{{embed.getAuthorName()}}</p>
{% else %}
<a href="{{embed.getAuthorUrl()}}" class="url">{{embed.getAuthorName()}}</a>
{% endif %}
</div>
{% endif %}
{% if embed.getProvider() is not null %}
<div class="fn vcard">
{% if embed.getProviderUrl() is null %}
<p>{{embed.getProvider()}}</p>
{% else %}
<a href="{{embed.getProviderUrl()}}" class="url">{{embed.getProvider()}}</a>
{% endif %}
</div>
{% endif %}
</div>
</header>
<div class="p-summary embed">
{{ embed.getHtml() | escape }}
</div>
</article>
END, ['embed' => $embed, 'thumbnail' => $thumbnail, 'attributes' => $attributes]);
return false;
}
/** Placeholder */
public function onShowUnsupportedAttachmentRepresentation(HTMLOutputter $out, File $file)
{
try {
$embed = File_embed::getByFile($file);
} catch (NoResultException $e) {
return true;
}
// the 'photo' type is shown through ordinary means, using StartShowAttachmentRepresentation!
switch ($embed->type) {
case 'video':
case 'link':
if (!empty($embed->html)
&& (GNUsocial::isAjax() || common_config('attachments', 'show_html'))) {
$purifier = new HTMLPurifier();
// FIXME: do we allow <object> and <embed> here? we did that when we used htmLawed,
// but I'm not sure anymore...
$out->raw($purifier->purify($embed->html));
}
return false;
}
return true;
return Event::stop;
}
/**
* This event executes when GNU social is creating a file thumbnail entry in
* the database. We glom onto this to create proper information for Embed
* object thumbnails.
*
* @param $file File the file of the created thumbnail
* @param &$imgPath null|string = the path to the created thumbnail (output)
* @param $media string = media type
*
* @throws FileNotFoundException
* @throws NoResultException
* @throws ServerException
*
* @return bool true if it succeeds (including non-action
* states where it isn't oEmbed data, so it doesn't mess up the event handle
* for other things hooked into it), or the exception if it fails.
*/
public function onCreateFileImageThumbnailSource(File $file, ?string &$imgPath, string $media): bool
{
// If we are on a private node, we won't do any remote calls (just as a precaution until
// we can configure this from config.php for the private nodes)
if (common_config('site', 'private')) {
return true;
}
// All our remote Embed images lack a local filename property in the File object
if ($file->isLocal()) {
common_debug(sprintf('File of id==%d is local (filename: %s), so nothing Embed ' .
'should handle.', $file->getID(), _ve($file->filename)));
return true;
}
try {
// If we have proper Embed data, there should be an entry in the File_thumbnail table.
// If not, we're not going to do anything.
$thumbnail = File_thumbnail::byFile($file);
} catch (NoResultException $e) {
// Not Embed data, or at least nothing we either can or want to use.
common_debug('No Embed data found for file id==' . $file->getID());
return true;
}
try {
$this->storeRemoteFileThumbnail($thumbnail);
} catch (AlreadyFulfilledException $e) {
// aw yiss!
} catch (Exception $e) {
common_debug(sprintf(
'Embed encountered an exception (%s) for file id==%d: %s',
get_class($e),
$file->getID(),
_ve($e->getMessage())
));
throw $e;
}
// Out
$imgPath = $thumbnail->getPath();
return !file_exists($imgPath);
}
public function onFileDeleteRelated(File $file, array &$related): bool
{
$related[] = 'File_embed';
return true;
}
/**
* @param mixed $url
*
* @throws ServerException if check is made but fails
*
* @return bool false on no check made, provider name on success
*/
protected function checkWhitelist($url)
protected function checkAllowlist(string $url)
{
if (!$this->check_whitelist) {
if (!$this->check_allowlist) {
return false; // indicates "no check made"
}
$host = parse_url($url, PHP_URL_HOST);
foreach ($this->domain_whitelist as $regex => $provider) {
foreach ($this->domain_allowlist as $regex => $provider) {
if (preg_match("/{$regex}/", $host)) {
return $provider; // we trust this source, return provider name
}
}
throw new ServerException(sprintf(_('Domain not in remote thumbnail source whitelist: %s'), $host));
throw new ServerException(_m('Domain not in remote thumbnail source allowlist: {host}', ['host' => $host]));
}
/**
@ -501,33 +316,32 @@ class Embed extends Plugin
* the content-length variable returned. This isn't 100% foolproof but is
* reliable enough for our purposes.
*
* @param mixed $url
* @param null|mixed $headers
* @param string $url
* @param array $headers - if we already made a request
*
* @return bool|string the file size if it succeeds, false otherwise.
*/
private function getRemoteFileSize($url, $headers = null)
private function getRemoteFileSize(string $url, ?array $headers = null): ?int
{
try {
if ($headers === null) {
if (!common_valid_http_url($url)) {
common_log(LOG_ERR, 'Invalid URL in Embed::getRemoteFileSize()');
if (!Common::isValidHttpUrl($url)) {
Log::error('Invalid URL in Embed::getRemoteFileSize()');
return false;
}
$head = (new HTTPClient())->head($url);
$headers = $head->getHeader();
$head = HTTPClient::head($url);
$headers = $head->getHeaders();
$headers = array_change_key_case($headers, CASE_LOWER);
}
return $headers['content-length'] ?? false;
} catch (Exception $err) {
common_log(LOG_ERR, __CLASS__ . ': getRemoteFileSize on URL : ' . _ve($url) .
' threw exception: ' . $err->getMessage());
} catch (Exception $e) {
Loog::error($e);
return false;
}
}
/**
* A private helper function that uses a CURL lookup to check the mime type
* A private helper function that uses a HEAD request to check the mime type
* of a remote URL to see it it's an image.
*
* @param mixed $url
@ -535,128 +349,68 @@ class Embed extends Plugin
*
* @return bool true if the remote URL is an image, or false otherwise.
*/
private function isRemoteImage($url, $headers = null)
private function isRemoteImage(string $url, ?array $headers = null): ?int
{
if (empty($headers)) {
if (!common_valid_http_url($url)) {
common_log(LOG_ERR, 'Invalid URL in Embed::isRemoteImage()');
return false;
try {
if ($headers === null) {
if (!Common::isValidHttpUrl($url)) {
Log::error('Invalid URL in Embed::getRemoteFileSize()');
return false;
}
$head = HTTPClient::head($url);
$headers = $head->getHeaders();
$headers = array_change_key_case($headers, CASE_LOWER);
}
$head = (new HTTPClient())->head($url);
$headers = $head->getHeader();
$headers = array_change_key_case($headers, CASE_LOWER);
return !empty($headers['content-type']) && GSFile::mimetypeMajor($headers['content-type']) === 'image';
} catch (Exception $e) {
Loog::error($e);
return false;
}
return !empty($headers['content-type']) && common_get_mime_media($headers['content-type']) === 'image';
}
/**
* Validate that $imgData is a valid image before writing it to
* disk, as well as resizing it to at most $this->thumbnail_width
* by $this->thumbnail_height
* Validate that $imgData is a valid image, place it in its folder and resize
*
* @param $imgData - The image data to validate. Taken by reference to avoid copying
* @param $imgData - The image data to validate
* @param null|string $url - The url where the image came from, to fetch metadata
* @param null|array $headers - The headers possible previous request to $url
* @param null|int $file_id - The id of the file this image belongs to, used for logging
*/
protected function validateAndWriteImage(&$imgData, ?string $url = null, ?array $headers = null, ?int $file_id = null): array
protected function validateAndWriteImage($imgData, string $url, array $headers): array
{
$info = @getimagesizefromstring($imgData);
// array indexes documented on php.net:
// https://php.net/manual/en/function.getimagesize.php
if ($info === false) {
throw new UnsupportedMediaException(_('Remote file format was not identified as an image.'), $url);
} elseif (!$info[0] || !$info[1]) {
throw new UnsupportedMediaException(_('Image file had impossible geometry (0 width or height)'));
$file = new TemporaryFile();
$file->write($imgData);
if (array_key_exists('content-disposition', $headers) && preg_match('/^.+; filename="(.+?)"$/', $headers['content-disposition'], $matches) === 1) {
$original_name = $matches[1];
}
$width = min($info[0], $this->thumbnail_width);
$height = min($info[1], $this->thumbnail_height);
$filehash = hash(File::FILEHASH_ALG, $imgData);
$mimetype = $headers['content-type'];
Event::handle('AttachmentValidation', [$file, &$mimetype]);
try {
if (!empty($url)) {
$original_name = HTTPClient::get_filename($url, $headers);
}
$filename = MediaFile::encodeFilename($original_name ?? _m('Untitled attachment'), $filehash);
} catch (Exception $err) {
common_log(LOG_ERR, 'Went to write a thumbnail to disk in StoreRemoteMediaPlugin::storeRemoteThumbnail ' .
"but encountered error: {$err}");
throw $err;
}
$hash = hash_file(Attachment::FILEHASH_ALGO, $file->getPathname());
$filename = Common::config('attachments', 'dir') . "embed/{$hash}";
$file->commit($filename);
unset($file);
try {
$fullpath = File_thumbnail::path($filename);
// Write the file to disk. Throw Exception on failure
if (!file_exists($fullpath)) {
if (strpos($fullpath, INSTALLDIR) !== 0 || file_put_contents($fullpath, $imgData) === false) {
throw new ServerException(_('Could not write downloaded file to disk.'));
}
if (common_get_mime_media(MediaFile::getUploadedMimeType($fullpath)) !== 'image') {
@unlink($fullpath);
throw new UnsupportedMediaException(
_('Remote file format was not identified as an image.'),
$url
);
}
// If the image is not of the desired size, resize it
if ($this->crop && ($info[0] > $this->thumbnail_width || $info[1] > $this->thumbnail_height)) {
try {
// Temporary object, not stored in DB
$img = new ImageFile(-1, $fullpath);
list($width, $height, $x, $y, $w, $h) = $img->scaleToFit($this->thumbnail_width, $this->thumbnail_height, $this->crop);
// The boundary box for our resizing
$box = [
'width' => $width, 'height' => $height,
'x' => $x, 'y' => $y,
'w' => $w, 'h' => $h,
];
$width = $box['width'];
$height = $box['height'];
$img->resizeTo($fullpath, $box);
} catch (\Intervention\Image\Exception\NotReadableException $e) {
common_log(LOG_ERR, "StoreRemoteMediaPlugin::storeRemoteThumbnail was unable to decode image with Intervention: {$e}");
// No need to interrupt processing
}
}
} else {
throw new AlreadyFulfilledException('A thumbnail seems to already exist for remote file' .
($file_id ? 'with id==' . $file_id : '') . ' at path ' . $fullpath);
}
} catch (AlreadyFulfilledException $e) {
// Carry on
} catch (Exception $err) {
common_log(LOG_ERR, 'Went to write a thumbnail to disk in EmbedPlugin::storeRemoteThumbnail ' .
"but encountered error: {$err}");
throw $err;
} finally {
unset($imgData);
}
return [$filename, $width, $height];
return [$filename, $width, $height, $original_name, $mimetype];
}
/**
* Function to create and store a thumbnail representation of a remote image
*
* @param $thumbnail File_thumbnail object containing the file thumbnail
* @param $thumbnail FileThumbnail object containing the file thumbnail
*
* @return bool true if it succeeded, the exception if it fails, or false if it
* is limited by system limits (ie the file is too large.)
*/
protected function storeRemoteFileThumbnail(File_thumbnail $thumbnail)
protected function storeRemoteThumbnail(Attachment $attachment): bool
{
if (!empty($thumbnail->filename) && file_exists($thumbnail->getPath())) {
throw new AlreadyFulfilledException(
sprintf('A thumbnail seems to already exist for remote file with id==%u', $thumbnail->file_id)
);
$path = $attachment->getPath();
if (file_exists($path)) {
throw new AlreadyFulfilledException(_m('A thumbnail seems to already exist for remote file with id=={id}', ['id' => $attachment->id]));
}
$url = $thumbnail->url; // Important not to use the getter here.
$url = $attachment->getRemoteUrl();
if (substr($url, 0, 7) == 'file://') {
$filename = substr($url, 7);
@ -665,86 +419,48 @@ class Embed extends Plugin
$width = $info[0];
$height = $info[1];
} else {
$this->checkWhitelist($url);
$head = (new HTTPClient())->head($url);
$headers = $head->getHeader();
$this->checkAllowlist($url);
$head = HTTPClient::head($url);
$headers = $head->getHeaders();
$headers = array_change_key_case($headers, CASE_LOWER);
try {
$is_image = $this->isRemoteImage($url, $headers);
if ($is_image == true) {
$file_size = $this->getRemoteFileSize($url, $headers);
if (($file_size != false) && ($file_size > $this->max_size)) {
common_debug('Went to store remote thumbnail of size ' . $file_size .
' but the upload limit is ' . $this->max_size . ' so we aborted.');
$max_size = Common::config('attachments', 'file_quota');
if (($file_size != false) && ($file_size > $max_size)) {
Log::debug("Wanted to store remote thumbnail of size {$file_size} but the upload limit is {$max_size} so we aborted.");
return false;
}
} else {
return false;
}
} catch (Exception $err) {
common_debug('Could not determine size of remote image, aborted local storage.');
Log::debug('Could not determine size of remote image, aborted local storage.');
throw $err;
}
// First we download the file to memory and test whether it's actually an image file
// FIXME: To support remote video/whatever files, this needs reworking.
common_debug(sprintf(
'Downloading remote thumbnail for file id==%u with thumbnail URL: %s',
$thumbnail->file_id,
$url
));
Log::debug("Downloading remote thumbnail for file id=={$attachment->id} with thumbnail URL: {$url}");
try {
$imgData = HTTPClient::quickGet($url);
$imgData = HTTPClient::get($url);
if (isset($imgData)) {
list($filename, $width, $height) = $this->validateAndWriteImage(
$imgData,
$url,
$headers,
$thumbnail->file_id
);
[$filename, $width, $height, $original_name, $mimetype] = $this->validateAndWriteImage($imgData, $url, $headers);
} else {
throw new UnsupportedMediaException('HTTPClient returned an empty result');
throw new UnsupportedMediaException(_m('HTTPClient returned an empty result'));
}
} catch (UnsupportedMediaException $e) {
// Couldn't find anything that looks like an image, nothing to do
common_debug("Embed was not able to find an image for URL `{$url}`: " . $e->getMessage());
Log::debug($e);
return false;
}
}
try {
// Update our database for the thumbnail record
$orig = clone $thumbnail;
$thumbnail->filename = $filename;
$thumbnail->width = $width;
$thumbnail->height = $height;
// Throws exception on failure.
$thumbnail->updateWithKeys($orig);
} catch (Exception $err) {
common_log(LOG_ERR, 'Went to write a thumbnail entry to the database in ' .
'EmbedPlugin::storeRemoteThumbnail but encountered error: ' . $err);
throw $err;
}
return true;
}
DB::persist(AttachmentThumbnail::create(['attachment_id' => $attachment->id, 'width' => $width, 'height' => $height]));
$attachment->setFilename($filename);
DB::flush();
/**
* Event raised when GNU social polls the plugin for information about it.
* Adds this plugin's version information to $versions array
*
* @param &$versions array inherited from parent
*
* @return bool true hook value
*/
public function onPluginVersion(array &$versions): bool
{
$versions[] = ['name' => 'Embed',
'version' => self::PLUGIN_VERSION,
'author' => 'Mikael Nordfeldth',
'homepage' => GNUSOCIAL_ENGINE_URL,
'description' => // TRANS: Plugin description.
_m('Plugin for using and representing oEmbed, OpenGraph and other data.'), ];
return true;
}
}

View File

@ -0,0 +1,191 @@
<?php
// {{{ License
// This file is part of GNU social - https://www.gnu.org/software/social
//
// GNU social is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GNU social is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with GNU social. If not, see <http://www.gnu.org/licenses/>.
// }}}
/**
* OembedPlugin implementation for GNU social
*
* @package GNUsocial
*
* @author Mikael Nordfeldth
* @author hannes
* @author Diogo Cordeiro <diogo@fc.up.pt>
* @author Hugo Sales <hugo@hsal.es>
* @copyright 2019, 2021 Free Software Foundation, Inc http://www.fsf.org
* @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
*/
namespace Plugin\Embed;
use App\Core\Event;
use App\Core\HTTPClient;
use App\Core\Log;
/**
* Utility class to wrap basic embed lookups.
*
* Denylisted hosts will use an alternate lookup method.
* Allowlisted hosts will use known embed API endpoints.
*
* Sites that provide discovery links will use them directly; a bug
* in use of discovery links with query strings is worked around.
*
* Others will fall back to oohembed (unless disabled).
* The API endpoint can be configured or disabled through config
* as 'oohembed'/'endpoint'.
*
* @copyright 2019, 2021 Free Software Foundation, Inc http://www.fsf.org
* @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
*/
class EmbedHelper
{
    /**
     * Perform or fake an oEmbed lookup for the given $url.
     *
     * The 'GetRemoteUrlMetadata' event is fired first, with $metadata passed
     * by reference. If Event::handle() returns a truthy value, the page at
     * $url is downloaded, parsed into a DOMDocument and handed to the
     * 'GetRemoteUrlMetadataFromDom' event (again with $metadata by reference).
     * Whatever ends up in $metadata is then passed through self::normalize().
     *
     * @param string $url the remote URL to look up
     *
     * @throws EmbedHelper_BadHtmlException if the body is empty or cannot be parsed as HTML
     * @throws \Exception                   if the collected metadata has no 'type' field
     *
     * @return \stdClass the normalized metadata object
     */
    public static function getEmbed(string $url): \stdClass
    {
        Log::info('Checking for remote URL metadata for ' . $url);

        // Fully qualified: this file lives in a namespace, so a bare
        // `stdClass` would resolve to a class that does not exist.
        $metadata = new \stdClass();

        if (Event::handle('GetRemoteUrlMetadata', [$url, &$metadata])) {
            // If that event didn't return anything, try downloading the body and parse it
            // NOTE(review): assumes the object returned by HTTPClient::get()
            // exposes getBody() and getHeader() — confirm against HTTPClient.
            $response = HTTPClient::get($url);
            $body     = (string) $response->getBody();

            // DOMDocument::loadHTML() rejects an empty string outright on
            // PHP 8 (ValueError); report it as the documented failure instead.
            if ($body === '') {
                throw new EmbedHelper_BadHtmlException();
            }

            // DOMDocument::loadHTML may throw warnings on unrecognized elements,
            // and notices on unrecognized namespaces.
            $old = error_reporting(error_reporting() & ~(E_WARNING | E_NOTICE));

            try {
                // DOMDocument assumes ISO-8859-1 per HTML spec
                // use UTF-8 if we find any evidence of that encoding
                $unicode_check_dom = new \DOMDocument();
                if (!$unicode_check_dom->loadHTML($body)) {
                    throw new EmbedHelper_BadHtmlException();
                }
                $utf8_evidence = self::metaDeclaresUtf8($unicode_check_dom);
                unset($unicode_check_dom);

                // The Content-Type HTTP response header overrides encoding metatags in DOM
                if (stristr($response->getHeader('Content-Type'), 'utf-8')) {
                    $utf8_evidence = true;
                }

                // add utf-8 encoding prolog if we have reason to believe this is utf-8 content
                // DOMDocument('1.0', 'UTF-8') does not work!
                $utf8_tag = $utf8_evidence ? '<?xml encoding="utf-8" ?>' : '';

                $dom = new \DOMDocument();
                $ok  = $dom->loadHTML($utf8_tag . $body);
                unset($body); // storing the DOM in memory is enough...
            } finally {
                // Always restore the previous level, including when we throw above
                error_reporting($old);
            }

            if (!$ok) {
                throw new EmbedHelper_BadHtmlException();
            }

            Event::handle('GetRemoteUrlMetadataFromDom', [$url, $dom, &$metadata]);
        }

        return self::normalize($metadata);
    }

    /**
     * Normalize oEmbed format.
     *
     * Rewrites the non-standard type 'image' to 'photo' and, when a
     * thumbnail URL is present without a thumbnail width, fills in both
     * thumbnail dimensions from the 'thumbnail' configuration section.
     * The object is modified in place and also returned.
     *
     * @param \stdClass $data the raw metadata
     *
     * @throws \Exception if $data has no (or an empty) 'type' field
     *
     * @return \stdClass the same object, normalized
     */
    public static function normalize(\stdClass $data): \stdClass
    {
        if (empty($data->type)) {
            throw new \Exception('Invalid oEmbed data: no type field.');
        }

        if ($data->type === 'image') {
            // YFrog does this.
            $data->type = 'photo';
        }

        if (isset($data->thumbnail_url) && !isset($data->thumbnail_width)) {
            // !?!?!
            // NOTE(review): `Common` is not among this file's `use` imports, so
            // it resolves relative to the current namespace — confirm it exists.
            $data->thumbnail_width  = Common::config('thumbnail', 'width');
            $data->thumbnail_height = Common::config('thumbnail', 'height');
        }

        return $data;
    }

    /**
     * Tell whether any <meta> element of $dom declares UTF-8, either through
     * http-equiv="content-type" plus a content attribute, or through a
     * charset attribute.
     */
    private static function metaDeclaresUtf8(\DOMDocument $dom): bool
    {
        foreach ($dom->getElementsByTagName('meta') as $meta_node) {
            // case in-sensitive since Content-type and utf-8 can be written in many ways
            $via_http_equiv = stripos($meta_node->getAttribute('http-equiv'), 'content-type') !== false
                && stripos($meta_node->getAttribute('content'), 'utf-8') !== false;

            if ($via_http_equiv || stripos($meta_node->getAttribute('charset'), 'utf-8') !== false) {
                return true;
            }
        }

        return false;
    }
}
/**
 * Common parent of the EmbedHelper_* exception classes declared in this file.
 */
class EmbedHelper_Exception extends \Exception
{
    /**
     * Hand all three arguments on to \Exception unchanged.
     *
     * @param string          $message  exception message
     * @param int             $code     exception code
     * @param null|\Throwable $previous previous exception, if any
     */
    public function __construct($message = '', $code = 0, $previous = null)
    {
        parent::__construct($message, $code, $previous);
    }
}
/**
 * Thrown by EmbedHelper::getEmbed() when DOMDocument::loadHTML() reports
 * failure for the downloaded body.
 */
class EmbedHelper_BadHtmlException extends EmbedHelper_Exception
{
    /**
     * Build the exception with its fixed message and code 0.
     *
     * @param null|\Throwable $previous previous exception, if any
     */
    public function __construct($previous = null)
    {
        // A constructor has no meaningful return value, so don't `return` the parent call.
        parent::__construct('Bad HTML in discovery data.', 0, $previous);
    }
}
/**
 * Exception carrying the fixed message 'No oEmbed discovery data.'.
 */
class EmbedHelper_DiscoveryException extends EmbedHelper_Exception
{
    /**
     * Build the exception with its fixed message and code 0.
     *
     * @param null|\Throwable $previous previous exception, if any
     */
    public function __construct($previous = null)
    {
        // A constructor has no meaningful return value, so don't `return` the parent call.
        parent::__construct('No oEmbed discovery data.', 0, $previous);
    }
}

View File

@ -0,0 +1,180 @@
<?php
// {{{ License
// This file is part of GNU social - https://www.gnu.org/software/social
//
// GNU social is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GNU social is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with GNU social. If not, see <http://www.gnu.org/licenses/>.
// }}}
/**
* OembedPlugin implementation for GNU social
*
* @package GNUsocial
*
* @author Stephen Paul Weber
* @author Mikael Nordfeldth
* @author Diogo Cordeiro <diogo@fc.up.pt>
* @copyright 2019 Free Software Foundation, Inc http://www.fsf.org
* @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
*/
namespace Plugin\Embed\Entity;
use App\Core\Entity;
/**
* Table definition for attachment_embed
*
* @copyright 2019 Free Software Foundation, Inc http://www.fsf.org
* @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
*/
/**
 * Entity holding the embed (oEmbed-style) metadata stored for an attachment.
 *
 * NOTE(review): getByFile() and saveNew() still use the legacy names
 * File_embed, File, File_redirection, File_thumbnail, NoResultException and
 * ServerException. This file only imports App\Core\Entity, so those class
 * names resolve inside the Plugin\Embed\Entity namespace. They also address
 * the record through a `file_id` property, while this entity only declares
 * `attachment_id`. Both methods need porting before they can work.
 */
class AttachmentEmbed extends Entity
{
    // Column summaries below mirror schemaDef()
    public $attachment_id; // int primary_key not_null
    public $version;       // varchar(20)
    public $type;          // varchar(20)
    public $mimetype;      // varchar(50)
    public $provider;      // text
    public $provider_url;  // text
    public $width;         // int
    public $height;        // int
    public $html;          // text
    public $title;         // text
    public $author_name;   // text
    public $author_url;    // text
    public $url;           // text
    public $modified;      // timestamp not_null

    /**
     * Schema definition for the 'attachment_embed' table.
     *
     * @return array table name, fields, primary key and foreign keys
     */
    public static function schemaDef()
    {
        return [
            'name'   => 'attachment_embed',
            'fields' => [
                'attachment_id' => ['type' => 'int', 'not null' => true, 'description' => 'oEmbed for that URL/file'],
                'version'       => ['type' => 'varchar', 'length' => 20, 'description' => 'oEmbed spec. version'],
                'type'          => ['type' => 'varchar', 'length' => 20, 'description' => 'oEmbed type: photo, video, link, rich'],
                'mimetype'      => ['type' => 'varchar', 'length' => 50, 'description' => 'mime type of resource'],
                'provider'      => ['type' => 'text', 'description' => 'name of this oEmbed provider'],
                'provider_url'  => ['type' => 'text', 'description' => 'URL of this oEmbed provider'],
                'width'         => ['type' => 'int', 'description' => 'width of oEmbed resource when available'],
                'height'        => ['type' => 'int', 'description' => 'height of oEmbed resource when available'],
                'html'          => ['type' => 'text', 'description' => 'html representation of this oEmbed resource when applicable'],
                'title'         => ['type' => 'text', 'description' => 'title of oEmbed resource when available'],
                'author_name'   => ['type' => 'text', 'description' => 'author name for this oEmbed resource'],
                'author_url'    => ['type' => 'text', 'description' => 'author URL for this oEmbed resource'],
                'url'           => ['type' => 'text', 'description' => 'URL for this oEmbed resource when applicable (photo, link)'],
                'modified'      => ['type' => 'timestamp', 'not null' => true, 'description' => 'date this record was modified'],
            ],
            'primary key'  => ['attachment_id'],
            // NOTE(review): this key maps a local column `file_id`, which is not
            // declared in 'fields' above (the only id column is `attachment_id`).
            // Confirm the intended local column and target table before relying on it.
            'foreign keys' => [
                'file_embed_file_id_fkey' => ['file', ['file_id' => 'id']],
            ],
        ];
    }

    /**
     * Fetch an entry by using a File's id
     *
     * @param File $file file whose `id` is used for the lookup
     *
     * @throws NoResultException when find() reports no matching row
     *
     * @return File_embed the record that was found
     */
    public static function getByFile(File $file)
    {
        $fo          = new File_embed();
        $fo->file_id = $file->id;
        if (!$fo->find(true)) {
            throw new NoResultException($fo);
        }
        return $fo;
    }

    /**
     * @return mixed the value of the `url` property
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Save embedding info for a new file.
     *
     * Copies the non-empty fields of $data onto a new record, inserts it and,
     * when $data has a thumbnail URL or is of type 'photo', makes sure a
     * File_thumbnail is stored unless one already exists.
     *
     * @param object $data    Services_oEmbed_Object_*
     * @param int    $file_id
     *
     * @throws ServerException when the insert reports failure
     */
    public static function saveNew($data, $file_id)
    {
        $file_embed          = new File_embed;
        $file_embed->file_id = $file_id;
        if (!isset($data->version)) {
            common_debug('Embed: data->version undefined in variable $data: ' . var_export($data, true));
        }
        // `?? null` keeps the stored value (null) for a missing version without
        // raising an "Undefined property" warning right after logging it.
        $file_embed->version = $data->version ?? null;
        $file_embed->type    = $data->type;
        if (!empty($data->provider)) {
            $file_embed->provider = $data->provider;
        }
        // provider_name, when present, takes precedence over provider
        if (!empty($data->provider_name)) {
            $file_embed->provider = $data->provider_name;
        }
        if (!empty($data->provider_url)) {
            $file_embed->provider_url = $data->provider_url;
        }
        if (!empty($data->width)) {
            $file_embed->width = (int) ($data->width);
        }
        if (!empty($data->height)) {
            $file_embed->height = (int) ($data->height);
        }
        if (!empty($data->html)) {
            $file_embed->html = $data->html;
        }
        if (!empty($data->title)) {
            $file_embed->title = $data->title;
        }
        if (!empty($data->author_name)) {
            $file_embed->author_name = $data->author_name;
        }
        if (!empty($data->author_url)) {
            $file_embed->author_url = $data->author_url;
        }
        if (!empty($data->url)) {
            $file_embed->url = $data->url;
            $given_url       = File_redirection::_canonUrl($file_embed->url);
            if (!empty($given_url)) {
                try {
                    $file                 = File::getByUrl($given_url);
                    $file_embed->mimetype = $file->mimetype;
                } catch (NoResultException $e) {
                    // File_redirection::where argument 'discover' is false to avoid loops
                    $redir = File_redirection::where($given_url, false);
                    if (!empty($redir->file_id)) {
                        // Only affects the thumbnail lookup below; the record's
                        // file_id was already assigned above.
                        $file_id = $redir->file_id;
                    }
                }
            }
        }
        $result = $file_embed->insert();
        if ($result === false) {
            throw new ServerException('Failed to insert File_embed data into database!');
        }
        if (!empty($data->thumbnail_url) || ($data->type == 'photo')) {
            $ft = File_thumbnail::getKV('file_id', $file_id);
            if ($ft instanceof File_thumbnail) {
                common_log(
                    LOG_WARNING,
                    "Strangely, a File_thumbnail object exists for new file {$file_id}",
                    __FILE__
                );
            } else {
                File_thumbnail::saveNew($data, $file_id);
            }
        }
    }
}

View File

@ -0,0 +1,85 @@
<?php
// {{{ License
// This file is part of GNU social - https://www.gnu.org/software/social
//
// GNU social is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GNU social is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with GNU social. If not, see <http://www.gnu.org/licenses/>.
// }}}
/**
* OembedPlugin implementation for GNU social
*
* @package GNUsocial
*
* @author Mikael Nordfeldth
* @author Diogo Cordeiro <diogo@fc.up.pt>
* @copyright 2019 Free Software Foundation, Inc http://www.fsf.org
* @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
*/
namespace Plugin\Embed\Test;
use PHPUnit\Framework\TestCase;
/**
 * Checks EmbedHelper lookups against a list of remote URLs.
 *
 * NOTE(review): EmbedHelper is referenced unqualified and this file only
 * imports PHPUnit\Framework\TestCase, so the name resolves inside
 * Plugin\Embed\Test. Add the matching `use` once the helper's namespace is
 * confirmed.
 */
final class EmbedTest extends TestCase
{
    /**
     * Look up $url and check the returned metadata against $expectedType.
     *
     * For the expected type 'none' the lookup itself must throw; for every
     * other type an exception from the lookup is rethrown.
     *
     * @param string $url
     * @param string $expectedType one of 'photo', 'video', 'link' or 'none'
     * @dataProvider sources
     */
    public function testEmbed($url, $expectedType)
    {
        // Only the lookup is guarded: assertion failures further down must
        // never be mistaken for "no data for this URL".
        try {
            $data = EmbedHelper::getEmbed($url);
        } catch (\Exception $e) {
            if ($expectedType == 'none') {
                static::assertSame($expectedType, 'none', 'Should not have data for this URL.');
                return;
            }
            throw $e;
        }

        static::assertSame($expectedType, $data->type);

        if ($data->type == 'photo') {
            static::assertTrue(!empty($data->thumbnail_url), 'Photo must have a URL.');
            static::assertTrue(!empty($data->thumbnail_width), 'Photo must have a width.');
            static::assertTrue(!empty($data->thumbnail_height), 'Photo must have a height.');
        } elseif ($data->type == 'video') {
            static::assertTrue(!empty($data->html), 'Video must have embedding HTML.');
            static::assertTrue(!empty($data->thumbnail_url), 'Video should have a thumbnail.');
        } else {
            static::assertTrue(!empty($data->title), 'Page must have a title');
            static::assertTrue(!empty($data->url), 'Page must have a URL');
        }

        if (!empty($data->thumbnail_url)) {
            static::assertTrue(!empty($data->thumbnail_width), 'Thumbnail must list a width.');
            static::assertTrue(!empty($data->thumbnail_height), 'Thumbnail must list a height.');
        }
    }

    /**
     * Data provider for testEmbed().
     *
     * @return array list of [url, expected type] pairs
     */
    public static function sources()
    {
        return [
            ['https://notabug.org/', 'link'],
            ['http://www.youtube.com/watch?v=eUgLR232Cnw', 'video'],
            [GNUSOCIAL_ENGINE_URL, 'link'],
            ['https://www.gnu.org/graphics/heckert_gnu.transp.small.png', 'photo'],
            ['http://vimeo.com/9283184', 'video'],
            ['http://leuksman.com/log/2010/10/29/statusnet-0-9-6-release/', 'none'],
            ['https://github.com/git/git/commit/85e9c7e1d42849c5c3084a9da748608468310c0e', 'link'],
        ];
    }
}

View File

@ -0,0 +1,29 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2020-08-04 01:05+0100\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"Language: \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=CHARSET\n"
"Content-Transfer-Encoding: 8bit\n"
#. TRANS: Exception. %s is the URL we tried to GET.
#: lib/embedhelper.php:87
#, php-format
msgid "Could not GET URL %s."
msgstr ""
#. TRANS: Plugin description.
#: EmbedPlugin.php:687
msgid "Plugin for using and representing oEmbed, OpenGraph and other data."
msgstr ""

View File

@ -0,0 +1,23 @@
# Translation file for GNU social - the free software social networking platform
# Copyright (C) 2015 - 2019 Free Software Foundation, Inc http://www.fsf.org
# This file is under https://www.gnu.org/licenses/agpl v3 or later
#
# Translators:
msgid ""
msgstr ""
"Project-Id-Version: GNU social\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2015-02-02 17:47+0100\n"
"PO-Revision-Date: 2015-02-07 09:29+0000\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: Afrikaans (http://www.transifex.com/gnu-social/gnu-social/language/af/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: af\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
#. TRANS: Plugin description.
#: OembedPlugin.php:190
msgid "Plugin for using and representing Oembed data."
msgstr ""

View File

@ -0,0 +1,23 @@
# Translation file for GNU social - the free software social networking platform
# Copyright (C) 2015 - 2019 Free Software Foundation, Inc http://www.fsf.org
# This file is under https://www.gnu.org/licenses/agpl v3 or later
#
# Translators:
msgid ""
msgstr ""
"Project-Id-Version: GNU social\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2015-02-02 17:47+0100\n"
"PO-Revision-Date: 2015-02-07 09:29+0000\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: Arabic (http://www.transifex.com/gnu-social/gnu-social/language/ar/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: ar\n"
"Plural-Forms: nplurals=6; plural=n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5;\n"
#. TRANS: Plugin description.
#: OembedPlugin.php:190
msgid "Plugin for using and representing Oembed data."
msgstr ""

View File

@ -0,0 +1,23 @@
# Translation file for GNU social - the free software social networking platform
# Copyright (C) 2015 - 2019 Free Software Foundation, Inc http://www.fsf.org
# This file is under https://www.gnu.org/licenses/agpl v3 or later
#
# Translators:
msgid ""
msgstr ""
"Project-Id-Version: GNU social\n"
"Report-Msgid-Bugs-To: \n"