<?php

// Scalar type declarations in this file are enforced without implicit coercion.
declare(strict_types=1);

// {{{ License

// This file is part of GNU social - https://www.gnu.org/software/social
//
// GNU social is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GNU social is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with GNU social. If not, see <http://www.gnu.org/licenses/>.

// }}}
2018-07-18 05:31:24 +01:00
/**
 * OEmbed and OpenGraph implementation for GNU social
 *
 * @package   GNUsocial
 *
 * @author    Mikael Nordfeldth
 * @author    Stephen Paul Weber
 * @author    hannes
 * @author    Mikael Nordfeldth
 * @author    Miguel Dantas
 * @author    Hugo Sales <hugo@hsal.es>
 * @author    Diogo Peralta Cordeiro <mail@diogo.site>
 * @copyright 2014-2022 Free Software Foundation, Inc http://www.fsf.org
 * @license   https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
 */
2021-04-14 16:27:37 +01:00
namespace Plugin\Embed ;
2018-07-18 05:31:24 +01:00
2021-04-25 22:23:46 +01:00
use App\Core\Cache ;
2022-03-27 15:19:09 +01:00
use App\Core\DB ;
2021-04-25 22:23:46 +01:00
use App\Core\Event ;
2021-04-27 21:56:50 +01:00
use App\Core\GSFile ;
2021-04-25 22:23:46 +01:00
use App\Core\HTTPClient ;
2021-08-12 00:41:57 +01:00
use function App\Core\I18n\_m ;
2021-04-25 22:23:46 +01:00
use App\Core\Log ;
2021-04-19 19:51:05 +01:00
use App\Core\Modules\Plugin ;
2022-03-27 16:43:59 +01:00
use App\Core\Router ;
2021-08-12 00:41:57 +01:00
use App\Entity\Note ;
2021-04-27 21:56:50 +01:00
use App\Util\Common ;
2021-08-14 15:04:30 +01:00
use App\Util\Exception\ClientException ;
2021-04-25 22:23:46 +01:00
use App\Util\Exception\DuplicateFoundException ;
use App\Util\Exception\NotFoundException ;
2021-08-12 00:41:57 +01:00
use App\Util\Exception\ServerException ;
2021-04-28 16:03:17 +01:00
use App\Util\Formatting ;
2021-04-27 21:56:50 +01:00
use App\Util\TemporaryFile ;
2021-12-03 00:44:45 +00:00
use Component\Attachment\Entity\Attachment ;
use Component\Link\Entity\Link ;
2021-04-27 21:56:50 +01:00
use Embed\Embed as LibEmbed ;
2021-05-01 22:03:54 +01:00
use Exception ;
2021-04-25 22:23:46 +01:00
use Symfony\Component\HttpFoundation\Request ;
2021-12-03 00:44:45 +00:00
use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface ;
use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface ;
use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface ;
use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface ;
2019-07-07 13:26:10 +01:00
2018-07-18 05:31:24 +01:00
/**
 * Base class for the Embed plugin that does most of the heavy lifting to get
 * and display representations for remote content.
 *
 * @copyright 2014-2021 Free Software Foundation, Inc http://www.fsf.org
 * @license   https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
 */
2021-04-19 19:51:05 +01:00
class Embed extends Plugin
2014-05-06 22:00:30 +01:00
{
2022-03-28 21:18:44 +01:00
/**
 * Version string of this plugin.
 */
public static function version(): string
{
    $plugin_version = '3.0.1';

    return $plugin_version;
}
2021-04-25 22:23:46 +01:00
/**
 * Settings which can be set in social.local.yaml
 *
 * WARNING: the domain list entries are _regexps_ (slashes are added later).
 * Always escape your dots and end ('$') your strings
 */
// Toggles for matching a URL's host against the two domain lists below
public bool $check_whitelist = false;
public bool $check_blacklist = false;

public array $domain_whitelist = [
    // hostname
    '.*', // Default to allowing any host
];
public array $domain_blacklist = [];

// Whether to maintain a copy of the original media or only a thumbnail of it
public bool $store_image = true;

// NOTE(review): the following have no default and are not read in this file;
// presumably they are filled in from the configuration - confirm before reading them
public ?int $thumbnail_width;
public ?int $thumbnail_height;
public ?int $max_size;
public ?bool $smart_crop;

// TODO: storeThumbs setting
2021-08-14 15:04:30 +01:00
2021-12-26 20:20:03 +00:00
/**
 * Largest file size this plugin accepts: the smaller of the plugin's own
 * 'max_file_size' setting and the 'file_quota' of the attachments section.
 */
private function getMaxFileSize(): int
{
    $plugin_limit = Common::config('plugin_embed', 'max_file_size');
    $global_quota = Common::config('attachments', 'file_quota');

    return min($plugin_limit, $global_quota);
}
2018-07-18 05:31:24 +01:00
/**
 * Routing hook: registers the 'oembed' route ('main/oembed') and points it
 * at this plugin's OEmbed controller.
 *
 * @param Router $m the router that was initialized
 *
 * @throws Exception
 *
 * @return bool Event::next, so other handlers still run
 */
public function onAddRoute(Router $m): bool
{
    $route_id   = 'oembed';
    $route_path = 'main/oembed';
    $m->connect($route_id, $route_path, Controller\OEmbed::class);

    return Event::next;
}
2021-04-25 22:23:46 +01:00
/**
 * Advertise the oEmbed representations (XML and JSON) of attachment pages by
 * appending link rel=alternate descriptors to the collected head elements.
 *
 * Only paths whose first segment is 'attachment' get the links; any other
 * path (including the site root) leaves $result untouched.
 *
 * @param Request $request the request being rendered
 * @param array   $result  head element descriptors collected so far, appended to in place
 *
 * @return bool Event::next, so other handlers still run
 */
public function onShowHeadElements(Request $request, array &$result): bool
{
    $matches = [];
    preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches);

    // The pattern needs at least one path segment: for '/' nothing matches and $matches stays empty
    $url = match ($matches[1] ?? null) {
        'attachment' => "{$matches[1]}/{$matches[2]}",
        default      => null,
    };

    // Without a target URL there is nothing an oEmbed consumer could ask for
    if (\is_null($url)) {
        return Event::next;
    }

    foreach (['xml', 'json'] as $format) {
        $result[] = [
            'link' => [
                'rel'   => 'alternate',
                'type'  => "application/{$format}+oembed",
                'href'  => Router::url('oembed', ['format' => $format, 'url' => $url]),
                'title' => 'oEmbed',
            ],
        ];
    }

    return Event::next;
}
2021-08-13 19:36:34 +01:00
/**
 * Render a link together with its stored Embed representation, when one exists.
 *
 * @param array $vars view variables; the 'link' and 'note' entries are read here
 * @param array $res  rendered fragments, appended to in place
 *
 * @return bool Event::stop once the embed view was rendered, Event::next when
 *              no (single) Embed representation could be loaded
 */
public function onViewLink(array $vars, array &$res): bool
{
    $link = $vars['link'];

    try {
        $cache_key = 'attachment-embed-' . $link->getId();
        $embed     = Cache::get(
            $cache_key,
            fn () => DB::findOneBy('attachment_embed', ['link_id' => $link->getId()]),
        );
    } catch (DuplicateFoundException $e) {
        Log::warning($e->getMessage());
        return Event::next;
    } catch (NotFoundException) {
        Log::debug("Embed doesn't have a representation for the link id={$link->getId()}. Must have been stored before the plugin was enabled.");
        return Event::next;
    }

    $template_vars = [
        'embed'      => $embed,
        'attributes' => $embed->getImageHTMLAttributes(),
        'link'       => $link,
        'note'       => $vars['note'],
    ];
    $res[] = Formatting::twigRenderFile('embed/embedView.html.twig', $template_vars);

    return Event::stop;
}
2014-05-06 22:00:30 +01:00
/**
 * Save embedding information for a Link found in a Note, if applicable.
 *
 * Only links with an HTML or XHTML mimetype are handled. Links that already
 * have an Embed representation, or that already have an attachment, are left
 * alone. Otherwise the page metadata is fetched, its thumbnail is downloaded
 * and stored as an attachment when acceptable (a file-less attachment is
 * created otherwise), and an AttachmentEmbed is persisted and flushed.
 *
 * Exceptions raised while fetching the page metadata are not caught here.
 *
 * @param Link $link the link to describe
 * @param Note $note the note the link was found in (not used here)
 *
 * @throws DuplicateFoundException
 *
 * @return bool Event::stop if an Embed was created, Event::next otherwise
 */
public function onNewLinkFromNote(Link $link, Note $note): bool
{
    // Only handle text mime
    $mimetype = $link->getMimetype();
    if (\is_null($mimetype) || !(Formatting::startsWith($mimetype, 'text/html') || Formatting::startsWith($mimetype, 'application/xhtml+xml'))) {
        return Event::next;
    }

    // Ignore if already handled
    $attachment_embed = DB::find('attachment_embed', ['link_id' => $link->getId()]);
    if (!\is_null($attachment_embed)) {
        return Event::next;
    }

    // If an attachment already exists, do not create an Embed for it. Some other plugin must have done things
    $attachment_to_link = DB::find('attachment_to_link', ['link_id' => $link->getId()]);
    if (!\is_null($attachment_to_link)) {
        $attachment_id = $attachment_to_link->getAttachmentId();
        try {
            $attachment = DB::findOneBy('attachment', ['id' => $attachment_id]);
            $attachment->livesIncrementAndGet();
            return Event::next;
        } catch (DuplicateFoundException | NotFoundException $e) {
            // The relation doesn't lead to a single attachment: log it and build an Embed below
            Log::error($e->getMessage(), context: [$e]);
        }
    }

    // Attachment without a stored file, used whenever no thumbnail could be kept
    $store_placeholder = static function () use ($link): Attachment {
        DB::persist($placeholder = Attachment::create(['mimetype' => $link->getMimetype()]));
        Event::handle('AttachmentStoreNew', [&$placeholder]);
        return $placeholder;
    };

    // Create an Embed representation for this URL
    $embed_data            = $this->getEmbedLibMetadata($link->getUrl());
    $embed_data['link_id'] = $link->getId();

    // The metadata may carry no thumbnail at all; downloadThumbnail() only accepts strings
    $thumbnail_url = $embed_data['thumbnail_url'] ?? null;
    $img_data      = \is_string($thumbnail_url) ? $this->downloadThumbnail($thumbnail_url) : null;

    // Strict checks on purpose: a loose comparison can't tell false (thumbnail
    // rejected, URL kept) from null (URL unusable, URL dropped)
    if (\is_string($img_data) && $img_data !== '') {
        // String is valid image data
        $temp_file = new TemporaryFile();
        $temp_file->write($img_data);
        try {
            $attachment = GSFile::storeFileAsAttachment($temp_file);
        } catch (ClientException) {
            $attachment = $store_placeholder();
        }
    } else {
        if ($img_data !== false) {
            // URL isn't usable
            $embed_data['thumbnail_url'] = null;
        }
        // Thumbnail isn't acceptable (or there is none)
        $attachment = $store_placeholder();
    }

    $embed_data['attachment_id'] = $attachment->getId();
    DB::persist(Entity\AttachmentEmbed::create($embed_data));
    DB::flush();

    return Event::stop;
}
2021-08-18 14:15:30 +01:00
/**
 * Check a URL's host against the configured domain whitelist and blacklist.
 *
 * List entries may be written either as `regex => provider name` pairs or as
 * plain `regex` values (the form the default whitelist uses); the regex
 * delimiters are added here. When a list check is enabled and no host can be
 * extracted from the URL, the URL is rejected.
 *
 * @param string $url the URL to check
 *
 * @return bool true if allowed by the lists, false otherwise
 */
private function allowedLink(string $url): bool
{
    if (!$this->check_whitelist && !$this->check_blacklist) {
        return true;
    }

    // parse_url() yields null or false when there is no usable host
    $host = parse_url($url, \PHP_URL_HOST);
    if (!\is_string($host) || $host === '') {
        return false;
    }

    $matches_any = static function (array $patterns) use ($host): bool {
        foreach ($patterns as $key => $value) {
            // String keys carry the regex; for list-style entries the value does
            $regex = \is_string($key) ? $key : (string) $value;
            if (preg_match("/{$regex}/", $host) === 1) {
                return true;
            }
        }
        return false;
    };

    // don't trust by default: the host has to match a whitelist entry
    if ($this->check_whitelist && !$matches_any($this->domain_whitelist)) {
        return false;
    }
    // assume it passed by default: the host must not match a blacklist entry
    if ($this->check_blacklist && $matches_any($this->domain_blacklist)) {
        return false;
    }

    return true;
}
2021-04-25 22:23:46 +01:00
/**
 * Look up metadata for the given $url with the 'oscarotero/Embed' library.
 *
 * Exceptions thrown by the library are not caught here.
 *
 * @param string $url the page to describe
 *
 * @return array the keys title, description, author_name, author_url,
 *               provider_name, provider_url and thumbnail_url, after
 *               normalizeEmbedLibMetadata() was applied
 */
private function getEmbedLibMetadata(string $url): array
{
    Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");
    $info = (new LibEmbed())->get($url);

    // scheme://host of the page itself, used where the page doesn't declare a URL of its own
    $url_parts = parse_url($url);
    $root_url  = isset($url_parts['scheme'], $url_parts['host'])
        ? "{$url_parts['scheme']}://{$url_parts['host']}"
        : $url;

    // NOTE(review): each value is read before being tested instead of using `??`
    // directly on $info, because `??` on an object property consults __isset()
    // when the property is provided by magic accessors - confirm for this library
    $author_url   = $info->authorUrl;
    $provider_url = $info->providerUrl;
    $image        = $info->image;

    $metadata = [
        'title'         => $info->title,
        'description'   => $info->description,
        'author_name'   => $info->authorName,
        'author_url'    => $author_url ? (string) $author_url : $root_url,
        'provider_name' => $info->providerName,
        // Falls back to a URL, since normalizeEmbedLibMetadata() prefixes relative thumbnails with it
        'provider_url'  => \is_null($provider_url) ? $root_url : (string) $provider_url,
        // Prefer the page's image, otherwise its favicon
        'thumbnail_url' => (string) (!\is_null($image) ? $image : $info->favicon),
    ];

    return self::normalizeEmbedLibMetadata($metadata);
}
2021-04-28 16:03:17 +01:00
2021-08-12 00:41:57 +01:00
/**
 * Normalize fetched info.
 *
 * Only 'thumbnail_url' is touched: a host-relative path is prefixed with
 * 'provider_url', a protocol-relative URL gets the scheme of 'provider_url'
 * (https when it has none), and the known blank WordPress placeholder image
 * is replaced by null. A missing or empty thumbnail URL is left as it is.
 *
 * @param array $metadata metadata as assembled by getEmbedLibMetadata()
 *
 * @return array the same metadata with 'thumbnail_url' normalized
 */
private static function normalizeEmbedLibMetadata(array $metadata): array
{
    $thumbnail_url = $metadata['thumbnail_url'] ?? null;
    if (!\is_string($thumbnail_url) || $thumbnail_url === '') {
        return $metadata;
    }

    $provider_url = (string) ($metadata['provider_url'] ?? '');

    // sometimes sites serve the path, not the full URL, for images
    // let's "be liberal in what you accept from others"!
    if (str_starts_with($thumbnail_url, '//')) {
        // protocol-relative: the host is already there, only the scheme is missing
        $scheme        = parse_url($provider_url, \PHP_URL_SCHEME);
        $thumbnail_url = (\is_string($scheme) ? $scheme : 'https') . ':' . $thumbnail_url;
    } elseif (str_starts_with($thumbnail_url, '/')) {
        // add protocol and host, without doubling the slash between them
        $thumbnail_url = rtrim($provider_url, '/') . $thumbnail_url;
    }

    // Some wordpress opengraph implementations sometimes return a white blank image
    // no need for us to save that!
    if ($thumbnail_url === 'https://s0.wp.com/i/blank.jpg') {
        $thumbnail_url = null;
    }

    $metadata['thumbnail_url'] = $thumbnail_url;

    return $metadata;
}
2016-03-16 23:31:45 +00:00
2018-07-18 05:31:24 +01:00
/**
 * Private helper that:
 * - checks if the given URL is valid and is in fact an image (basic test,
 *   based on the response headers), returns null if not;
 * - checks if it respects the file size limit and the whitelist/blacklist,
 *   returns false if not;
 * - downloads the thumbnail, returns its content as a string if successful.
 *
 * HTTP failures of either request are logged and reported as null.
 *
 * @param string $url URL to the remote thumbnail
 *
 * @return null|bool|string null when the URL isn't usable, false when the
 *                          thumbnail isn't acceptable, the image data otherwise
 */
private function downloadThumbnail(string $url): bool|string|null
{
    // Is this a valid URL?
    if (!Common::isValidHttpUrl($url)) {
        Log::debug("Invalid URL ({$url}) in Embed->downloadThumbnail.");
        return null;
    }

    // Is this URL trusted?
    if (!$this->allowedLink($url)) {
        Log::info("Blocked URL ({$url}) in Embed->downloadThumbnail.");
        return false;
    }

    // Validate if the URL really does point to a remote image
    try {
        $headers = HTTPClient::head($url)->getHeaders();
    } catch (ClientExceptionInterface | RedirectionExceptionInterface | ServerExceptionInterface | TransportExceptionInterface $e) {
        Log::debug('Embed->downloadThumbnail@HTTPHead->getHeaders: ' . $e->getMessage(), [$e]);
        return null;
    }
    $content_type = $headers['content-type'][0] ?? null;
    if (empty($content_type) || GSFile::mimetypeMajor($content_type) !== 'image') {
        Log::debug("URL ({$url}) doesn't point to an image (content-type: " . (!empty($content_type) ? $content_type : 'not available') . ') in Embed->downloadThumbnail.');
        return null;
    }

    // Does it respect the file quota? An unannounced size is not acceptable either
    $file_size = $headers['content-length'][0] ?? null;
    $max_size  = $this->getMaxFileSize();
    if (\is_null($file_size) || (int) $file_size > $max_size) {
        $announced = $file_size ?? 'unknown';
        Log::debug("Went to download remote thumbnail of size {$announced} but the plugin's filesize limit is {$max_size} so we aborted in Embed->downloadThumbnail.");
        return false;
    }

    // Download the file; the GET request can fail even though the HEAD one succeeded
    Log::debug("Downloading remote thumbnail from URL: {$url} in Embed->downloadThumbnail.");
    try {
        $content = HTTPClient::get($url)->getContent();
    } catch (ClientExceptionInterface | RedirectionExceptionInterface | ServerExceptionInterface | TransportExceptionInterface $e) {
        Log::debug('Embed->downloadThumbnail@HTTPGet->getContent: ' . $e->getMessage(), [$e]);
        return null;
    }

    // The announced size is only a claim, so check what was actually received (in bytes)
    $received = \strlen($content);
    if ($received > $max_size) {
        Log::debug("Downloaded remote thumbnail of size {$received} but the plugin's filesize limit is {$max_size} so we discarded it in Embed->downloadThumbnail.");
        return false;
    }

    return $content;
}
2021-09-15 16:49:29 +01:00
/**
 * Offer the title stored in the Embed representation of an attachment.
 *
 * @param Attachment  $attachment the attachment a title is wanted for
 * @param Note        $note       the note the attachment belongs to (not used here)
 * @param null|string $title      receives the Embed's title when one Embed is found
 *
 * @return bool Event::stop when the title was taken from an Embed, Event::next
 *              when the attachment has no (single) Embed representation
 */
public function onAttachmentGetBestTitle(Attachment $attachment, Note $note, ?string &$title): bool
{
    try {
        $embed = DB::findOneBy('attachment_embed', ['attachment_id' => $attachment->getId()]);
    } catch (DuplicateFoundException $e) {
        // More than one Embed points at this attachment, so none of them is "the" title
        Log::warning($e->getMessage());
        return Event::next;
    } catch (NotFoundException) {
        // This attachment has no Embed representation; let other handlers provide a title
        return Event::next;
    }

    $title = $embed->getTitle();

    return Event::stop;
}
2021-04-27 21:56:50 +01:00
/**
 * Event raised when GNU social polls the plugin for information about it.
 * Adds this plugin's version information to the $versions array.
 *
 * @param array $versions inherited from parent, appended to in place
 *
 * @throws ServerException NOTE(review): nothing visible here throws it; presumably raised by _m() - confirm
 *
 * @return bool true hook value
 */
public function onPluginVersion(array &$versions): bool
{
    $versions[] = [
        'name'        => 'Embed',
        'version'     => $this->version(),
        'author'      => 'Mikael Nordfeldth, Hugo Sales, Diogo Peralta Cordeiro',
        'homepage'    => GNUSOCIAL_PROJECT_URL,
        // TRANS: Plugin description.
        'description' => _m('Plugin for using and representing oEmbed, OpenGraph and other data.'),
    ];

    return Event::next;
}
}