upstream V3 development https://www.gnusocial.rocks/v3
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

508 lines
18 KiB

  1. <?php
  2. declare(strict_types = 1);
  3. // {{{ License
  4. // This file is part of GNU social - https://www.gnu.org/software/social
  5. //
  6. // GNU social is free software: you can redistribute it and/or modify
  7. // it under the terms of the GNU Affero General Public License as published by
  8. // the Free Software Foundation, either version 3 of the License, or
  9. // (at your option) any later version.
  10. //
  11. // GNU social is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. // GNU Affero General Public License for more details.
  15. //
  16. // You should have received a copy of the GNU Affero General Public License
  17. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  18. // }}}
  19. /**
  20. * String formatting utilities
  21. *
  22. * @package GNUsocial
  23. * @category Util
  24. *
  25. * @author Hugo Sales <hugo@hsal.es>
  26. * @copyright 2020-2021 Free Software Foundation, Inc http://www.fsf.org
  27. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  28. */
  29. namespace App\Util;
  30. use App\Core\DB;
  31. use App\Core\Event;
  32. use App\Core\Log;
  33. use App\Entity\Actor;
  34. use App\Util\Exception\NicknameException;
  35. use App\Util\Exception\ServerException;
  36. use Component\Circle\Circle;
  37. use Component\Circle\Entity\ActorCircle;
  38. use Component\Group\Entity\LocalGroup;
  39. use Component\Tag\Tag;
  40. use Exception;
  41. use Functional as F;
  42. use InvalidArgumentException;
  43. abstract class Formatting
  44. {
  /**
   * Twig environment used by twigRenderString() and twigRenderFile().
   * Remains null until setTwig() has been called.
   */
  private static ?\Twig\Environment $twig = null;

  /**
   * Register the Twig environment the rendering helpers of this class use
   */
  public static function setTwig(\Twig\Environment $twig): void
  {
      self::$twig = $twig;
  }

  /**
   * Render a Twig template supplied as a string
   *
   * @param string $template the template source
   * @param array  $context  variables handed to the template
   *
   * @throws ServerException if setTwig() has not been called yet
   */
  public static function twigRenderString(string $template, array $context): string
  {
      return self::getTwig()->createTemplate($template, null)->render($context);
  }

  /**
   * Render the Twig template found at $template_path
   *
   * @param string $template_path template name/path, passed as is to Twig
   * @param array  $context       variables handed to the template
   *
   * @throws ServerException if setTwig() has not been called yet
   */
  public static function twigRenderFile(string $template_path, array $context): string
  {
      return self::getTwig()->render($template_path, $context);
  }

  /**
   * Return the registered Twig environment, failing with an explicit
   * message (instead of an engine-level Error) when none was set
   *
   * @throws ServerException
   */
  private static function getTwig(): \Twig\Environment
  {
      return self::$twig ?? throw new ServerException('Formatting::setTwig must be called before rendering Twig templates');
  }
  /**
   * Normalize a path: every run of `/` and/or `\` is collapsed into a
   * single `/`
   */
  public static function normalizePath(string $path): string
  {
      $separator_runs = ',(/|\\\\)+,';
      return preg_replace($separator_runs, '/', $path);
  }
  /**
   * Get the module (plugin or component) name from its path, or null if
   * the path contains neither `/plugins/` nor `/components/`
   *
   * The name is the path segment right after the first of those markers
   * found in $path (`/plugins/` is looked for first).
   *
   * @throws ServerException if the marker is found but is not followed by
   *                         a `name/` segment
   */
  public static function moduleFromPath(string $path): ?string
  {
      foreach (['/plugins/', '/components/'] as $marker) {
          $marker_pos = mb_strpos($path, $marker);
          if ($marker_pos !== false) {
              $name_start = $marker_pos + mb_strlen($marker);
              $name_end   = mb_strpos($path, '/', $name_start);
              if (!$name_end) {
                  // We might be running directly from the plugins dir?
                  // If so, there's no place to store locale info.
                  $m = 'The GNU social install dir seems to contain a piece named \'plugin\' or \'component\'';
                  Log::critical($m);
                  throw new ServerException($m);
              }
              return mb_substr($path, $name_start, $name_end - $name_start);
          }
      }
      return null;
  }
  /**
   * Check whether $haystack starts with $needle
   *
   * @param array|string $haystack a string, or an array whose elements must all pass the check
   * @param array|string $needle   a prefix, or an array of prefixes of which one has to match
   */
  public static function startsWith(array|string $haystack, array|string $needle): bool
  {
      if (!\is_string($haystack)) {
          // Array of haystacks: each one is checked on its own
          return F\every($haystack, fn ($candidate) => self::startsWith($candidate, $needle));
      }
      $prefixes = \is_array($needle) ? $needle : [$needle];
      return F\some($prefixes, fn ($prefix) => str_starts_with($haystack, $prefix));
  }
  /**
   * Check whether $haystack ends with $needle
   *
   * @param array|string $haystack a string, or an array whose elements must all pass the check
   * @param array|string $needle   a suffix, or an array of suffixes of which one has to match
   */
  public static function endsWith(array|string $haystack, array|string $needle): bool
  {
      if (!\is_string($haystack)) {
          // Array of haystacks: each one is checked on its own
          return F\every($haystack, fn ($candidate) => self::endsWith($candidate, $needle));
      }
      $suffixes = \is_array($needle) ? $needle : [$needle];
      return F\some($suffixes, fn ($suffix) => str_ends_with($haystack, $suffix));
  }
  /**
   * If $haystack starts with $needle, remove it from the beginning
   *
   * @param string $haystack the string to trim
   * @param string $needle   the prefix to drop; an empty prefix leaves $haystack untouched
   *
   * @return string $haystack without the leading $needle, or $haystack
   *                unchanged when it does not start with $needle
   */
  public static function removePrefix(string $haystack, string $needle): string
  {
      if ($needle === '' || !str_starts_with($haystack, $needle)) {
          return $haystack;
      }
      return mb_substr($haystack, mb_strlen($needle));
  }
  /**
   * If $haystack ends with $needle, remove it from the end
   *
   * @param string $haystack the string to trim
   * @param string $needle   the suffix to drop; an empty suffix leaves $haystack untouched
   *
   * @return string $haystack without the trailing $needle, or $haystack
   *                unchanged when it does not end with $needle
   */
  public static function removeSuffix(string $haystack, string $needle): string
  {
      // Compare against '' explicitly: empty('0') is true, which would
      // make it impossible to strip the suffix '0'
      if ($needle === '' || !str_ends_with($haystack, $needle)) {
          return $haystack;
      }
      return mb_substr($haystack, 0, -mb_strlen($needle));
  }
  /**
   * Convert camelCase to snake_case: an underscore is inserted between an
   * ASCII lower case letter and the ASCII upper case letter following it,
   * then the whole string is lower cased
   */
  public static function camelCaseToSnakeCase(string $str): string
  {
      $with_underscores = preg_replace('/([a-z])([A-Z])/', '$1_$2', $str);
      return mb_strtolower($with_underscores);
  }
  /**
   * Join the `_` separated segments of $str, upper casing the first
   * character of every segment
   *
   * NOTE(review): ucfirst is applied to the first segment as well, so the
   * result starts with an upper case letter - confirm that is intended
   * for a method named "camel case".
   */
  public static function snakeCaseToCamelCase(string $str): string
  {
      $segments    = preg_split('/[\b_]/', $str);
      $capitalized = F\map($segments, F\ary('ucfirst', 1));
      return implode('', $capitalized);
  }
  /**
   * Convert snake_case to PascalCase: the output of snakeCaseToCamelCase()
   * with its first character upper cased
   */
  public static function snakeCaseToPascalCase(string $str): string
  {
      $joined = self::snakeCaseToCamelCase($str);
      return ucfirst($joined);
  }
  /**
   * Indent $in, a string or array, $level levels
   *
   * A string is split on "\n" first. Empty lines are dropped and the
   * remaining lines are joined back with "\n", each one prefixed with
   * $level * $count spaces.
   *
   * @param array|string $in    the text, or its lines
   * @param int          $level How many levels of indentation
   * @param int          $count How many spaces per indentation
   *
   * @throws InvalidArgumentException if $in is neither a string nor an array
   */
  public static function indent($in, int $level = 1, int $count = 2): string
  {
      if (\is_string($in)) {
          $in = explode("\n", $in);
      }
      if (!\is_array($in)) {
          // Concatenating a non-stringable value (e.g. a plain object) would
          // itself raise an Error and hide the exception we mean to throw,
          // so only its type is reported in that case
          $shown = (\is_scalar($in) || \is_null($in) || $in instanceof \Stringable)
              ? (string) $in
              : get_debug_type($in);
          throw new InvalidArgumentException('Formatting::indent\'s first parameter must be either an array or a string. Input was: ' . $shown);
      }
      $indent = str_repeat(' ', $count * $level);
      $lines  = array_filter($in, fn ($line) => $line != '');
      return implode("\n", array_map(fn ($line) => $indent . $line, $lines));
  }
  // Delimiter options accepted by toArray() ($split_type)
  // and by toString() ($join_type)
  public const SPLIT_BY_SPACE = ' ';
  public const JOIN_BY_SPACE = ' ';
  public const SPLIT_BY_COMMA = ', ';
  public const JOIN_BY_COMMA = ', ';
  // Unlike the others, this one is a regular expression
  public const SPLIT_BY_BOTH = '/[, ]/';

  /**
   * Convert scalars, objects implementing __toString or arrays to strings
   *
   * @param mixed  $value     arrays are imploded with $join_type, anything else is cast to string
   * @param string $join_type either JOIN_BY_SPACE or JOIN_BY_COMMA
   *
   * @throws Exception if $join_type is not one of the two join options
   */
  public static function toString($value, string $join_type = self::JOIN_BY_COMMA): string
  {
      $allowed_joins = [static::JOIN_BY_SPACE, static::JOIN_BY_COMMA];
      if (!\in_array($join_type, $allowed_joins)) {
          throw new Exception('Formatting::toString received invalid join option');
      }
      return \is_array($value) ? implode($join_type, $value) : (string) $value;
  }
  /**
   * Convert a user supplied string to array and return whether the conversion was successful
   *
   * The input may be wrapped in `[` `]`. After splitting, single and
   * double quotes are removed from every element and the element is
   * trimmed.
   *
   * @param string $input      the user supplied list
   * @param mixed  $output     receives the resulting array; left untouched when false is returned
   * @param string $split_type one of SPLIT_BY_SPACE, SPLIT_BY_COMMA or SPLIT_BY_BOTH
   *
   * @throws Exception if $split_type is not one of the split options
   *
   * @return bool false when $input does not look like a list
   */
  public static function toArray(string $input, &$output, string $split_type = self::SPLIT_BY_COMMA): bool
  {
      $known_split_types = [static::SPLIT_BY_SPACE, static::SPLIT_BY_COMMA, static::SPLIT_BY_BOTH];
      if (!\in_array($split_type, $known_split_types)) {
          throw new Exception('Formatting::toArray received invalid split option');
      }

      // Nothing to split: an empty input is an empty list
      if ($input === '') {
          $output = [];
          return true;
      }

      $captured = [];
      if (!preg_match('/^ *\[?([^,]+(, ?[^,]+)*)\]? *$/', $input, $captured)) {
          return false;
      }

      $pieces = match ($split_type) {
          self::SPLIT_BY_BOTH  => preg_split($split_type, $captured[1], 0, \PREG_SPLIT_NO_EMPTY),
          self::SPLIT_BY_COMMA => preg_split('/, ?/', $captured[1]),
          // Only the first character of the option is used as the delimiter
          default              => explode($split_type[0], $captured[1]),
      };

      $output = str_replace([' \'', '\'', ' "', '"'], '', $pieces);
      $output = F\map($output, F\ary('trim', 1));
      return true;
  }
  /**
   * Like \explode, but with multiple options for the delimiter. Will use the first one present
   *
   * Empty-string separators are skipped: they are "contained" in every
   * string and \explode() rejects them with a ValueError.
   *
   * @param array  $separators candidate delimiters, tried in order
   * @param string $string     the string to split
   *
   * @return array|string the pieces of $string, or - unlike \explode -
   *                      $string itself, unsplit, when none of the
   *                      separators occurs in it
   */
  public static function explode(array $separators, string $string): array|string
  {
      foreach ($separators as $sep) {
          if ($sep === '') {
              continue;
          }
          if (str_contains($string, $sep)) {
              return explode($sep, $string);
          }
      }
      return $string;
  }
  /**
   * Render a plain text note content into HTML
   *
   * The text is first quoted with quoteAndRemoveControlCodes(), then split
   * on blank lines. In each paragraph the remaining newlines become <br>,
   * the `RenderPlainTextNoteContent` event gets a chance to modify it, and
   * it is finally wrapped in a <p>.
   *
   * @param string      $text     the plain text content
   * @param null|string $language passed along to the event handlers
   */
  public static function renderPlainText(string $text, ?string $language = null): string
  {
      $rendered = [];
      foreach (explode("\n\n", self::quoteAndRemoveControlCodes($text)) as $paragraph) {
          $paragraph = nl2br($paragraph, use_xhtml: false);
          Event::handle('RenderPlainTextNoteContent', [&$paragraph, $language]);
          $rendered[] = HTML::html(['p' => [$paragraph]], options: ['raw' => true, 'indent' => false]);
      }
      return implode("\n", $rendered);
  }
  /**
   * Quote HTML special chars and strip ASCII control codes and Unicode
   * text formatting/direction codes. The latter can be pretty dangerous
   * for the visualisation of text or be used for mischief
   *
   * Tab is kept and every kind of line break is normalized to "\n".
   *
   * @param string $text untrusted text
   *
   * @return string the HTML-quoted text, free of the control codes above
   */
  public static function quoteAndRemoveControlCodes(string $text): string
  {
      // Quote special chars
      $text = htmlspecialchars($text, flags: \ENT_QUOTES | \ENT_SUBSTITUTE, double_encode: false);
      // Remove control codes, then normalize newlines to strictly \n.
      // The ASCII range ends at 0x1F (not 0x19), so that ESC and the
      // separator codes 0x1A-0x1F are stripped too
      return preg_replace(
          [
              '/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{1f}\x{200b}-\x{200f}\x{202a}-\x{202e}]/u',
              '/\R/u',
          ],
          ['', "\n"],
          $text,
      );
  }
  /**
   * Convert $str to its closest ASCII representation
   *
   * With php-intl the string is transliterated to lower case ASCII; in
   * both code paths everything that is not a letter or a number is
   * removed and the result is cut to $length characters.
   *
   * @param string $str    the string to slugify
   * @param int    $length maximum length of the result
   */
  public static function slugify(string $str, int $length = 64): string
  {
      if (\function_exists('transliterator_transliterate')) {
          $rules = 'Any-Latin;'                 // any charset to latin compatible
              . 'NFD;'                          // decompose
              . '[:Nonspacing Mark:] Remove;'   // remove nonspacing marks (accents etc.)
              . 'NFC;'                          // composite again
              . '[:Punctuation:] Remove;'       // remove punctuation (.,¿? etc.)
              . 'Lower();'                      // turn into lowercase
              . 'Latin-ASCII;';                 // get ASCII equivalents (ð to d for example)
          $transliterated = transliterator_transliterate($rules, $str);
          return mb_substr(preg_replace('/[^\pL\pN]/u', '', $transliterated), 0, $length);
      }

      // php-intl is highly recommended... without it no transliteration is
      // done: the string is only filtered and lower cased
      $alphanumeric = preg_replace('/[^\pL\pN]/u', '', $str);
      $lowercased   = mb_convert_case($alphanumeric, \MB_CASE_LOWER, 'UTF-8');
      return mb_substr($lowercased, 0, $length);
  }
  /**
   * Find the mentions in the given text, using the given Actor as context:
   *  - `@nickname`, resolved with $actor->findRelativeActor()
   *  - matches of Circle::TAG_CIRCLE_REGEX, resolved to the non-private
   *    ActorCircle with that tag created by $actor
   *  - `!nickname`, resolved with LocalGroup::getActorByNickname()
   *
   * The `StartFindMentions` event can pre-fill the result and, by
   * returning a falsy value, skip the lookups above; `EndFindMentions`
   * is fired after them.
   *
   * Note the return data format is internal, to be used for building links and
   * such. Should not be used directly; rather, call linkifyMentions().
   *
   * @param string $text  the text to search
   * @param Actor  $actor the Actor that is sending the current text
   *
   * @return array list of mentions, each an array with the keys 'mentioned'
   *               (array of Actors), 'type' ('mention', 'list' or 'group'),
   *               'text', 'position' (offset as reported by PCRE),
   *               'length', 'url' and, except for 'list', 'title'
   */
  public static function findMentions(string $text, Actor $actor): array
  {
      $mentions = [];
      // XXX: We remove <span> because when content is in html the tag comes as #<span>hashtag</span>
      $text = str_replace('<span>', '', $text);
      if (Event::handle('StartFindMentions', [$actor, $text, &$mentions])) {
          // @person mentions
          $person_matches = self::findMentionsRaw($text, '@');
          foreach ($person_matches as $match) {
              try {
                  $nickname = Nickname::normalize($match[0], check_already_used: false, check_is_allowed: false);
              } catch (NicknameException) {
                  // Bogus match? Drop it.
                  continue;
              }
              $mentioned = $actor->findRelativeActor($nickname);
              if ($mentioned instanceof Actor) {
                  $mentions[] = self::buildActorMention($mentioned, $match, 'mention');
              }
          }

          // @#circle/self-tag => mention of all subscribed circles tagged 'tag'
          $tag_matches = [];
          preg_match_all(
              Circle::TAG_CIRCLE_REGEX,
              $text,
              $tag_matches,
              \PREG_OFFSET_CAPTURE,
          );
          foreach ($tag_matches[1] as $tag_match) {
              $tag = Tag::extract($tag_match[0]);
              if (!Tag::validate($tag)) {
                  continue; // Ignore invalid tags
              }
              $ac = DB::findOneBy(ActorCircle::class, [
                  'tag'    => $tag,            // Notify circle of name tag WHERE
                  'tagger' => $actor->getID(), // Circle was created by Actor
              ], return_null: true);
              if (\is_null($ac) || $ac->getPrivate()) {
                  continue;
              }
              $mentions[] = [
                  'mentioned' => $ac->getSubscribedActors(),
                  'type'      => 'list',
                  'text'      => $tag_match[0],
                  'position'  => $tag_match[1],
                  'length'    => mb_strlen($tag_match[0]),
                  'url'       => $ac->getUrl(),
              ];
          }

          // !group/!org mentions
          $group_matches = self::findMentionsRaw($text, '!');
          foreach ($group_matches as $match) {
              try {
                  $nickname = Nickname::normalize($match[0], check_already_used: false, check_is_allowed: false);
              } catch (NicknameException) {
                  // Bogus match? Drop it.
                  continue;
              }
              $mentioned = LocalGroup::getActorByNickname($nickname);
              if ($mentioned instanceof Actor) {
                  $mentions[] = self::buildActorMention($mentioned, $match, 'group');
              }
          }

          Event::handle('EndFindMentions', [$actor, $text, &$mentions]);
      }
      return $mentions;
  }

  /**
   * Build the mention entry for a single resolved Actor
   *
   * @param Actor  $mentioned the Actor the match resolved to
   * @param array  $match     [matched text, offset], as captured with PREG_OFFSET_CAPTURE
   * @param string $type      value for the 'type' key ('mention' or 'group')
   */
  private static function buildActorMention(Actor $mentioned, array $match, string $type): array
  {
      $url = $mentioned->getUri(); // prefer the URI as URL, if it is one.
      if (!Common::isValidHttpUrl($url)) {
          $url = $mentioned->getUrl();
      }
      return [
          'mentioned' => [$mentioned],
          'type'      => $type,
          'text'      => $match[0],
          'position'  => $match[1],
          'length'    => mb_strlen($match[0]),
          'title'     => $mentioned->getFullname() ?? $mentioned->getNickname(),
          'url'       => $url,
      ];
  }
  /**
   * Does the actual regex pulls to find mentions in text.
   * Should generally not be called directly; for use in findMentions().
   *
   * @param string $text       the text to search
   * @param string $preMention Character(s) that signals a mention ('@', '!'...)
   *
   * @return array the matches of the nickname capture group, each one being
   *               [matched text, offset] (PREG_OFFSET_CAPTURE)
   */
  private static function findMentionsRaw(string $text, string $preMention = '@'): array
  {
      // the regexp's "(?!\@)" makes sure it doesn't matches the single "@remote" in "@remote@server.com"
      $pattern = '/' . Nickname::BEFORE_MENTIONS . preg_quote($preMention, '/') . '(' . Nickname::DISPLAY_FMT . ')\b(?!\@)/';

      $found = [];
      preg_match_all($pattern, $text, $found, \PREG_OFFSET_CAPTURE);

      // Only the capture group holding the nickname is of interest
      return $found[1];
  }
  /**
   * Finds the mentions within the partially-rendered text section and
   * turns them into live links.
   *
   * @param string      $text   partially-rendered HTML
   * @param Actor       $author the Actor that is composing the current notice
   * @param null|string $locale currently not used by this method
   *
   * @return array [partially-rendered HTML, array of mentions]
   */
  public static function linkifyMentions(string $text, Actor $author, ?string $locale): array
  {
      $mentions = self::findMentions($text, $author);

      // We need to go through in reverse order by position,
      // so our positions stay valid despite our fudging with the
      // string!
      $points = [];
      foreach ($mentions as $mention) {
          $points[$mention['position']] = $mention;
      }
      krsort($points);

      // NOTE(review): findMentions() strips '<span>' from its own copy of
      // the text before matching, so the positions it reports may be off
      // for a $text that contains '<span>' - confirm with callers.
      foreach ($points as $position => $mention) {
          // 'position' is a byte offset (PREG_OFFSET_CAPTURE) and
          // substr_replace() works on bytes, so the span to replace has to
          // be measured in bytes too. 'length' is a character count, which
          // is too short whenever the matched text has multibyte characters
          $byte_length = \is_string($mention['text'] ?? null)
              ? \strlen($mention['text'])
              : $mention['length'];
          $linkText = self::linkifyMentionArray($mention);
          $text     = substr_replace($text, $linkText, $position, $byte_length);
      }
      return [$text, $mentions];
  }
  /**
   * Turn one mention, as produced by findMentions(), into its HTML link
   *
   * The `StartLinkifyMention` event may provide the output itself; unless
   * it returns Event::next, whatever it stored is returned as is.
   * Otherwise an h-card <span> wrapping an <a> is built and handed to
   * `EndLinkifyMention` before being returned.
   *
   * @param array $mention uses the keys 'url', 'type', 'text' and, optionally, 'title'
   */
  public static function linkifyMentionArray(array $mention)
  {
      $output = null;
      if (Event::handle('StartLinkifyMention', [$mention, &$output]) !== Event::next) {
          return $output;
      }

      $link_attrs = [
          'href'  => $mention['url'],
          'class' => 'h-card u-url p-nickname ' . $mention['type'], // https://microformats.org/wiki/h-card
      ];
      if (!empty($mention['title'])) {
          $link_attrs['title'] = $mention['title'];
      }

      // The link shows the title when there is one, the matched text otherwise
      $anchor = HTML::html(['a' => ['attrs' => $link_attrs, $mention['title'] ?? $mention['text']]], options: ['indent' => false]);
      $output = HTML::html(['span' => ['attrs' => ['class' => 'h-card'], $anchor]], options: ['indent' => false, 'raw' => true]);

      Event::handle('EndLinkifyMention', [$mention, &$output]);
      return $output;
  }
  /**
   * Split words by `-`, `_` or lower to upper case transitions
   *
   * @return array the words, in order; the separators are dropped
   */
  public static function splitWords(string $words): array
  {
      $word_boundary = '/-|_|(?<=\p{Ll})(?=\p{Lu})/u';
      return preg_split($word_boundary, $words);
  }
  450. }