From 2d057024b9f9632806b425f037c3632563eaf8c2 Mon Sep 17 00:00:00 2001 From: Hugo Sales Date: Wed, 24 Nov 2021 15:51:01 +0000 Subject: [PATCH] [TAGS][ENTITY][Note] Properly store the note language, pass it along when rendering content. Add mechanism for stemming tags, with fallback to simply slug-ifying them --- components/Posting/Posting.php | 64 +++++++++++++++---------------- components/Tag/Controller/Tag.php | 13 +++++-- components/Tag/Tag.php | 30 ++++++++++----- src/Entity/Actor.php | 5 +-- src/Entity/Language.php | 20 +++++++++- src/Entity/Note.php | 40 +++++++++---------- src/Util/Formatting.php | 8 ++-- 7 files changed, 107 insertions(+), 73 deletions(-) diff --git a/components/Posting/Posting.php b/components/Posting/Posting.php index f92e8901e5..c330fb5820 100644 --- a/components/Posting/Posting.php +++ b/components/Posting/Posting.php @@ -1,6 +1,6 @@ getActor(); + $actor = $user->getActor(); $actor_id = $user->getId(); - $to_tags = []; - $tags = Cache::get( + $to_tags = []; + $tags = Cache::get( "actor-circle-{$actor_id}", - fn() => DB::dql('select c.tag from App\Entity\ActorCircle c where c.tagger = :tagger', ['tagger' => $actor_id]), + fn () => DB::dql('select c.tag from App\Entity\ActorCircle c where c.tagger = :tagger', ['tagger' => $actor_id]), ); foreach ($tags as $t) { - $t = $t['tag']; + $t = $t['tag']; $to_tags[$t] = $t; } @@ -94,7 +94,7 @@ class Posting extends Component Event::handle('PostingAvailableContentTypes', [&$available_content_types]); $context_actor = null; // This is where we'd plug in the group in which the actor is posting, or whom they're replying to - $form_params = [ + $form_params = [ ['to', ChoiceType::class, ['label' => _m('To:'), 'multiple' => false, 'expanded' => false, 'choices' => $to_tags]], ['visibility', ChoiceType::class, ['label' => _m('Visibility:'), 'multiple' => false, 'expanded' => false, 'data' => 'public', 'choices' => [_m('Public') => 'public', _m('Instance') => 'instance', _m('Private') => 'private']]], ['content', TextareaType::class, ['label' => _m('Content:'), 'data' => $initial_content, 'attr' => ['placeholder' => _m($placeholder)], 'constraints' => [new Length(['max' => Common::config('site', 'text_limit')])]]], @@ -102,25 +102,25 @@ class Posting extends Component FormFields::language($actor, $context_actor, label: 'Note language:', help: 'The language in which you wrote this note, so others can see it'), ]; - if (count($available_content_types) > 1) { + if (\count($available_content_types) > 1) { $form_params[] = ['content_type', ChoiceType::class, [ - 'label' => _m('Text format:'), 'multiple' => false, 'expanded' => false, - 'data' => $available_content_types[array_key_first($available_content_types)], + 'label' => _m('Text format:'), 'multiple' => false, 'expanded' => false, + 'data' => $available_content_types[array_key_first($available_content_types)], 'choices' => $available_content_types, ], ]; } $form_params[] = ['post_note', SubmitType::class, ['label' => _m('Post')]]; - $form = Form::create($form_params); + $form = Form::create($form_params); $form->handleRequest($request); if ($form->isSubmitted()) { try { if ($form->isValid()) { - $data = $form->getData(); + $data = $form->getData(); $content_type = $data['content_type'] ?? $available_content_types[array_key_first($available_content_types)]; - self::storeLocalNote($user->getActor(), $data['content'], $content_type, $data['attachments']); + self::storeLocalNote($user->getActor(), $data['content'], $content_type, $data['language'], $data['attachments']); throw new RedirectException(); } } catch (FormSizeFileException $sizeFileException) { @@ -140,35 +140,35 @@ class Posting extends Component * $actor_id, possibly as a reply to note $reply_to and with flag * $is_local. Sanitizes $content and $attachments * - * @param Actor $actor - * @param string $content - * @param string $content_type - * @param array $attachments Array of UploadedFile to be stored as GSFiles associated to this note + * @param array $attachments Array of UploadedFile to be stored as GSFiles associated to this note * @param array $processed_attachments Array of [Attachment, Attachment's name] to be associated to this $actor and Note - * @return \App\Core\Entity|mixed + * + * @throws \App\Util\Exception\DuplicateFoundException * @throws ClientException * @throws ServerException - * @throws \App\Util\Exception\DuplicateFoundException + * + * @return \App\Core\Entity|mixed */ - public static function storeLocalNote(Actor $actor, string $content, string $content_type, array $attachments = [], $processed_attachments = []) + public static function storeLocalNote(Actor $actor, string $content, string $content_type, string $language, array $attachments = [], $processed_attachments = []) { $rendered = null; - Event::handle('RenderNoteContent', [$content, $content_type, &$rendered, $actor]); + Event::handle('RenderNoteContent', [$content, $content_type, &$rendered, $actor, $language]); $note = Note::create([ - 'actor_id' => $actor->getId(), - 'content' => $content, + 'actor_id' => $actor->getId(), + 'content' => $content, 'content_type' => $content_type, - 'rendered' => $rendered, - 'is_local' => true, + 'rendered' => $rendered, + 'language_id' => Language::getFromLocale($language)->getId(), + 'is_local' => true, ]); /** @var UploadedFile[] $attachments */ foreach ($attachments as $f) { - $filesize = $f->getSize(); + $filesize = $f->getSize(); $max_file_size = Common::getUploadLimit(); if ($max_file_size < $filesize) { throw new ClientException(_m('No file may be larger than {quota} bytes and the file you sent was {size} bytes. ' - . 'Try to upload a smaller version.', ['quota' => $max_file_size, 'size' => $filesize],)); + . 'Try to upload a smaller version.', ['quota' => $max_file_size, 'size' => $filesize], )); } Event::handle('EnforceUserFileQuota', [$filesize, $actor->getId()]); $processed_attachments[] = [GSFile::storeFileAsAttachment($f), $f->getClientOriginalName()]; @@ -193,12 +193,12 @@ class Posting extends Component return $note; } - public function onRenderNoteContent(string $content, string $content_type, ?string &$rendered, Actor $author, ?Note $reply_to = null) + public function onRenderNoteContent(string $content, string $content_type, ?string &$rendered, Actor $author, string $language, ?Note $reply_to = null) { switch ($content_type) { case 'text/plain': - $rendered = Formatting::renderPlainText($content); - $rendered = Formatting::linkifyMentions($rendered, $author, $reply_to); + $rendered = Formatting::renderPlainText($content, $language); + $rendered = Formatting::linkifyMentions($rendered, $author, $language, $reply_to); return Event::stop; case 'text/html': // TODO: It has to linkify and stuff as well diff --git a/components/Tag/Controller/Tag.php b/components/Tag/Controller/Tag.php index 9e18403881..c4a6849111 100644 --- a/components/Tag/Controller/Tag.php +++ b/components/Tag/Controller/Tag.php @@ -13,14 +13,19 @@ class Tag extends Controller { public function tag(string $tag) { - $user = Common::user(); - $page = $this->int('page') ?: 1; - $canonical = CompTag::canonicalTag($tag); + $actor = Common::actor(); + $page = $this->int('page') ?: 1; + $lang = $this->string('lang'); + if (\is_null($lang)) { + $langs = $actor->getPreferredLanguageChoices(); + $lang = $langs[array_key_first($langs)]; + } + $canonical = CompTag::canonicalTag($tag, $lang); $notes = Cache::pagedStream( key: "tag-{$canonical}", query: 'select n from note n join note_tag nt with n.id = nt.note_id where nt.canonical = :canon order by nt.created DESC, nt.note_id DESC', query_args: ['canon' => $canonical], - actor: $user, + actor: $actor, page: $page, ); diff --git a/components/Tag/Tag.php b/components/Tag/Tag.php index 72beba2258..9769b47812 100644 --- a/components/Tag/Tag.php +++ b/components/Tag/Tag.php @@ -28,6 +28,7 @@ use App\Core\DB\DB; use App\Core\Event; use App\Core\Modules\Component; use App\Core\Router\Router; +use App\Entity\Language; use App\Entity\Note; use App\Entity\NoteTag; use App\Util\Formatting; @@ -64,8 +65,8 @@ class Tag extends Component $processed_tags = false; preg_match_all(self::TAG_REGEX, $content, $matched_tags, \PREG_SET_ORDER); foreach ($matched_tags as $match) { - $tag = $match[2]; - $canonical_tag = self::canonicalTag($tag); + $tag = self::ensureLength($match[2]); + $canonical_tag = self::canonicalTag($tag, Language::getFromId($note->getLanguageId())->getLocale()); DB::persist(NoteTag::create(['tag' => $tag, 'canonical' => $canonical_tag, 'note_id' => $note->getId()])); Cache::pushList("tag-{$canonical_tag}", $note); $processed_tags = true; @@ -75,21 +76,32 @@ class Tag extends Component } } - public function onRenderContent(string &$text) + public function onRenderContent(string &$text, string $language) { - $text = preg_replace_callback(self::TAG_REGEX, fn ($m) => $m[1] . $this->tagLink($m[2]), $text); + $text = preg_replace_callback(self::TAG_REGEX, fn ($m) => $m[1] . self::tagLink($m[2], $language), $text); } - private function tagLink(string $tag): string + private static function tagLink(string $tag, string $language): string { - $canonical = self::canonicalTag($tag); - $url = Router::url('tag', ['tag' => $canonical]); + $tag = self::ensureLength($tag); + $canonical = self::canonicalTag($tag, $language); + $url = Router::url('tag', ['tag' => $canonical, 'lang' => $language]); return HTML::html(['a' => ['attrs' => ['href' => $url, 'title' => $tag, 'rel' => 'tag'], $tag]], options: ['indent' => false]); } - public static function canonicalTag(string $tag): string + public static function ensureLength(string $tag): string { - return mb_substr(Formatting::slugify($tag), 0, self::MAX_TAG_LENGTH); + return mb_substr($tag, 0, self::MAX_TAG_LENGTH); + } + + public static function canonicalTag(string $tag, string $language): string + { + $result = ''; + if (Event::handle('StemWord', [$language, $tag, &$result]) !== Event::stop) { + $result = Formatting::slugify($tag); + } + $result = str_replace('#', '', $result); + return self::ensureLength($result); } /** diff --git a/src/Entity/Actor.php b/src/Entity/Actor.php index fe2b704721..45c96c0f83 100644 --- a/src/Entity/Actor.php +++ b/src/Entity/Actor.php @@ -57,7 +57,7 @@ class Actor extends Entity private int $id; private string $nickname; private ?string $fullname = null; - private int $roles = 4; + private int $roles = 4; private ?string $homepage; private ?string $bio; private ?string $location; @@ -389,8 +389,7 @@ class Actor extends Entity fn (Language $l) => $l->getLocale(), ), ) ?: [ - Common::config('site', 'language') => (Cache::getHashMapKey('languages', Common::config('site', 'language')) - ?: DB::findOneBy('language', ['locale' => Common::config('site', 'language')])), + Common::config('site', 'language') => Language::getFromLocale(Common::config('site', 'language')), ]; return array_merge(...F\map(array_values($langs), fn ($l) => $l->toChoiceFormat())); } diff --git a/src/Entity/Language.php b/src/Entity/Language.php index 04e4086147..1129e79fa5 100644 --- a/src/Entity/Language.php +++ b/src/Entity/Language.php @@ -108,6 +108,24 @@ class Language extends Entity // @codeCoverageIgnoreEnd // }}} Autocode + public static function getFromId(int $id): self + { + return Cache::getHashMapKey( + 'languages-id', + (string) $id, + calculate_map: fn () => F\reindex(DB::dql('select l from language l'), fn (self $l) => (string) $l->getId()), + ); + } + + public static function getFromLocale(string $locale): self + { + return Cache::getHashMapKey( + 'languages', + $locale, + calculate_map: fn () => F\reindex(DB::dql('select l from language l'), fn (self $l) => $l->getLocale()), + ); + } + public static function getLanguageChoices(): array { $langs = Cache::getHashMap( @@ -136,7 +154,7 @@ class Language extends Entity $key = array_key_first($preferred_language_choices); $locale = $preferred_language_choices[$key]; unset($preferred_language_choices[$key], $language_choices[$key]); - $short_display = Cache::getHashMapKey('languages', $locale)->getShortDisplay(); + $short_display = self::getFromLocale($locale)->getShortDisplay(); $preferred_language_choices[$short_display] = trim($locale); $language_choices[$short_display] = trim($locale); } diff --git a/src/Entity/Note.php b/src/Entity/Note.php index 711e158085..cf611fff43 100644 --- a/src/Entity/Note.php +++ b/src/Entity/Note.php @@ -53,7 +53,7 @@ class Note extends Entity private ?string $source; private int $scope = VisibilityScope::PUBLIC; private string $url; - private string $language; + private int $language_id; private DateTimeInterface $created; private DateTimeInterface $modified; @@ -159,14 +159,14 @@ class Note extends Entity return $this; } - public function getLanguage(): string + public function getLanguageId(): int { - return $this->language; + return $this->language_id; } - public function setLanguage(string $language): self + public function setLanguageId(int $language_id): self { - $this->language = $language; + $this->language_id = $language_id; return $this; } @@ -284,18 +284,18 @@ class Note extends Entity $scope = VisibilityScope::create($this->scope); return $scope->public || (!\is_null($a) && ( - ($scope->subscriber && 0 != DB::count('subscription', ['subscriber' => $a->getId(), 'subscribed' => $this->actor_id])) + ($scope->subscriber && 0 != DB::count('subscription', ['subscriber' => $a->getId(), 'subscribed' => $this->actor_id])) || ($scope->addressee && 0 != DB::count('notification', ['activity_id' => $this->id, 'actor_id' => $a->getId()])) || ($scope->group && [] != DB::dql( - <<<'EOF' - select m from group_member m - join group_inbox i with m.group_id = i.group_id - join note n with i.activity_id = n.id - where n.id = :note_id and m.actor_id = :actor_id - EOF, - ['note_id' => $this->id, 'actor_id' => $a->getId()], - )) - )); + <<<'EOF' + select m from group_member m + join group_inbox i with m.group_id = i.group_id + join note n with i.activity_id = n.id + where n.id = :note_id and m.actor_id = :actor_id + EOF, + ['note_id' => $this->id, 'actor_id' => $a->getId()], + )) + )); } public static function schemaDef(): array @@ -310,11 +310,11 @@ class Note extends Entity 'rendered' => ['type' => 'text', 'description' => 'rendered note content, so we can keep the microtags (if not local)'], 'is_local' => ['type' => 'bool', 'not null' => true, 'description' => 'was this note generated by a local actor'], 'source' => ['type' => 'varchar', 'foreign key' => true, 'length' => 32, 'target' => 'NoteSource.code', 'multiplicity' => 'many to one', 'description' => 'fkey to source of note, like "web", "im", or "clientname"'], - 'scope' => ['type' => 'int', 'not null' => true, 'default' => VisibilityScope::PUBLIC, 'description' => 'bit map for distribution scope; 0 = everywhere; 1 = this server only; 2 = addressees; 4 = groups; 8 = subscribers; 16 = messages; null = default'], - 'url' => ['type' => 'text', 'description' => 'Permalink to Note'], - 'language' => ['type' => 'int', 'foreign key' => true, 'target' => 'Language.id', 'multiplicity' => 'one to many', 'description' => 'The language for this note'], - 'created' => ['type' => 'datetime', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was created'], - 'modified' => ['type' => 'timestamp', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was modified'], + 'scope' => ['type' => 'int', 'not null' => true, 'default' => VisibilityScope::PUBLIC, 'description' => 'bit map for distribution scope; 0 = everywhere; 1 = this server only; 2 = addressees; 4 = groups; 8 = subscribers; 16 = messages; null = default'], + 'url' => ['type' => 'text', 'description' => 'Permalink to Note'], + 'language_id' => ['type' => 'int', 'foreign key' => true, 'target' => 'Language.id', 'multiplicity' => 'one to many', 'description' => 'The language for this note'], + 'created' => ['type' => 'datetime', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was created'], + 'modified' => ['type' => 'timestamp', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was modified'], ], 'primary key' => ['id'], 'indexes' => [ diff --git a/src/Util/Formatting.php b/src/Util/Formatting.php index da2e6a0937..bb0b109801 100644 --- a/src/Util/Formatting.php +++ b/src/Util/Formatting.php @@ -230,14 +230,14 @@ abstract class Formatting /** * Render a plain text note content into HTML, extracting links and tags */ - public static function renderPlainText(string $text): string + public static function renderPlainText(string $text, ?string $language = null): string { $text = self::quoteAndRemoveControlCodes($text); // Split \n\n into paragraphs, process each paragrah and merge - return implode("\n", F\map(explode("\n\n", $text), function (string $paragraph) { + return implode("\n", F\map(explode("\n\n", $text), function (string $paragraph) use ($language) { $paragraph = nl2br($paragraph, use_xhtml: false); - Event::handle('RenderContent', [&$paragraph]); + Event::handle('RenderContent', [&$paragraph, $language]); return HTML::html(['p' => [$paragraph]], options: ['raw' => true, 'indent' => false]); })); @@ -458,7 +458,7 @@ abstract class Formatting * * @return string partially-rendered HTML */ - public static function linkifyMentions(string $text, Actor $author, ?Note $parent = null): string + public static function linkifyMentions(string $text, Actor $author, string $language, ?Note $parent = null): string { $mentions = self::findMentions($text, $author, $parent);