[TAGS][ENTITY][Note] Properly store the note language, pass it along when rendering content. Add mechanism for stemming tags, with fallback to simply slug-ifying them

This commit is contained in:
Hugo Sales 2021-11-24 15:51:01 +00:00
parent f837df5753
commit 2d057024b9
Signed by: someonewithpc
GPG Key ID: 7D0C7EAFC9D835A0
7 changed files with 107 additions and 73 deletions

View File

@ -1,6 +1,6 @@
<?php <?php
declare(strict_types=1); declare(strict_types = 1);
// {{{ License // {{{ License
@ -28,12 +28,14 @@ use App\Core\DB\DB;
use App\Core\Event; use App\Core\Event;
use App\Core\Form; use App\Core\Form;
use App\Core\GSFile; use App\Core\GSFile;
use function App\Core\I18n\_m;
use App\Core\Modules\Component; use App\Core\Modules\Component;
use App\Core\Security; use App\Core\Security;
use App\Entity\Actor; use App\Entity\Actor;
use App\Entity\ActorToAttachment; use App\Entity\ActorToAttachment;
use App\Entity\Attachment; use App\Entity\Attachment;
use App\Entity\AttachmentToNote; use App\Entity\AttachmentToNote;
use App\Entity\Language;
use App\Entity\Note; use App\Entity\Note;
use App\Util\Common; use App\Util\Common;
use App\Util\Exception\ClientException; use App\Util\Exception\ClientException;
@ -50,8 +52,6 @@ use Symfony\Component\HttpFoundation\File\Exception\FormSizeFileException;
use Symfony\Component\HttpFoundation\File\UploadedFile; use Symfony\Component\HttpFoundation\File\UploadedFile;
use Symfony\Component\HttpFoundation\Request; use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\Validator\Constraints\Length; use Symfony\Component\Validator\Constraints\Length;
use function App\Core\I18n\_m;
use function count;
class Posting extends Component class Posting extends Component
{ {
@ -69,15 +69,15 @@ class Posting extends Component
return Event::next; return Event::next;
} }
$actor = $user->getActor(); $actor = $user->getActor();
$actor_id = $user->getId(); $actor_id = $user->getId();
$to_tags = []; $to_tags = [];
$tags = Cache::get( $tags = Cache::get(
"actor-circle-{$actor_id}", "actor-circle-{$actor_id}",
fn() => DB::dql('select c.tag from App\Entity\ActorCircle c where c.tagger = :tagger', ['tagger' => $actor_id]), fn () => DB::dql('select c.tag from App\Entity\ActorCircle c where c.tagger = :tagger', ['tagger' => $actor_id]),
); );
foreach ($tags as $t) { foreach ($tags as $t) {
$t = $t['tag']; $t = $t['tag'];
$to_tags[$t] = $t; $to_tags[$t] = $t;
} }
@ -94,7 +94,7 @@ class Posting extends Component
Event::handle('PostingAvailableContentTypes', [&$available_content_types]); Event::handle('PostingAvailableContentTypes', [&$available_content_types]);
$context_actor = null; // This is where we'd plug in the group in which the actor is posting, or whom they're replying to $context_actor = null; // This is where we'd plug in the group in which the actor is posting, or whom they're replying to
$form_params = [ $form_params = [
['to', ChoiceType::class, ['label' => _m('To:'), 'multiple' => false, 'expanded' => false, 'choices' => $to_tags]], ['to', ChoiceType::class, ['label' => _m('To:'), 'multiple' => false, 'expanded' => false, 'choices' => $to_tags]],
['visibility', ChoiceType::class, ['label' => _m('Visibility:'), 'multiple' => false, 'expanded' => false, 'data' => 'public', 'choices' => [_m('Public') => 'public', _m('Instance') => 'instance', _m('Private') => 'private']]], ['visibility', ChoiceType::class, ['label' => _m('Visibility:'), 'multiple' => false, 'expanded' => false, 'data' => 'public', 'choices' => [_m('Public') => 'public', _m('Instance') => 'instance', _m('Private') => 'private']]],
['content', TextareaType::class, ['label' => _m('Content:'), 'data' => $initial_content, 'attr' => ['placeholder' => _m($placeholder)], 'constraints' => [new Length(['max' => Common::config('site', 'text_limit')])]]], ['content', TextareaType::class, ['label' => _m('Content:'), 'data' => $initial_content, 'attr' => ['placeholder' => _m($placeholder)], 'constraints' => [new Length(['max' => Common::config('site', 'text_limit')])]]],
@ -102,25 +102,25 @@ class Posting extends Component
FormFields::language($actor, $context_actor, label: 'Note language:', help: 'The language in which you wrote this note, so others can see it'), FormFields::language($actor, $context_actor, label: 'Note language:', help: 'The language in which you wrote this note, so others can see it'),
]; ];
if (count($available_content_types) > 1) { if (\count($available_content_types) > 1) {
$form_params[] = ['content_type', ChoiceType::class, $form_params[] = ['content_type', ChoiceType::class,
[ [
'label' => _m('Text format:'), 'multiple' => false, 'expanded' => false, 'label' => _m('Text format:'), 'multiple' => false, 'expanded' => false,
'data' => $available_content_types[array_key_first($available_content_types)], 'data' => $available_content_types[array_key_first($available_content_types)],
'choices' => $available_content_types, 'choices' => $available_content_types,
], ],
]; ];
} }
$form_params[] = ['post_note', SubmitType::class, ['label' => _m('Post')]]; $form_params[] = ['post_note', SubmitType::class, ['label' => _m('Post')]];
$form = Form::create($form_params); $form = Form::create($form_params);
$form->handleRequest($request); $form->handleRequest($request);
if ($form->isSubmitted()) { if ($form->isSubmitted()) {
try { try {
if ($form->isValid()) { if ($form->isValid()) {
$data = $form->getData(); $data = $form->getData();
$content_type = $data['content_type'] ?? $available_content_types[array_key_first($available_content_types)]; $content_type = $data['content_type'] ?? $available_content_types[array_key_first($available_content_types)];
self::storeLocalNote($user->getActor(), $data['content'], $content_type, $data['attachments']); self::storeLocalNote($user->getActor(), $data['content'], $content_type, $data['language'], $data['attachments']);
throw new RedirectException(); throw new RedirectException();
} }
} catch (FormSizeFileException $sizeFileException) { } catch (FormSizeFileException $sizeFileException) {
@ -140,35 +140,35 @@ class Posting extends Component
* $actor_id, possibly as a reply to note $reply_to and with flag * $actor_id, possibly as a reply to note $reply_to and with flag
* $is_local. Sanitizes $content and $attachments * $is_local. Sanitizes $content and $attachments
* *
* @param Actor $actor * @param array $attachments Array of UploadedFile to be stored as GSFiles associated to this note
* @param string $content
* @param string $content_type
* @param array $attachments Array of UploadedFile to be stored as GSFiles associated to this note
* @param array $processed_attachments Array of [Attachment, Attachment's name] to be associated to this $actor and Note * @param array $processed_attachments Array of [Attachment, Attachment's name] to be associated to this $actor and Note
* @return \App\Core\Entity|mixed *
* @throws \App\Util\Exception\DuplicateFoundException
* @throws ClientException * @throws ClientException
* @throws ServerException * @throws ServerException
* @throws \App\Util\Exception\DuplicateFoundException *
* @return \App\Core\Entity|mixed
*/ */
public static function storeLocalNote(Actor $actor, string $content, string $content_type, array $attachments = [], $processed_attachments = []) public static function storeLocalNote(Actor $actor, string $content, string $content_type, string $language, array $attachments = [], $processed_attachments = [])
{ {
$rendered = null; $rendered = null;
Event::handle('RenderNoteContent', [$content, $content_type, &$rendered, $actor]); Event::handle('RenderNoteContent', [$content, $content_type, &$rendered, $actor, $language]);
$note = Note::create([ $note = Note::create([
'actor_id' => $actor->getId(), 'actor_id' => $actor->getId(),
'content' => $content, 'content' => $content,
'content_type' => $content_type, 'content_type' => $content_type,
'rendered' => $rendered, 'rendered' => $rendered,
'is_local' => true, 'language_id' => Language::getFromLocale($language)->getId(),
'is_local' => true,
]); ]);
/** @var UploadedFile[] $attachments */ /** @var UploadedFile[] $attachments */
foreach ($attachments as $f) { foreach ($attachments as $f) {
$filesize = $f->getSize(); $filesize = $f->getSize();
$max_file_size = Common::getUploadLimit(); $max_file_size = Common::getUploadLimit();
if ($max_file_size < $filesize) { if ($max_file_size < $filesize) {
throw new ClientException(_m('No file may be larger than {quota} bytes and the file you sent was {size} bytes. ' throw new ClientException(_m('No file may be larger than {quota} bytes and the file you sent was {size} bytes. '
. 'Try to upload a smaller version.', ['quota' => $max_file_size, 'size' => $filesize],)); . 'Try to upload a smaller version.', ['quota' => $max_file_size, 'size' => $filesize], ));
} }
Event::handle('EnforceUserFileQuota', [$filesize, $actor->getId()]); Event::handle('EnforceUserFileQuota', [$filesize, $actor->getId()]);
$processed_attachments[] = [GSFile::storeFileAsAttachment($f), $f->getClientOriginalName()]; $processed_attachments[] = [GSFile::storeFileAsAttachment($f), $f->getClientOriginalName()];
@ -193,12 +193,12 @@ class Posting extends Component
return $note; return $note;
} }
public function onRenderNoteContent(string $content, string $content_type, ?string &$rendered, Actor $author, ?Note $reply_to = null) public function onRenderNoteContent(string $content, string $content_type, ?string &$rendered, Actor $author, string $language, ?Note $reply_to = null)
{ {
switch ($content_type) { switch ($content_type) {
case 'text/plain': case 'text/plain':
$rendered = Formatting::renderPlainText($content); $rendered = Formatting::renderPlainText($content, $language);
$rendered = Formatting::linkifyMentions($rendered, $author, $reply_to); $rendered = Formatting::linkifyMentions($rendered, $author, $language, $reply_to);
return Event::stop; return Event::stop;
case 'text/html': case 'text/html':
// TODO: It has to linkify and stuff as well // TODO: It has to linkify and stuff as well

View File

@ -13,14 +13,19 @@ class Tag extends Controller
{ {
public function tag(string $tag) public function tag(string $tag)
{ {
$user = Common::user(); $actor = Common::actor();
$page = $this->int('page') ?: 1; $page = $this->int('page') ?: 1;
$canonical = CompTag::canonicalTag($tag); $lang = $this->string('lang');
if (\is_null($lang)) {
$langs = $actor->getPreferredLanguageChoices();
$lang = $langs[array_key_first($langs)];
}
$canonical = CompTag::canonicalTag($tag, $lang);
$notes = Cache::pagedStream( $notes = Cache::pagedStream(
key: "tag-{$canonical}", key: "tag-{$canonical}",
query: 'select n from note n join note_tag nt with n.id = nt.note_id where nt.canonical = :canon order by nt.created DESC, nt.note_id DESC', query: 'select n from note n join note_tag nt with n.id = nt.note_id where nt.canonical = :canon order by nt.created DESC, nt.note_id DESC',
query_args: ['canon' => $canonical], query_args: ['canon' => $canonical],
actor: $user, actor: $actor,
page: $page, page: $page,
); );

View File

@ -28,6 +28,7 @@ use App\Core\DB\DB;
use App\Core\Event; use App\Core\Event;
use App\Core\Modules\Component; use App\Core\Modules\Component;
use App\Core\Router\Router; use App\Core\Router\Router;
use App\Entity\Language;
use App\Entity\Note; use App\Entity\Note;
use App\Entity\NoteTag; use App\Entity\NoteTag;
use App\Util\Formatting; use App\Util\Formatting;
@ -64,8 +65,8 @@ class Tag extends Component
$processed_tags = false; $processed_tags = false;
preg_match_all(self::TAG_REGEX, $content, $matched_tags, \PREG_SET_ORDER); preg_match_all(self::TAG_REGEX, $content, $matched_tags, \PREG_SET_ORDER);
foreach ($matched_tags as $match) { foreach ($matched_tags as $match) {
$tag = $match[2]; $tag = self::ensureLength($match[2]);
$canonical_tag = self::canonicalTag($tag); $canonical_tag = self::canonicalTag($tag, Language::getFromId($note->getLanguageId())->getLocale());
DB::persist(NoteTag::create(['tag' => $tag, 'canonical' => $canonical_tag, 'note_id' => $note->getId()])); DB::persist(NoteTag::create(['tag' => $tag, 'canonical' => $canonical_tag, 'note_id' => $note->getId()]));
Cache::pushList("tag-{$canonical_tag}", $note); Cache::pushList("tag-{$canonical_tag}", $note);
$processed_tags = true; $processed_tags = true;
@ -75,21 +76,32 @@ class Tag extends Component
} }
} }
public function onRenderContent(string &$text) public function onRenderContent(string &$text, string $language)
{ {
$text = preg_replace_callback(self::TAG_REGEX, fn ($m) => $m[1] . $this->tagLink($m[2]), $text); $text = preg_replace_callback(self::TAG_REGEX, fn ($m) => $m[1] . self::tagLink($m[2], $language), $text);
} }
private function tagLink(string $tag): string private static function tagLink(string $tag, string $language): string
{ {
$canonical = self::canonicalTag($tag); $tag = self::ensureLength($tag);
$url = Router::url('tag', ['tag' => $canonical]); $canonical = self::canonicalTag($tag, $language);
$url = Router::url('tag', ['tag' => $canonical, 'lang' => $language]);
return HTML::html(['a' => ['attrs' => ['href' => $url, 'title' => $tag, 'rel' => 'tag'], $tag]], options: ['indent' => false]); return HTML::html(['a' => ['attrs' => ['href' => $url, 'title' => $tag, 'rel' => 'tag'], $tag]], options: ['indent' => false]);
} }
public static function canonicalTag(string $tag): string public static function ensureLength(string $tag): string
{ {
return mb_substr(Formatting::slugify($tag), 0, self::MAX_TAG_LENGTH); return mb_substr($tag, 0, self::MAX_TAG_LENGTH);
}
public static function canonicalTag(string $tag, string $language): string
{
$result = '';
if (Event::handle('StemWord', [$language, $tag, &$result]) !== Event::stop) {
$result = Formatting::slugify($tag);
}
$result = str_replace('#', '', $result);
return self::ensureLength($result);
} }
/** /**

View File

@ -57,7 +57,7 @@ class Actor extends Entity
private int $id; private int $id;
private string $nickname; private string $nickname;
private ?string $fullname = null; private ?string $fullname = null;
private int $roles = 4; private int $roles = 4;
private ?string $homepage; private ?string $homepage;
private ?string $bio; private ?string $bio;
private ?string $location; private ?string $location;
@ -389,8 +389,7 @@ class Actor extends Entity
fn (Language $l) => $l->getLocale(), fn (Language $l) => $l->getLocale(),
), ),
) ?: [ ) ?: [
Common::config('site', 'language') => (Cache::getHashMapKey('languages', Common::config('site', 'language')) Common::config('site', 'language') => Language::getFromLocale(Common::config('site', 'language')),
?: DB::findOneBy('language', ['locale' => Common::config('site', 'language')])),
]; ];
return array_merge(...F\map(array_values($langs), fn ($l) => $l->toChoiceFormat())); return array_merge(...F\map(array_values($langs), fn ($l) => $l->toChoiceFormat()));
} }

View File

@ -108,6 +108,24 @@ class Language extends Entity
// @codeCoverageIgnoreEnd // @codeCoverageIgnoreEnd
// }}} Autocode // }}} Autocode
/**
 * Fetch the Language entity with the given numeric id through the
 * 'languages-id' cache hash map.
 *
 * The map is keyed by the stringified id. The callback handed to the cache
 * builds it by selecting every language and reindexing the rows by id.
 *
 * NOTE(review): presumably the callback only runs when the map is not yet
 * cached, and an id with no matching row would clash with the `self`
 * return type - confirm against Cache::getHashMapKey
 */
public static function getFromId(int $id): self
{
    $index_by_id = fn () => F\reindex(
        DB::dql('select l from language l'),
        fn (self $language) => (string) $language->getId(),
    );

    return Cache::getHashMapKey('languages-id', (string) $id, calculate_map: $index_by_id);
}
/**
 * Fetch the Language entity for the given locale string through the
 * 'languages' cache hash map.
 *
 * The map is keyed by locale. The callback handed to the cache builds it
 * by selecting every language and reindexing the rows by their locale.
 *
 * NOTE(review): presumably the callback only runs when the map is not yet
 * cached, and an unknown locale would clash with the `self` return type -
 * confirm against Cache::getHashMapKey
 */
public static function getFromLocale(string $locale): self
{
    $index_by_locale = fn () => F\reindex(
        DB::dql('select l from language l'),
        fn (self $language) => $language->getLocale(),
    );

    return Cache::getHashMapKey('languages', $locale, calculate_map: $index_by_locale);
}
public static function getLanguageChoices(): array public static function getLanguageChoices(): array
{ {
$langs = Cache::getHashMap( $langs = Cache::getHashMap(
@ -136,7 +154,7 @@ class Language extends Entity
$key = array_key_first($preferred_language_choices); $key = array_key_first($preferred_language_choices);
$locale = $preferred_language_choices[$key]; $locale = $preferred_language_choices[$key];
unset($preferred_language_choices[$key], $language_choices[$key]); unset($preferred_language_choices[$key], $language_choices[$key]);
$short_display = Cache::getHashMapKey('languages', $locale)->getShortDisplay(); $short_display = self::getFromLocale($locale)->getShortDisplay();
$preferred_language_choices[$short_display] = trim($locale); $preferred_language_choices[$short_display] = trim($locale);
$language_choices[$short_display] = trim($locale); $language_choices[$short_display] = trim($locale);
} }

View File

@ -53,7 +53,7 @@ class Note extends Entity
private ?string $source; private ?string $source;
private int $scope = VisibilityScope::PUBLIC; private int $scope = VisibilityScope::PUBLIC;
private string $url; private string $url;
private string $language; private int $language_id;
private DateTimeInterface $created; private DateTimeInterface $created;
private DateTimeInterface $modified; private DateTimeInterface $modified;
@ -159,14 +159,14 @@ class Note extends Entity
return $this; return $this;
} }
public function getLanguage(): string public function getLanguageId(): int
{ {
return $this->language; return $this->language_id;
} }
public function setLanguage(string $language): self public function setLanguageId(int $language_id): self
{ {
$this->language = $language; $this->language_id = $language_id;
return $this; return $this;
} }
@ -284,18 +284,18 @@ class Note extends Entity
$scope = VisibilityScope::create($this->scope); $scope = VisibilityScope::create($this->scope);
return $scope->public return $scope->public
|| (!\is_null($a) && ( || (!\is_null($a) && (
($scope->subscriber && 0 != DB::count('subscription', ['subscriber' => $a->getId(), 'subscribed' => $this->actor_id])) ($scope->subscriber && 0 != DB::count('subscription', ['subscriber' => $a->getId(), 'subscribed' => $this->actor_id]))
|| ($scope->addressee && 0 != DB::count('notification', ['activity_id' => $this->id, 'actor_id' => $a->getId()])) || ($scope->addressee && 0 != DB::count('notification', ['activity_id' => $this->id, 'actor_id' => $a->getId()]))
|| ($scope->group && [] != DB::dql( || ($scope->group && [] != DB::dql(
<<<'EOF' <<<'EOF'
select m from group_member m select m from group_member m
join group_inbox i with m.group_id = i.group_id join group_inbox i with m.group_id = i.group_id
join note n with i.activity_id = n.id join note n with i.activity_id = n.id
where n.id = :note_id and m.actor_id = :actor_id where n.id = :note_id and m.actor_id = :actor_id
EOF, EOF,
['note_id' => $this->id, 'actor_id' => $a->getId()], ['note_id' => $this->id, 'actor_id' => $a->getId()],
)) ))
)); ));
} }
public static function schemaDef(): array public static function schemaDef(): array
@ -310,11 +310,11 @@ class Note extends Entity
'rendered' => ['type' => 'text', 'description' => 'rendered note content, so we can keep the microtags (if not local)'], 'rendered' => ['type' => 'text', 'description' => 'rendered note content, so we can keep the microtags (if not local)'],
'is_local' => ['type' => 'bool', 'not null' => true, 'description' => 'was this note generated by a local actor'], 'is_local' => ['type' => 'bool', 'not null' => true, 'description' => 'was this note generated by a local actor'],
'source' => ['type' => 'varchar', 'foreign key' => true, 'length' => 32, 'target' => 'NoteSource.code', 'multiplicity' => 'many to one', 'description' => 'fkey to source of note, like "web", "im", or "clientname"'], 'source' => ['type' => 'varchar', 'foreign key' => true, 'length' => 32, 'target' => 'NoteSource.code', 'multiplicity' => 'many to one', 'description' => 'fkey to source of note, like "web", "im", or "clientname"'],
'scope' => ['type' => 'int', 'not null' => true, 'default' => VisibilityScope::PUBLIC, 'description' => 'bit map for distribution scope; 0 = everywhere; 1 = this server only; 2 = addressees; 4 = groups; 8 = subscribers; 16 = messages; null = default'], 'scope' => ['type' => 'int', 'not null' => true, 'default' => VisibilityScope::PUBLIC, 'description' => 'bit map for distribution scope; 0 = everywhere; 1 = this server only; 2 = addressees; 4 = groups; 8 = subscribers; 16 = messages; null = default'],
'url' => ['type' => 'text', 'description' => 'Permalink to Note'], 'url' => ['type' => 'text', 'description' => 'Permalink to Note'],
'language' => ['type' => 'int', 'foreign key' => true, 'target' => 'Language.id', 'multiplicity' => 'one to many', 'description' => 'The language for this note'], 'language_id' => ['type' => 'int', 'foreign key' => true, 'target' => 'Language.id', 'multiplicity' => 'one to many', 'description' => 'The language for this note'],
'created' => ['type' => 'datetime', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was created'], 'created' => ['type' => 'datetime', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was created'],
'modified' => ['type' => 'timestamp', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was modified'], 'modified' => ['type' => 'timestamp', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was modified'],
], ],
'primary key' => ['id'], 'primary key' => ['id'],
'indexes' => [ 'indexes' => [

View File

@ -230,14 +230,14 @@ abstract class Formatting
/** /**
* Render a plain text note content into HTML, extracting links and tags * Render a plain text note content into HTML, extracting links and tags
*/ */
public static function renderPlainText(string $text): string public static function renderPlainText(string $text, ?string $language = null): string
{ {
$text = self::quoteAndRemoveControlCodes($text); $text = self::quoteAndRemoveControlCodes($text);
// Split \n\n into paragraphs, process each paragraph and merge // Split \n\n into paragraphs, process each paragraph and merge
return implode("\n", F\map(explode("\n\n", $text), function (string $paragraph) { return implode("\n", F\map(explode("\n\n", $text), function (string $paragraph) use ($language) {
$paragraph = nl2br($paragraph, use_xhtml: false); $paragraph = nl2br($paragraph, use_xhtml: false);
Event::handle('RenderContent', [&$paragraph]); Event::handle('RenderContent', [&$paragraph, $language]);
return HTML::html(['p' => [$paragraph]], options: ['raw' => true, 'indent' => false]); return HTML::html(['p' => [$paragraph]], options: ['raw' => true, 'indent' => false]);
})); }));
@ -458,7 +458,7 @@ abstract class Formatting
* *
* @return string partially-rendered HTML * @return string partially-rendered HTML
*/ */
public static function linkifyMentions(string $text, Actor $author, ?Note $parent = null): string public static function linkifyMentions(string $text, Actor $author, string $language, ?Note $parent = null): string
{ {
$mentions = self::findMentions($text, $author, $parent); $mentions = self::findMentions($text, $author, $parent);