[COMPONENTS][Tag] Split tag into words and stem each

This commit is contained in:
Hugo Sales 2021-11-25 20:04:59 +00:00
parent 4571b18c60
commit 98b719dca3
Signed by untrusted user: someonewithpc
GPG Key ID: 7D0C7EAFC9D835A0
3 changed files with 17 additions and 6 deletions

View File

@ -97,10 +97,13 @@ class Tag extends Component
public static function canonicalTag(string $tag, string $language): string public static function canonicalTag(string $tag, string $language): string
{ {
$result = ''; $result = '';
if (Event::handle('StemWord', [$language, $tag, &$result]) !== Event::stop) { foreach (Formatting::splitWords(str_replace('#', '', $tag)) as $word) {
$result = Formatting::slugify($tag); $temp_res = null;
if (Event::handle('StemWord', [$language, $word, &$temp_res]) !== Event::stop) {
$temp_res = $word;
}
$result .= Formatting::slugify($temp_res);
} }
$result = str_replace('#', '', $result);
return self::ensureLength($result); return self::ensureLength($result);
} }

View File

@ -29,7 +29,7 @@ use Wamania\Snowball\StemmerFactory;
class StemWord extends Plugin class StemWord extends Plugin
{ {
public function onStemWord(string $language, string $word, string &$out) public function onStemWord(string $language, string $word, ?string &$out)
{ {
$out = StemmerFactory::create($language)->stem($word); $out = StemmerFactory::create($language)->stem($word);
return Event::stop; return Event::stop;

View File

@ -504,4 +504,12 @@ abstract class Formatting
return $output; return $output;
} }
/**
 * Split a string into words on `-`, `_` or lower-to-upper case transitions
 * (e.g. `fooBar-baz_qux` becomes `['foo', 'Bar', 'baz', 'qux']`).
 *
 * The `-` and `_` separators are removed from the output; consecutive,
 * leading or trailing separators yield empty strings in the result.
 *
 * @param string $words Text to split; expected to be valid UTF-8
 *
 * @return string[] The words, in their original order. If the input cannot
 *                  be split (e.g. it is not valid UTF-8), it is returned
 *                  unchanged as the only element
 */
public static function splitWords(string $words): array
{
    // With the `u` modifier preg_split() returns false on malformed UTF-8,
    // which would otherwise violate the declared `array` return type
    $parts = preg_split('/-|_|(?<=\p{Ll})(?=\p{Lu})/u', $words);
    return $parts === false ? [$words] : $parts;
}
} }