[COMPONENTS][Tag] Split tag into words and stem each

This commit is contained in:
Hugo Sales 2021-11-25 20:04:59 +00:00
parent 4571b18c60
commit 98b719dca3
Signed by: someonewithpc
GPG Key ID: 7D0C7EAFC9D835A0
3 changed files with 17 additions and 6 deletions

View File

@@ -97,10 +97,13 @@ class Tag extends Component
/**
 * Build the canonical form of a tag.
 *
 * In the per-word loop, `#` is removed from the tag and the remainder is
 * split with Formatting::splitWords(). Each word is offered to the
 * `StemWord` event; when Event::handle() does not return Event::stop, the
 * word itself is used unchanged. Every result is passed through
 * Formatting::slugify() and appended to the output, which is finally
 * passed to self::ensureLength().
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/- markers
 * were lost, so lines from before and after the change are shown together
 * (the braces do not balance as displayed) -- confirm against the real file.
 *
 * @param string $tag      the raw tag text
 * @param string $language language identifier forwarded to the `StemWord` event
 *
 * @return string the value returned by self::ensureLength()
 */
public static function canonicalTag(string $tag, string $language): string
{
$result = '';
if (Event::handle('StemWord', [$language, $tag, &$result]) !== Event::stop) {
$result = Formatting::slugify($tag);
foreach (Formatting::splitWords(str_replace('#', '', $tag)) as $word) {
$temp_res = null;
// No handler answered with Event::stop: fall back to the word as given
if (Event::handle('StemWord', [$language, $word, &$temp_res]) !== Event::stop) {
$temp_res = $word;
}
$result .= Formatting::slugify($temp_res);
}
$result = str_replace('#', '', $result);
return self::ensureLength($result);
}

View File

@@ -29,7 +29,7 @@ use Wamania\Snowball\StemmerFactory;
class StemWord extends Plugin
{
public function onStemWord(string $language, string $word, string &$out)
public function onStemWord(string $language, string $word, ?string &$out)
{
$out = StemmerFactory::create($language)->stem($word);
return Event::stop;

View File

@@ -504,4 +504,12 @@ abstract class Formatting
return $output;
}
/**
 * Split a string into words on `-`, `_` or lower-to-upper case transitions,
 * e.g. `fooBar-baz_qux` becomes `['foo', 'Bar', 'baz', 'qux']`.
 *
 * Leading, trailing or adjacent separators yield empty strings in the result.
 *
 * @param string $words the text to split
 *
 * @return string[] the pieces, in order; the unsplit input as a single
 *                  element when the regex engine rejects it (e.g. malformed UTF-8)
 */
public static function splitWords(string $words): array
{
    $pieces = preg_split('/-|_|(?<=\p{Ll})(?=\p{Lu})/u', $words);

    // preg_split() returns false on failure (the `u` modifier rejects malformed
    // UTF-8); returning that would violate the declared array return type.
    return $pieces === false ? [$words] : $pieces;
}
}