[COMPONENTS][Tag] Split tag into words and stem each
This commit is contained in:
parent
4571b18c60
commit
98b719dca3
@ -97,10 +97,13 @@ class Tag extends Component
|
|||||||
/**
 * Build the canonical form of a tag.
 *
 * Every `#` is removed from $tag, the remainder is split into words with
 * Formatting::splitWords(), and each word is offered to the `StemWord`
 * event. The (possibly stemmed) words are slugified and concatenated,
 * without a separator, and the result is passed through
 * self::ensureLength().
 *
 * @param string $tag      the raw tag, with or without `#`
 * @param string $language language identifier forwarded to the `StemWord` handlers
 *
 * @return string the canonical tag
 */
public static function canonicalTag(string $tag, string $language): string
{
    $result = '';
    foreach (Formatting::splitWords(str_replace('#', '', $tag)) as $word) {
        // Consecutive, leading or trailing separators produce empty
        // segments; they contribute nothing, so don't send them to the stemmer
        if ($word === '') {
            continue;
        }
        $temp_res = null;
        if (Event::handle('StemWord', [$language, $word, &$temp_res]) !== Event::stop) {
            // No handler took the event: use the word as is
            $temp_res = $word;
        }
        // A handler may stop the event without writing its out-parameter;
        // fall back to the unstemmed word rather than slugifying null
        $result .= Formatting::slugify($temp_res ?? $word);
    }
    return self::ensureLength($result);
}
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ use Wamania\Snowball\StemmerFactory;
|
|||||||
|
|
||||||
class StemWord extends Plugin
|
class StemWord extends Plugin
|
||||||
{
|
{
|
||||||
public function onStemWord(string $language, string $word, string &$out)
|
public function onStemWord(string $language, string $word, ?string &$out)
|
||||||
{
|
{
|
||||||
$out = StemmerFactory::create($language)->stem($word);
|
$out = StemmerFactory::create($language)->stem($word);
|
||||||
return Event::stop;
|
return Event::stop;
|
||||||
|
@ -504,4 +504,12 @@ abstract class Formatting
|
|||||||
|
|
||||||
return $output;
|
return $output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Split words by `-`, `_` or lower to upper case transitions
 *
 * The `-` and `_` separators are consumed. A case transition splits
 * between the two letters without consuming either, so `fooBar`
 * becomes `foo` and `Bar`. Empty pieces (from leading, trailing or
 * consecutive separators) are kept.
 *
 * @param string $words the text to split
 *
 * @return string[] the pieces, in order of appearance; if the text
 *                  cannot be split (e.g. it is not valid UTF-8) it is
 *                  returned unsplit as the only element
 */
public static function splitWords(string $words): array
{
    $parts = preg_split('/-|_|(?<=\p{Ll})(?=\p{Lu})/u', $words);

    // preg_split() returns false on failure, e.g. malformed UTF-8 with the
    // `u` modifier; returning that would violate the `array` return type
    return $parts === false ? [$words] : $parts;
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user