PHP>=5.4.0 lets us use Transliterator, tags now asciified!

For example: #REVOLUCIÓN becomes #revolucion instead of #revolución
This commit is contained in:
Mikael Nordfeldth 2015-02-17 20:11:35 +01:00
parent 59763ceecb
commit c31c2d10b9

View File

@ -1116,6 +1116,20 @@ function common_xml_safe_str($str)
return preg_replace('/[\p{Cc}\p{Cs}]/u', '*', $str); return preg_replace('/[\p{Cc}\p{Cs}]/u', '*', $str);
} }
/**
 * Reduce a string to a lowercase slug made only of letters and digits.
 *
 * When the intl extension is available the text is transliterated to
 * Latin, stripped of accents and punctuation, lowercased and mapped to
 * ASCII equivalents where such equivalents exist. When intl is missing,
 * or the transliteration fails, the text is only lowercased. In both
 * cases every remaining character that is not a Unicode letter or
 * number is removed.
 *
 * @param string $str Text to slugify, expected to be UTF-8.
 *
 * @return string The slug; an empty string when nothing usable remains
 *                or when the input cannot be processed as UTF-8.
 */
function common_slugify($str)
{
    $slug = false;

    // Transliterator lives in ext/intl; calling it unguarded would be a
    // fatal error on installations without that extension.
    if (function_exists('transliterator_transliterate')) {
        $slug = transliterator_transliterate(
            'Any-Latin;' .                  // any charset to latin compatible
            'NFD;' .                        // decompose
            '[:Nonspacing Mark:] Remove;' . // remove nonspacing marks (accents etc.)
            'NFC;' .                        // composite again
            '[:Punctuation:] Remove;' .     // remove punctuation (.,¿? etc.)
            'Lower();' .                    // turn into lowercase
            'Latin-ASCII;',                 // get ASCII equivalents (ð to d for example)
            $str
        );
    }

    if (!is_string($slug)) {
        // No intl, or transliteration failed: keep the characters as they
        // are and only lowercase them.
        $slug = function_exists('mb_strtolower')
            ? mb_strtolower($str, 'UTF-8')
            : strtolower($str);
    }

    // The "u" modifier is required: without it \pL and \pN are tested
    // byte by byte, so a multibyte character that survived the
    // transliteration would be cut into an invalid UTF-8 fragment.
    $slug = preg_replace('/[^\pL\pN]/u', '', $slug);

    // preg_replace() yields null when the subject is not valid UTF-8.
    return is_string($slug) ? $slug : '';
}
function common_tag_link($tag) function common_tag_link($tag)
{ {
$canonical = common_canonical_tag($tag); $canonical = common_canonical_tag($tag);
@ -1139,9 +1153,7 @@ function common_tag_link($tag)
function common_canonical_tag($tag) function common_canonical_tag($tag)
{ {
// only alphanum $tag = common_slugify($tag);
$tag = preg_replace('/[^\pL\pN]/u', '', $tag);
$tag = mb_convert_case($tag, MB_CASE_LOWER, "UTF-8");
$tag = substr($tag, 0, 64); $tag = substr($tag, 0, 64);
return $tag; return $tag;
} }