From c31c2d10b972b9efc750fcec28acdd90da81a4cd Mon Sep 17 00:00:00 2001 From: Mikael Nordfeldth Date: Tue, 17 Feb 2015 20:11:35 +0100 Subject: [PATCH] PHP>=5.4.0 lets us use Transliterator, tags now asciified! MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For example: #REVOLUCIÓN becomes #revolucion instead of #revolución --- lib/util.php | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/lib/util.php b/lib/util.php index a32c35395e..b81aa5ef35 100644 --- a/lib/util.php +++ b/lib/util.php @@ -1116,6 +1116,20 @@ function common_xml_safe_str($str) return preg_replace('/[\p{Cc}\p{Cs}]/u', '*', $str); } +function common_slugify($str) +{ + $str = transliterator_transliterate( + 'Any-Latin;' . // any charset to latin compatible + 'NFD;' . // decompose + '[:Nonspacing Mark:] Remove;' . // remove nonspacing marks (accents etc.) + 'NFC;' . // composite again + '[:Punctuation:] Remove;' . // remove punctuation (.,¿? etc.) + 'Lower();' . // turn into lowercase + 'Latin-ASCII;', // get ASCII equivalents (ð to d for example) + $str); + return preg_replace('/[^\pL\pN]/', '', $str); +} + function common_tag_link($tag) { $canonical = common_canonical_tag($tag); @@ -1139,11 +1153,9 @@ function common_tag_link($tag) function common_canonical_tag($tag) { - // only alphanum - $tag = preg_replace('/[^\pL\pN]/u', '', $tag); - $tag = mb_convert_case($tag, MB_CASE_LOWER, "UTF-8"); - $tag = substr($tag, 0, 64); - return $tag; + $tag = common_slugify($tag); + $tag = substr($tag, 0, 64); + return $tag; } function common_valid_profile_tag($str)