From 711ade9835f3523dcf83702834adef313f9cf54d Mon Sep 17 00:00:00 2001 From: Brenda Wallace Date: Mon, 20 Jul 2009 14:01:51 +1200 Subject: [PATCH 1/4] look for full unicode when saving tags --- classes/Notice.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/classes/Notice.php b/classes/Notice.php index 101fadb674..12b47f3604 100644 --- a/classes/Notice.php +++ b/classes/Notice.php @@ -97,11 +97,11 @@ class Notice extends Memcached_DataObject function saveTags() { /* extract all #hastags */ - $count = preg_match_all('/(?:^|\s)#([A-Za-z0-9_\-\.]{1,64})/', strtolower($this->content), $match); + $count = preg_match_all('/(?:^|\s)#([\pL\pN_\-\.]{1,64})/', strtolower($this->content), $match); if (!$count) { return true; } - + /* Add them to the database */ foreach(array_unique($match[1]) as $hashtag) { /* elide characters we don't want in the tag */ From 590982612206acaa902d7bc87429ec4296bfa17f Mon Sep 17 00:00:00 2001 From: Brenda Wallace Date: Mon, 20 Jul 2009 14:02:26 +1200 Subject: [PATCH 2/4] show full utf8 in tags --- lib/util.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/util.php b/lib/util.php index 9e8ec41d25..edc396cc02 100644 --- a/lib/util.php +++ b/lib/util.php @@ -404,7 +404,7 @@ function common_render_text($text) $r = preg_replace('/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{19}]/', '', $r); $r = common_replace_urls_callback($r, 'common_linkify'); - $r = preg_replace('/(^|\(|\[|\s+)#([A-Za-z0-9_\-\.]{1,64})/e', "'\\1#'.common_tag_link('\\2')", $r); + $r = preg_replace('/(^|\(|\[|\s+)#([\pL\pN_\-\.]{1,64})/e', "'\\1#'.common_tag_link('\\2')", $r); // XXX: machine tags return $r; } From 76dd061a34782010d5650d5513bd4d4873e0045c Mon Sep 17 00:00:00 2001 From: Brenda Wallace Date: Mon, 20 Jul 2009 14:02:52 +1200 Subject: [PATCH 3/4] allow full unicode into links for tags --- lib/router.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/router.php b/lib/router.php index 5e0fcfc946..8e48364979 100644 --- a/lib/router.php +++ b/lib/router.php @@ -211,7 +211,7 @@ class Router array('tag' => '[a-zA-Z0-9]+')); $m->connect('tag/:tag', array('action' => 'tag'), - array('tag' => '[a-zA-Z0-9]+')); + array('tag' => '[\pL\pN_\-\.]{1,64}')); $m->connect('peopletag/:tag', array('action' => 'peopletag'), From d95f45f0705467cfc7e3650db932e43f0a20e45f Mon Sep 17 00:00:00 2001 From: Brenda Wallace Date: Mon, 20 Jul 2009 14:18:48 +1200 Subject: [PATCH 4/4] turn into canonical terms before checking for unique-ness of a tag in a notice --- classes/Notice.php | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/classes/Notice.php b/classes/Notice.php index 12b47f3604..7f002d838c 100644 --- a/classes/Notice.php +++ b/classes/Notice.php @@ -101,9 +101,17 @@ class Notice extends Memcached_DataObject if (!$count) { return true; } + + //turn each into their canonical tag + //this is needed to remove dupes before saving e.g. #hash.tag = #hashtag + $hashtags = array(); + for($i=0; $isaveTag($hashtag); } @@ -112,8 +120,6 @@ class Notice extends Memcached_DataObject function saveTag($hashtag) { - $hashtag = common_canonical_tag($hashtag); - $tag = new Notice_tag(); $tag->notice_id = $this->id; $tag->tag = $hashtag;