From 1be22260197c058dead7baccbfc074d06771c4b3 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Wed, 23 Feb 2011 16:32:56 -0800 Subject: [PATCH 1/2] Replace the router's tag regexes with a fixed pattern that matches any non-/ character. This lets other code decide what is a valid tag. --- lib/router.php | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/lib/router.php b/lib/router.php index c8e1c365a5..69b27da21c 100644 --- a/lib/router.php +++ b/lib/router.php @@ -116,6 +116,8 @@ class Router static $bare = array('requesttoken', 'accesstoken', 'userauthorization', 'postnotice', 'updateprofile', 'finishremotesubscribe'); + const REGEX_TAG = '[^\/]+'; // [\pL\pN_\-\.]{1,64} better if we can do unicode regexes + static function get() { if (!Router::$inst) { @@ -348,14 +350,14 @@ class Router $m->connect('tag', array('action' => 'publictagcloud')); $m->connect('tag/:tag/rss', array('action' => 'tagrss'), - array('tag' => '[\pL\pN_\-\.]{1,64}')); + array('tag' => self::REGEX_TAG)); $m->connect('tag/:tag', array('action' => 'tag'), - array('tag' => '[\pL\pN_\-\.]{1,64}')); + array('tag' => self::REGEX_TAG)); $m->connect('peopletag/:tag', array('action' => 'peopletag'), - array('tag' => '[a-zA-Z0-9]+')); + array('tag' => self::REGEX_TAG)); // groups @@ -812,7 +814,7 @@ class Router $m->connect($a.'/:tag', array('action' => $a, 'nickname' => $nickname), - array('tag' => '[a-zA-Z0-9]+')); + array('tag' => self::REGEX_TAG)); } foreach (array('rss', 'groups') as $a) { @@ -839,12 +841,12 @@ class Router $m->connect('tag/:tag/rss', array('action' => 'userrss', 'nickname' => $nickname), - array('tag' => '[\pL\pN_\-\.]{1,64}')); + array('tag' => self::REGEX_TAG)); $m->connect('tag/:tag', array('action' => 'showstream', 'nickname' => $nickname), - array('tag' => '[\pL\pN_\-\.]{1,64}')); + array('tag' => self::REGEX_TAG)); $m->connect('rsd.xml', array('action' => 'rsd', @@ -875,7 +877,7 @@ class Router foreach (array('subscriptions', 'subscribers') as
$a) { $m->connect(':nickname/'.$a.'/:tag', array('action' => $a), - array('tag' => '[a-zA-Z0-9]+', + array('tag' => self::REGEX_TAG, 'nickname' => Nickname::DISPLAY_FMT)); } @@ -903,12 +905,12 @@ class Router $m->connect(':nickname/tag/:tag/rss', array('action' => 'userrss'), array('nickname' => Nickname::DISPLAY_FMT), - array('tag' => '[\pL\pN_\-\.]{1,64}')); + array('tag' => self::REGEX_TAG)); $m->connect(':nickname/tag/:tag', array('action' => 'showstream'), array('nickname' => Nickname::DISPLAY_FMT), - array('tag' => '[\pL\pN_\-\.]{1,64}')); + array('tag' => self::REGEX_TAG)); $m->connect(':nickname/rsd.xml', array('action' => 'rsd'), From 295e2bde565d9f057c4b6c3f5e4a9ae4195ede26 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Wed, 23 Feb 2011 16:37:55 -0800 Subject: [PATCH 2/2] Unicodize a couple regexes for tags: fixes linking & detection of non-ASCII tags that match the current regexes. (Checks for 'letter' and 'number' characters, underscore, dash, and period.) --- classes/Notice.php | 2 +- lib/util.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/classes/Notice.php b/classes/Notice.php index c9cca8a969..a640919094 100644 --- a/classes/Notice.php +++ b/classes/Notice.php @@ -153,7 +153,7 @@ class Notice extends Memcached_DataObject function saveTags() { /* extract all #hastags */ - $count = preg_match_all('/(?:^|\s)#([\pL\pN_\-\.]{1,64})/', strtolower($this->content), $match); + $count = preg_match_all('/(?:^|\s)#([\pL\pN_\-\.]{1,64})/u', strtolower($this->content), $match); if (!$count) { return true; } diff --git a/lib/util.php b/lib/util.php index 85f49e4c59..b710a4391b 100644 --- a/lib/util.php +++ b/lib/util.php @@ -784,7 +784,7 @@ function common_render_text($text) $r = preg_replace('/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{19}]/', '', $r); $r = common_replace_urls_callback($r, 'common_linkify'); - $r = preg_replace('/(^|\"\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/e', "'\\1#'.common_tag_link('\\2')", $r); + $r = 
preg_replace('/(^|\"\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/ue', "'\\1#'.common_tag_link('\\2')", $r); // XXX: machine tags return $r; }