From 8e7c279c9eaf1db6367f5bbf8e833698f50eca92 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 21 Feb 2011 14:14:32 -0800 Subject: [PATCH] Fix issue #3035: search highlighting broke URLs in some imported messages (Twitter) Search highlighting was being done with a regex on raw HTML text, followed by a second regex undoing replacements within double-quoted attribute values. This broke on imported Twitter messages, as the way we generate the markup uses single quotes on the attributes, which didn't get matched by the second regex. I've replaced this do-then-undo cycle by dividing up the import HTML into freetext spans and tags; the freetext gets replaced, while the tags are left untouched. --- actions/noticesearch.php | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/actions/noticesearch.php b/actions/noticesearch.php index d0673420d6..5814dec8af 100644 --- a/actions/noticesearch.php +++ b/actions/noticesearch.php @@ -193,13 +193,20 @@ class SearchNoticeListItem extends NoticeListItem { $options = implode('|', array_map('preg_quote', array_map('htmlspecialchars', $terms), array_fill(0, sizeof($terms), '/'))); $pattern = "/($options)/i"; - $result = preg_replace($pattern, '<strong>\\1</strong>', $text); + $result = ''; + + /* Divide up into text (highlight me) and tags (don't touch) */ + $chunks = preg_split('/(<[^>]+>)/', $text, 0, PREG_SPLIT_DELIM_CAPTURE); + foreach ($chunks as $i => $chunk) { + if ($i % 2 == 1) { + // odd: delimiter (tag) + $result .= $chunk; + } else { + // even: freetext between tags + $result .= preg_replace($pattern, '<strong>\\1</strong>', $chunk); + } + } - /* Remove highlighting from inside links, loop incase multiple highlights in links */ - $pattern = '/(\w+="[^"]*)<strong>('.$options.')<\/strong>([^"]*")/iU'; - do { - $result = preg_replace($pattern, '\\1\\2\\3', $result, -1, $count); - } while ($count); return $result; } }