Fix issue #3035: search highlighting broke URLs in some imported messages (Twitter)

Search highlighting was being done with a regex on raw HTML text, followed by a second regex undoing replacements within double-quoted attribute values.
This broke on imported Twitter messages, as the way we generate the markup uses single quotes on the attributes, which didn't get matched by the second regex.
I've replaced this do-then-undo cycle with a single pass that divides the input HTML into freetext spans and tags; the freetext gets highlighted, while the tags are left untouched.
This commit is contained in:
Brion Vibber 2011-02-21 14:14:32 -08:00
parent eb7e3ee528
commit 8e7c279c9e
1 changed file with 13 additions and 6 deletions

View File

@ -193,13 +193,20 @@ class SearchNoticeListItem extends NoticeListItem {
$options = implode('|', array_map('preg_quote', array_map('htmlspecialchars', $terms),
array_fill(0, sizeof($terms), '/')));
$pattern = "/($options)/i";
$result = preg_replace($pattern, '<strong>\\1</strong>', $text);
$result = '';
/* Divide up into text (highlight me) and tags (don't touch) */
$chunks = preg_split('/(<[^>]+>)/', $text, 0, PREG_SPLIT_DELIM_CAPTURE);
foreach ($chunks as $i => $chunk) {
if ($i % 2 == 1) {
// odd: delimiter (tag)
$result .= $chunk;
} else {
// even: freetext between tags
$result .= preg_replace($pattern, '<strong>\\1</strong>', $chunk);
}
}
/* Remove highlighting from inside links; loop in case there are multiple highlights in links */
$pattern = '/(\w+="[^"]*)<strong>('.$options.')<\/strong>([^"]*")/iU';
do {
$result = preg_replace($pattern, '\\1\\2\\3', $result, -1, $count);
} while ($count);
return $result;
}
}