From 8eca1b8daccf722b1ad06bdc128f5742bd6b0a80 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 21 Feb 2011 16:18:45 -0800 Subject: [PATCH] Fix ticket #3057: apply HTML escaping on special characters in Twitter import Changes the replacement of Twitter "entities" from in-place reverse ordering ('to preserve indices') to a forward-facing append-in-chunks that pulls in both the text and link portions, and escapes them all. This unfortunately means first *de*-escaping the < and > that Twitter helpfully adds for us.... and any literal &blah;s that get written. This seems to match Twitter's web UI, however horrid it is. --- plugins/TwitterBridge/twitterimport.php | 42 ++++++++++++++++++++----- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/plugins/TwitterBridge/twitterimport.php b/plugins/TwitterBridge/twitterimport.php index 2067196d36..5ddf5380d7 100644 --- a/plugins/TwitterBridge/twitterimport.php +++ b/plugins/TwitterBridge/twitterimport.php @@ -554,8 +554,8 @@ class TwitterImport } // Move all the entities into order so we can - // replace them in reverse order and thus - // not mess up their indices + // replace them and escape surrounding plaintext + // in order $toReplace = array(); @@ -577,13 +577,23 @@ class TwitterImport } } - // sort in reverse order by key + // sort in forward order by key - krsort($toReplace); + ksort($toReplace); + + $result = ''; + $cursor = 0; foreach ($toReplace as $part) { list($type, $object) = $part; - $orig = mb_substr($text, $object->indices[0], $object->indices[1] - $object->indices[0]); + $start = $object->indices[0]; + $end = $object->indices[1]; + if ($cursor < $start) { + // Copy in the preceding plaintext + $result .= $this->twitEscape(mb_substr($text, $cursor, $start - $cursor)); + $cursor = $start; + } + $orig = $this->twitEscape(mb_substr($text, $start, $end - $start)); switch($type) { case self::URL: $linkText = $this->makeUrlLink($object, $orig); @@ -595,11 +605,29 @@ class TwitterImport $linkText = $this->makeMentionLink($object, $orig); break; default: + $linkText = $orig; continue; } - $text = mb_substr($text, 0, $object->indices[0]) . $linkText . mb_substr($text, $object->indices[1]); + $result .= $linkText; + $cursor = $end; } - return $text; + $last = $this->twitEscape(mb_substr($text, $cursor)); + $result .= $last; + + return $result; + } + + function twitEscape($str) + { + // Twitter seems to preemptive turn < and > into < and > + // but doesn't for &, so while you may have some magic protection + // against XSS by not bothing to escape manually, you still get + // invalid XHTML. Thanks! + // + // Looks like their web interface pretty much sends anything + // through intact, so.... to do equivalent, decode all entities + // and then re-encode the special ones. + return htmlspecialchars(html_entity_decode($str, ENT_COMPAT, 'UTF-8')); } function makeUrlLink($object, $orig)