Always specify the UTF-8 target charset for html_entity_decode(); the default is 8-bit ISO-8859-1, which causes things to break when we later pass the decoded strings through code that expects to work with UTF-8. For instance, running them through preg_replace() with the /u option results in NULL, leading to problems with OStatus and SubMirror generating their plaintext versions and doing length-cropping.

This commit is contained in:
Brion Vibber
2010-09-30 11:29:31 -07:00
parent 91f25ca817
commit 1acc7d66c6
6 changed files with 7 additions and 7 deletions

View File

@@ -558,7 +558,7 @@ class Ostatus_profile extends Memcached_DataObject
// Get (safe!) HTML and text versions of the content
$rendered = $this->purify($sourceContent);
$content = html_entity_decode(strip_tags($rendered));
$content = html_entity_decode(strip_tags($rendered), ENT_QUOTES, 'UTF-8');
$shortened = common_shorten_links($content);
@@ -569,7 +569,7 @@ class Ostatus_profile extends Memcached_DataObject
if (Notice::contentTooLong($shortened)) {
$attachment = $this->saveHTMLFile($note->title, $rendered);
$summary = html_entity_decode(strip_tags($note->summary));
$summary = html_entity_decode(strip_tags($note->summary), ENT_QUOTES, 'UTF-8');
if (empty($summary)) {
$summary = $content;
}