Test cases and fixes for Atom and RSS content decoding.
Fix extraction of Atom <content type="text"> and <content type="html">: we were failing to escape plaintext source data to HTML, and were applying an extraneous second de-escape to HTML source data, which broke notices containing text that merely looks like HTML. Previously only <content type="xhtml"> was handled correctly. Fixes for RSS 2.0 content processing: we were failing to load <content:encoded> at all because we looked it up under the wrong element name, and we were applying an extraneous de-escape to <description> instead of the escaping that is required to turn plaintext into HTML. (Per spec, <description> must be plaintext.)
This commit is contained in:
parent
9c8052e755
commit
8fd0059bf6
@ -83,6 +83,7 @@ class Activity
|
||||
const CREATOR = 'creator';
|
||||
|
||||
const CONTENTNS = 'http://purl.org/rss/1.0/modules/content/';
|
||||
const ENCODED = 'encoded';
|
||||
|
||||
public $actor; // an ActivityObject
|
||||
public $verb; // a string (the URL)
|
||||
@ -268,14 +269,21 @@ class Activity
|
||||
|
||||
$this->title = ActivityUtils::childContent($item, ActivityObject::TITLE, self::RSS);
|
||||
|
||||
$contentEl = ActivityUtils::child($item, ActivityUtils::CONTENT, self::CONTENTNS);
|
||||
$contentEl = ActivityUtils::child($item, self::ENCODED, self::CONTENTNS);
|
||||
|
||||
if (!empty($contentEl)) {
|
||||
$this->content = htmlspecialchars_decode($contentEl->textContent, ENT_QUOTES);
|
||||
// <content:encoded> XML node's text content is HTML; no further processing needed.
|
||||
$this->content = $contentEl->textContent;
|
||||
} else {
|
||||
$descriptionEl = ActivityUtils::child($item, self::DESCRIPTION, self::RSS);
|
||||
if (!empty($descriptionEl)) {
|
||||
$this->content = htmlspecialchars_decode($descriptionEl->textContent, ENT_QUOTES);
|
||||
// Per spec, <description> must be plaintext.
|
||||
// In practice, often there's HTML... but these days good
|
||||
// feeds are using <content:encoded> which is explicitly
|
||||
// real HTML.
|
||||
// We'll treat this following spec, and do HTML escaping
|
||||
// to convert from plaintext to HTML.
|
||||
$this->content = htmlspecialchars($descriptionEl->textContent);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -213,11 +213,19 @@ class ActivityUtils
|
||||
// slavishly following http://atompub.org/rfc4287.html#rfc.section.4.1.3.3
|
||||
|
||||
if (empty($type) || $type == 'text') {
|
||||
return $el->textContent;
|
||||
// We have plaintext saved as the XML text content.
|
||||
// Since we want HTML, we need to escape any special chars.
|
||||
return htmlspecialchars($el->textContent);
|
||||
} else if ($type == 'html') {
|
||||
// We have HTML saved as the XML text content.
|
||||
// No additional processing required once we've got it.
|
||||
$text = $el->textContent;
|
||||
return htmlspecialchars_decode($text, ENT_QUOTES);
|
||||
return $text;
|
||||
} else if ($type == 'xhtml') {
|
||||
// Per spec, the <content type="xhtml"> contains a single
|
||||
// HTML <div> with XHTML namespace on it as a child node.
|
||||
// We need to pull all of that <div>'s child nodes and
|
||||
// serialize them back to an (X)HTML source fragment.
|
||||
$divEl = ActivityUtils::child($el, 'div', 'http://www.w3.org/1999/xhtml');
|
||||
if (empty($divEl)) {
|
||||
return null;
|
||||
|
@ -32,6 +32,18 @@ class ActivityParseTests extends PHPUnit_Framework_TestCase
|
||||
$this->assertEquals('tag:versioncentral.example.org,2009:/change/1643245', $act->objects[0]->id);
|
||||
}
|
||||
|
||||
/**
 * Parses the $_example2 fixture into an Activity and checks that the
 * payload of its <content type="html"> element is extracted as HTML.
 */
public function testExample2()
{
    global $_example2;

    // DOMDocument::loadXML() is an instance method. Calling it statically
    // raises a notice-level error before PHP 8 and throws an Error as of
    // PHP 8.0, so build the document explicitly and verify the load.
    $dom = new DOMDocument();
    $this->assertTrue($dom->loadXML($_example2), 'Fixture $_example2 failed to parse as XML');

    $act = new Activity($dom->documentElement);

    $this->assertFalse(empty($act));
    // Did we handle <content type="html"> correctly with a typical payload?
    $this->assertEquals("<p>Geraldine posted a Photo on PhotoPanic</p>\n " .
                        "<img src=\"/geraldine/photo1.jpg\">", trim($act->content));
}
|
||||
|
||||
public function testExample3()
|
||||
{
|
||||
global $_example3;
|
||||
@ -305,6 +317,71 @@ class ActivityParseTests extends PHPUnit_Framework_TestCase
|
||||
|
||||
}
|
||||
|
||||
/**
 * Checks Activity content extraction for Atom <content> elements of
 * type text (the default), html and xhtml.
 *
 * Each fixture is a pair: the <content> element to embed in a minimal
 * Atom <entry>, and the string expected in $act->content after trimming.
 *
 * NOTE(review): several fixtures below carry literal <b> markup inside
 * <content> although their wording ("this is not HTML") suggests escaped
 * text was intended. This copy may have lost one level of XML entity
 * escaping; confirm the fixture strings against the repository before
 * relying on the expected values.
 */
public function testAtomContent()
{
    $tests = array(array("<content>Some regular plain text.</content>",
                         "Some regular plain text."),
                   array("<content><b>this is not HTML</b></content>",
                         "<b>this is not HTML</b>"),
                   array("<content type='html'>Some regular plain HTML.</content>",
                         "Some regular plain HTML."),
                   array("<content type='html'><b>this is too HTML</b></content>",
                         "<b>this is too HTML</b>"),
                   array("<content type='html'>&lt;b&gt;but this is not HTML!&lt;/b&gt;</content>",
                         "<b>but this is not HTML!</b>"),
                   array("<content type='xhtml'><div xmlns='http://www.w3.org/1999/xhtml'>Some regular plain XHTML.</div></content>",
                         "Some regular plain XHTML."),
                   array("<content type='xhtml'><div xmlns='http://www.w3.org/1999/xhtml'><b>This is some XHTML!</b></div></content>",
                         "<b>This is some XHTML!</b>"),
                   array("<content type='xhtml'><div xmlns='http://www.w3.org/1999/xhtml'><b>This is not some XHTML!</b></div></content>",
                         "<b>This is not some XHTML!</b>"),
                   array("<content type='xhtml'><div xmlns='http://www.w3.org/1999/xhtml'>&lt;b&gt;This is not some XHTML either!&lt;/b&gt;</div></content>",
                         "&lt;b&gt;This is not some XHTML either!&lt;/b&gt;"));
    foreach ($tests as $data) {
        list($source, $output) = $data;
        $xml = "<entry xmlns='http://www.w3.org/2005/Atom'>" .
               "<id>http://example.com/fakeid</id>" .
               "<author><name>Test</name></author>" .
               "<title>Atom content tests</title>" .
               $source .
               "</entry>";

        // DOMDocument::loadXML() is an instance method. Calling it
        // statically throws an Error as of PHP 8.0, so instantiate first
        // and fail loudly if the fixture is not well-formed.
        $dom = new DOMDocument();
        $this->assertTrue($dom->loadXML($xml), "Fixture failed to parse as XML: $source");

        $act = new Activity($dom->documentElement);

        // Include the fixture in the messages so a failure inside the
        // loop identifies which case broke.
        $this->assertFalse(empty($act), "No activity built for: $source");
        $this->assertEquals($output, trim($act->content), "Unexpected content for: $source");
    }
}
|
||||
|
||||
/**
 * Checks Activity content extraction for RSS items, covering both
 * <content:encoded> and <description>.
 *
 * Each fixture is a pair: the element to embed in a minimal RSS <item>
 * (with the content-module namespace bound to the "content" prefix),
 * and the string expected in $act->content after trimming.
 *
 * NOTE(review): some fixtures below carry literal <b> markup inside the
 * element although their wording ("non-HTML text") suggests escaped text
 * was intended. This copy may have lost one level of XML entity
 * escaping; confirm the fixture strings against the repository before
 * relying on the expected values.
 */
public function testRssContent()
{
    $tests = array(array("<content:encoded>Some regular plain HTML.</content:encoded>",
                         "Some regular plain HTML."),
                   array("<content:encoded>Some <b>exciting bold HTML</b></content:encoded>",
                         "Some <b>exciting bold HTML</b>"),
                   array("<content:encoded>Some &lt;b&gt;escaped non-HTML.&lt;/b&gt;</content:encoded>",
                         "Some <b>escaped non-HTML.</b>"),
                   array("<description>Some plain text.</description>",
                         "Some plain text."),
                   array("<description>Some <b>non-HTML text</b></description>",
                         "Some <b>non-HTML text</b>"),
                   array("<description>Some &lt;b&gt;double-escaped text&lt;/b&gt;</description>",
                         "Some &lt;b&gt;double-escaped text&lt;/b&gt;"));
    foreach ($tests as $data) {
        list($source, $output) = $data;
        $xml = "<item xmlns:content='http://purl.org/rss/1.0/modules/content/'>" .
               "<guid>http://example.com/fakeid</guid>" .
               "<title>RSS content tests</title>" .
               $source .
               "</item>";

        // DOMDocument::loadXML() is an instance method. Calling it
        // statically throws an Error as of PHP 8.0, so instantiate first
        // and fail loudly if the fixture is not well-formed.
        $dom = new DOMDocument();
        $this->assertTrue($dom->loadXML($xml), "Fixture failed to parse as XML: $source");

        $act = new Activity($dom->documentElement);

        // Include the fixture in the messages so a failure inside the
        // loop identifies which case broke.
        $this->assertFalse(empty($act), "No activity built for: $source");
        $this->assertEquals($output, trim($act->content), "Unexpected content for: $source");
    }
}
|
||||
|
||||
}
|
||||
|
||||
$_example1 = <<<EXAMPLE1
|
||||
|
Loading…
Reference in New Issue
Block a user