Upgrade XML output scrubbing to better handle newlines and a few other control characters

This commit is contained in:
Zach Copley 2010-03-01 14:58:06 -08:00
parent 04c4facba9
commit a5dc5f9c62
1 changed file with 22 additions and 2 deletions

View File

@ -809,8 +809,28 @@ function common_shorten_links($text)
/**
 * Scrub a string so it can be emitted safely in XML output.
 *
 * Common whitespace control characters are normalised first: tab, newline
 * and vertical tab each become a single space, while carriage return and
 * the null byte are dropped. Any control codes (\p{Cc}) or surrogates
 * (\p{Cs}) still present afterwards are replaced with '*'.
 *
 * @param string $str text to scrub (presumably UTF-8, as the final
 *                    pattern is compiled with the /u modifier)
 *
 * @return string scrubbed text; note that preg_replace() yields null on
 *                error (e.g. a malformed UTF-8 subject under /u)
 */
function common_xml_safe_str($str)
{
    // Replace common eol and extra whitespace input chars.
    // This must run BEFORE the generic control-code pass below: all of
    // these characters are themselves \p{Cc} and would otherwise be
    // turned into '*' instead of whitespace.
    $unWelcome = array(
        "\t",   // tab
        "\n",   // newline
        "\r",   // cr
        "\0",   // null byte eos
        "\x0B"  // vertical tab
    );

    $replacement = array(
        ' ', // tab          -> single space
        ' ', // newline      -> single space
        '',  // cr           -> nothing (so "\r\n" collapses to one space)
        '',  // null byte    -> nothing
        ' '  // vertical tab -> single space
    );

    $str = str_replace($unWelcome, $replacement, $str);

    // Neutralize any additional control codes and UTF-16 surrogates
    // (Twitter uses '*')
    return preg_replace('/[\p{Cc}\p{Cs}]/u', '*', $str);
}
function common_tag_link($tag)