forked from GNUsocial/gnu-social
81ea0f8117
HTMLPurifier defangs arbitrary submitted HTML. We're using it in the OStatus plugin, but it may be valuable for other parts of the codebase (I think OEmbed might benefit, for example).
99 lines
3.3 KiB
PHP
99 lines
3.3 KiB
PHP
<?php

/**
 * Class that handles operations involving percent-encoding in URIs.
 *
 * @warning
 *      Be careful when reusing instances of PercentEncoder. The object
 *      you use for normalize() SHOULD NOT be used for encode(), or
 *      vice-versa: both methods consult the same $preserve table, so
 *      extra characters given to the constructor affect both of them.
 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of characters that must never be percent-encoded.
     *
     * Keys are byte values (as returned by ord()), every value is true.
     * It always contains the unreserved set (digits, ASCII letters,
     * "-", ".", "_" and "~") plus whatever was passed to the constructor.
     */
    protected $preserve = array();

    /**
     * Builds the table of characters that are left untouched.
     *
     * @param $preserve String of additional characters that should be
     *                  preserved (i.e. not escaped) by encode() and decoded
     *                  by normalize(); false means "unreserved set only".
     */
    public function __construct($preserve = false)
    {
        // Unreserved characters: digits, upper-case, lower-case, then
        // dash, period, underscore and tilde.
        $chars = '0123456789'
            . 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
            . 'abcdefghijklmnopqrstuvwxyz'
            . '-._~';

        // Extra characters not to escape.
        if ($preserve !== false) {
            $chars .= $preserve;
        }

        for ($i = 0, $c = strlen($chars); $i < $c; $i++) {
            $this->preserve[ord($chars[$i])] = true;
        }
    }

    /**
     * Our replacement for urlencode: percent-encodes every byte of the
     * string except the percent sign itself, the unreserved characters,
     * and the extra characters given to the constructor.
     *
     * @note
     *      Assumes that the string has already been normalized, making any
     *      and all percent escape sequences valid. Percents will not be
     *      re-escaped, regardless of their status in $preserve.
     * @param $string String to be encoded
     * @return Encoded string.
     */
    public function encode($string)
    {
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            $char = $string[$i];
            if ($char === '%' || isset($this->preserve[ord($char)])) {
                // Existing escapes and preserved characters pass through.
                $ret .= $char;
            } else {
                // Two upper-case hex digits per escaped byte.
                $ret .= '%' . sprintf('%02X', ord($char));
            }
        }
        return $ret;
    }

    /**
     * Fix up percent-encoding by decoding unreserved characters and
     * normalizing: escapes of preserved characters are decoded, other
     * valid escapes get upper-case hex digits, and a percent sign that
     * does not start a valid two-digit escape is rewritten as "%25".
     *
     * @warning This function is affected by $preserve, even though the
     *          usual desired behavior is for this not to preserve those
     *          characters. Be careful when reusing instances of
     *          PercentEncoder!
     * @param $string String to normalize
     * @return Normalized string.
     */
    public function normalize($string)
    {
        // Loose comparison: null and false also short-circuit to ''.
        if ($string == '') {
            return '';
        }

        // Everything before the first percent sign is copied verbatim;
        // every remaining part starts right after a percent sign.
        $parts = explode('%', $string);
        $ret = array_shift($parts);

        foreach ($parts as $part) {
            // Too short to hold two hex digits: the percent was literal.
            if (strlen($part) < 2) {
                $ret .= '%25' . $part;
                continue;
            }

            $encoding = substr($part, 0, 2);
            $text     = substr($part, 2);

            // Not a hexadecimal pair: the percent was literal.
            if (!ctype_xdigit($encoding)) {
                $ret .= '%25' . $part;
                continue;
            }

            $int = hexdec($encoding);
            if (isset($this->preserve[$int])) {
                // Needlessly escaped character: decode it.
                $ret .= chr($int) . $text;
                continue;
            }

            // Valid escape that must stay: canonical upper-case form.
            $ret .= '%' . strtoupper($encoding) . $text;
        }

        return $ret;
    }

}

// vim: et sw=4 sts=4