diff --git a/extlib/HTMLPurifier/HTMLPurifier.includes.php b/extlib/HTMLPurifier/HTMLPurifier.includes.php
index fdb58c2d37..e8bce5c850 100644
--- a/extlib/HTMLPurifier/HTMLPurifier.includes.php
+++ b/extlib/HTMLPurifier/HTMLPurifier.includes.php
@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
- * @version 4.7.0
+ * @version 4.9.3
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
@@ -137,6 +137,8 @@ require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
+require 'HTMLPurifier/AttrTransform/TargetNoopener.php';
+require 'HTMLPurifier/AttrTransform/TargetNoreferrer.php';
require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php';
@@ -175,6 +177,8 @@ require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require 'HTMLPurifier/HTMLModule/Tables.php';
require 'HTMLPurifier/HTMLModule/Target.php';
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
+require 'HTMLPurifier/HTMLModule/TargetNoopener.php';
+require 'HTMLPurifier/HTMLModule/TargetNoreferrer.php';
require 'HTMLPurifier/HTMLModule/Text.php';
require 'HTMLPurifier/HTMLModule/Tidy.php';
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -225,5 +229,6 @@ require 'HTMLPurifier/URIScheme/https.php';
require 'HTMLPurifier/URIScheme/mailto.php';
require 'HTMLPurifier/URIScheme/news.php';
require 'HTMLPurifier/URIScheme/nntp.php';
+require 'HTMLPurifier/URIScheme/tel.php';
require 'HTMLPurifier/VarParser/Flexible.php';
require 'HTMLPurifier/VarParser/Native.php';
diff --git a/extlib/HTMLPurifier/HTMLPurifier.php b/extlib/HTMLPurifier/HTMLPurifier.php
index c6041bc113..b4605ebc6e 100644
--- a/extlib/HTMLPurifier/HTMLPurifier.php
+++ b/extlib/HTMLPurifier/HTMLPurifier.php
@@ -19,7 +19,7 @@
*/
/*
- HTML Purifier 4.7.0 - Standards Compliant HTML Filtering
+ HTML Purifier 4.9.3 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -58,12 +58,12 @@ class HTMLPurifier
* Version of HTML Purifier.
* @type string
*/
- public $version = '4.7.0';
+ public $version = '4.9.3';
/**
* Constant with version of HTML Purifier.
*/
- const VERSION = '4.7.0';
+ const VERSION = '4.9.3';
/**
* Global configuration object.
@@ -104,7 +104,7 @@ class HTMLPurifier
/**
* Initializes the purifier.
*
- * @param HTMLPurifier_Config $config Optional HTMLPurifier_Config object
+ * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
* for all instances of the purifier, if omitted, a default
* configuration is supplied (which can be overridden on a
* per-use basis).
diff --git a/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php b/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php
index 9dea6d1ed5..a3261f8a32 100644
--- a/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php
+++ b/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php
@@ -131,6 +131,8 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
+require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoopener.php';
+require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoreferrer.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
@@ -169,6 +171,8 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
+require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoopener.php';
+require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoreferrer.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -219,5 +223,6 @@ require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
require_once $__dir . '/HTMLPurifier/URIScheme/mailto.php';
require_once $__dir . '/HTMLPurifier/URIScheme/news.php';
require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php';
+require_once $__dir . '/HTMLPurifier/URIScheme/tel.php';
require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php';
require_once $__dir . '/HTMLPurifier/VarParser/Native.php';
diff --git a/extlib/HTMLPurifier/HTMLPurifier/Arborize.php b/extlib/HTMLPurifier/HTMLPurifier/Arborize.php
index 9e6617be5d..d2e9d22a20 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/Arborize.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/Arborize.php
@@ -19,8 +19,8 @@ class HTMLPurifier_Arborize
if ($token instanceof HTMLPurifier_Token_End) {
$token->start = null; // [MUT]
$r = array_pop($stack);
- assert($r->name === $token->name);
- assert(empty($token->attr));
+ //assert($r->name === $token->name);
+ //assert(empty($token->attr));
$r->endCol = $token->col;
$r->endLine = $token->line;
$r->endArmor = $token->armor;
@@ -32,7 +32,7 @@ class HTMLPurifier_Arborize
$stack[] = $node;
}
}
- assert(count($stack) == 1);
+ //assert(count($stack) == 1);
return $stack[0];
}
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrCollections.php b/extlib/HTMLPurifier/HTMLPurifier/AttrCollections.php
index 4f6c2e39a2..c7b17cf144 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrCollections.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrCollections.php
@@ -21,6 +21,11 @@ class HTMLPurifier_AttrCollections
* @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
*/
public function __construct($attr_types, $modules)
+ {
+ $this->doConstruct($attr_types, $modules);
+ }
+
+ public function doConstruct($attr_types, $modules)
{
// load extensions from the modules
foreach ($modules as $module) {
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php
index 5ac06522b9..739646fa7c 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php
@@ -86,7 +86,13 @@ abstract class HTMLPurifier_AttrDef
*/
protected function mungeRgb($string)
{
- return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
+ $p = '\s*(\d+(\.\d+)?([%]?))\s*';
+
+ if (preg_match('/(rgba|hsla)\(/', $string)) {
+ return preg_replace('/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8,\11)', $string);
+ }
+
+ return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string);
}
/**
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php
index 02c1641fb2..ad2cb90ad1 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php
@@ -25,15 +25,42 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
$css = $this->parseCDATA($css);
$definition = $config->getCSSDefinition();
+ $allow_duplicates = $config->get("CSS.AllowDuplicates");
- // we're going to break the spec and explode by semicolons.
- // This is because semicolon rarely appears in escaped form
- // Doing this is generally flaky but fast
- // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
- // for details
- $declarations = explode(';', $css);
+ // According to the CSS2.1 spec, the places where a
+ // non-delimiting semicolon can appear are in strings
+ // escape sequences. So here is some dumb hack to
+ // handle quotes.
+ $len = strlen($css);
+ $accum = "";
+ $declarations = array();
+ $quoted = false;
+ for ($i = 0; $i < $len; $i++) {
+ $c = strcspn($css, ";'\"", $i);
+ $accum .= substr($css, $i, $c);
+ $i += $c;
+ if ($i == $len) break;
+ $d = $css[$i];
+ if ($quoted) {
+ $accum .= $d;
+ if ($d == $quoted) {
+ $quoted = false;
+ }
+ } else {
+ if ($d == ";") {
+ $declarations[] = $accum;
+ $accum = "";
+ } else {
+ $accum .= $d;
+ $quoted = $d;
+ }
+ }
+ }
+ if ($accum != "") $declarations[] = $accum;
+
$propvalues = array();
+ $new_declarations = '';
/**
* Name of the current CSS property being validated.
@@ -83,7 +110,11 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
if ($result === false) {
continue;
}
- $propvalues[$property] = $result;
+ if ($allow_duplicates) {
+ $new_declarations .= "$property:$result;";
+ } else {
+ $propvalues[$property] = $result;
+ }
}
$context->destroy('CurrentCSSProperty');
@@ -92,7 +123,6 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
// slightly inefficient, but it's the only way of getting rid of
// duplicates. Perhaps config to optimize it, but not now.
- $new_declarations = '';
foreach ($propvalues as $prop => $value) {
$new_declarations .= "$prop:$value;";
}
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php
index 16d2a6b98c..d7287a00c2 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php
@@ -6,6 +6,16 @@
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{
+ /**
+ * @type HTMLPurifier_AttrDef_CSS_AlphaValue
+ */
+ protected $alpha;
+
+ public function __construct()
+ {
+ $this->alpha = new HTMLPurifier_AttrDef_CSS_AlphaValue();
+ }
+
/**
* @param string $color
* @param HTMLPurifier_Config $config
@@ -29,59 +39,104 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
return $colors[$lower];
}
- if (strpos($color, 'rgb(') !== false) {
- // rgb literal handling
+ if (preg_match('#(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) {
$length = strlen($color);
if (strpos($color, ')') !== $length - 1) {
return false;
}
- $triad = substr($color, 4, $length - 4 - 1);
- $parts = explode(',', $triad);
- if (count($parts) !== 3) {
+
+ // get used function : rgb, rgba, hsl or hsla
+ $function = $matches[1];
+
+ $parameters_size = 3;
+ $alpha_channel = false;
+ if (substr($function, -1) === 'a') {
+ $parameters_size = 4;
+ $alpha_channel = true;
+ }
+
+ /*
+ * Allowed types for values :
+ * parameter_position => [type => max_value]
+ */
+ $allowed_types = array(
+ 1 => array('percentage' => 100, 'integer' => 255),
+ 2 => array('percentage' => 100, 'integer' => 255),
+ 3 => array('percentage' => 100, 'integer' => 255),
+ );
+ $allow_different_types = false;
+
+ if (strpos($function, 'hsl') !== false) {
+ $allowed_types = array(
+ 1 => array('integer' => 360),
+ 2 => array('percentage' => 100),
+ 3 => array('percentage' => 100),
+ );
+ $allow_different_types = true;
+ }
+
+ $values = trim(str_replace($function, '', $color), ' ()');
+
+ $parts = explode(',', $values);
+ if (count($parts) !== $parameters_size) {
return false;
}
- $type = false; // to ensure that they're all the same type
+
+ $type = false;
$new_parts = array();
+ $i = 0;
+
foreach ($parts as $part) {
+ $i++;
$part = trim($part);
+
if ($part === '') {
return false;
}
- $length = strlen($part);
- if ($part[$length - 1] === '%') {
- // handle percents
- if (!$type) {
- $type = 'percentage';
- } elseif ($type !== 'percentage') {
+
+ // different check for alpha channel
+ if ($alpha_channel === true && $i === count($parts)) {
+ $result = $this->alpha->validate($part, $config, $context);
+
+ if ($result === false) {
return false;
}
- $num = (float)substr($part, 0, $length - 1);
- if ($num < 0) {
- $num = 0;
- }
- if ($num > 100) {
- $num = 100;
- }
- $new_parts[] = "$num%";
+
+ $new_parts[] = (string)$result;
+ continue;
+ }
+
+ if (substr($part, -1) === '%') {
+ $current_type = 'percentage';
} else {
- // handle integers
- if (!$type) {
- $type = 'integer';
- } elseif ($type !== 'integer') {
- return false;
- }
- $num = (int)$part;
- if ($num < 0) {
- $num = 0;
- }
- if ($num > 255) {
- $num = 255;
- }
- $new_parts[] = (string)$num;
+ $current_type = 'integer';
+ }
+
+ if (!array_key_exists($current_type, $allowed_types[$i])) {
+ return false;
+ }
+
+ if (!$type) {
+ $type = $current_type;
+ }
+
+ if ($allow_different_types === false && $type != $current_type) {
+ return false;
+ }
+
+ $max_value = $allowed_types[$i][$current_type];
+
+ if ($current_type == 'integer') {
+ // Return value between range 0 -> $max_value
+ $new_parts[] = (int)max(min($part, $max_value), 0);
+ } elseif ($current_type == 'percentage') {
+ $new_parts[] = (float)max(min(rtrim($part, '%'), $max_value), 0) . '%';
}
}
- $new_triad = implode(',', $new_parts);
- $color = "rgb($new_triad)";
+
+ $new_values = implode(',', $new_parts);
+
+ $color = $function . '(' . $new_values . ')';
} else {
// hexadecimal handling
if ($color[0] === '#') {
@@ -100,6 +155,7 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
}
return $color;
}
+
}
// vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
index 86101020dc..74e24c8816 100644
--- a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
+++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
@@ -130,6 +130,8 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
//
+ By default, HTML Purifier removes duplicate CSS properties,
+ like color:red; color:blue
. If this is set to
+ true, duplicate properties are allowed.
+
+ In HTML Purifier 4.8.0, this also supports NULL
,
+ which means that no chmod'ing or directory creation shall
+ occur.
+
+ This directive enables aggressive pre-filter removal of + script tags. This is not necessary for security, + but it can help work around a bug in libxml where embedded + HTML elements inside script sections cause the parser to + choke. To revert to pre-4.9.0 behavior, set this to false. + This directive has no effect if %Core.Trusted is true, + %Core.RemoveScriptContents is false, or %Core.HiddenElements + does not contain script. +
+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt new file mode 100644 index 0000000000..392b436493 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt @@ -0,0 +1,36 @@ +Core.LegacyEntityDecoder +TYPE: bool +VERSION: 4.9.0 +DEFAULT: false +--DESCRIPTION-- ++ Prior to HTML Purifier 4.9.0, entities were decoded by performing + a global search replace for all entities whose decoded versions + did not have special meanings under HTML, and replaced them with + their decoded versions. We would match all entities, even if they did + not have a trailing semicolon, but only if there weren't any trailing + alphanumeric characters. +
+Original | Text | Attribute |
---|---|---|
¥ | ¥ | ¥ |
¥ | ¥ | ¥ |
¥a | ¥a | ¥a |
¥= | ¥= | ¥= |
+ In HTML Purifier 4.9.0, we changed the behavior of entity parsing + to match entities that had missing trailing semicolons in less + cases, to more closely match HTML5 parsing behavior: +
+Original | Text | Attribute |
---|---|---|
¥ | ¥ | ¥ |
¥ | ¥ | ¥ |
¥a | ¥a | ¥a |
¥= | ¥= | ¥= |
+ This flag reverts back to pre-HTML Purifier 4.9.0 behavior. +
+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt new file mode 100644 index 0000000000..dd514c0def --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt @@ -0,0 +1,10 @@ +--# vim: et sw=4 sts=4 +HTML.TargetNoopener +TYPE: bool +VERSION: 4.8.0 +DEFAULT: TRUE +--DESCRIPTION-- +If enabled, noopener rel attributes are added to links which have +a target attribute associated with them. This prevents malicious +destinations from overwriting the original window. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt new file mode 100644 index 0000000000..cb5a0b0e5e --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt @@ -0,0 +1,9 @@ +HTML.TargetNoreferrer +TYPE: bool +VERSION: 4.8.0 +DEFAULT: TRUE +--DESCRIPTION-- +If enabled, noreferrer rel attributes are added to links which have +a target attribute associated with them. This prevents malicious +destinations from overwriting the original window. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt index 666635a5ff..eb97307e20 100644 --- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt @@ -8,6 +8,7 @@ array ( 'ftp' => true, 'nntp' => true, 'news' => true, + 'tel' => true, ) --DESCRIPTION-- Whitelist that defines the schemes that a URI is allowed to have. This diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt index 728e378cbe..834bc08c0b 100644 --- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt @@ -1,5 +1,5 @@ URI.DefaultScheme -TYPE: string +TYPE: string/null DEFAULT: 'http' --DESCRIPTION-- @@ -7,4 +7,9 @@ DEFAULT: 'http' Defines through what scheme the output will be served, in order to select the proper object validator when no scheme information is present. + ++ Starting with HTML Purifier 4.9.0, the default scheme can be null, in + which case we reject all URIs which do not have explicit schemes. +
--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt index 179f94eb03..58c81dcc44 100644 --- a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt @@ -9,75 +9,75 @@ DEFAULT: NULL absolute URIs into another URI, usually a URI redirection service. This directive accepts a URI, formatted with a%s
where
the url-encoded original URI should be inserted (sample:
- https://searx.laquadrature.net/?q=%s
).
-
-
+ http://www.google.com/url?q=%s
).
+
Uses for this directive: -
-+
Prior to HTML Purifier 3.1.1, this directive also enabled the munging
of browsable external resources, which could break things if your redirection
script was a splash page or used meta
tags. To revert to
previous behavior, please use %URI.MungeResources.
-
+
+You may want to also use %URI.MungeSecretKey along with this directive in order to enforce what URIs your redirector script allows. Open redirector scripts can be a security risk and negatively affect the reputation of your domain name. -
-+
+Starting with HTML Purifier 3.1.1, there is also these substitutions: -
-Key | -Description | -Example <a href=""> |
-
---|---|---|
Key | +Description | +Example <a href=""> |
+
%r | -1 - The URI embeds a resource (blank) - The URI is merely a link |
- - |
%n | -The name of the tag this URI came from | -a | -
%m | -The name of the attribute this URI came from | -href | -
%p | -The name of the CSS property this URI came from, or blank if irrelevant | -- |
%r | +1 - The URI embeds a resource (blank) - The URI is merely a link |
+ + |
%n | +The name of the tag this URI came from | +a | +
%m | +The name of the attribute this URI came from | +href | +
%p | +The name of the CSS property this URI came from, or blank if irrelevant | ++ |
+
Admittedly, these letters are somewhat arbitrary; the only stipulation was that they couldn't be a through f. r is for resource (I would have preferred e, but you take what you can get), n is for name, m was picked because it came after n (and I couldn't use a), p is for property. -
- --# vim: et sw=4 sts=4 + +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php index 67bb5b1e69..9aa8ff354f 100644 --- a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php +++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php @@ -118,7 +118,7 @@ abstract class HTMLPurifier_DefinitionCache /** * Clears all expired (older version or revision) objects from cache - * @note Be carefuly implementing this method as flush. Flush must + * @note Be careful implementing this method as flush. Flush must * not interfere with other Definition types, and cleanup() * should not be repeatedly called by userland code. * @param HTMLPurifier_Config $config diff --git a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer.php b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer.php index ce268d91b4..952e48d470 100644 --- a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer.php +++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer.php @@ -97,6 +97,12 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac } $dir = $this->generateDirectoryPath($config); $dh = opendir($dir); + // Apparently, on some versions of PHP, readdir will return + // an empty string if you pass an invalid argument to readdir. + // So you need this test. See #49. + if (false === $dh) { + return false; + } while (false !== ($filename = readdir($dh))) { if (empty($filename)) { continue; @@ -106,6 +112,8 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac } unlink($dir . '/' . $filename); } + closedir($dh); + return true; } /** @@ -119,6 +127,10 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac } $dir = $this->generateDirectoryPath($config); $dh = opendir($dir); + // See #49 (and above). + if (false === $dh) { + return false; + } while (false !== ($filename = readdir($dh))) { if (empty($filename)) { continue; @@ -131,6 +143,8 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac unlink($dir . '/' . $filename); } } + closedir($dh); + return true; } /** @@ -186,11 +200,9 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac if ($result !== false) { // set permissions of the new file (no execute) $chmod = $config->get('Cache.SerializerPermissions'); - if (!$chmod) { - $chmod = 0644; // invalid config or simpletest + if ($chmod !== null) { + chmod($file, $chmod & 0666); } - $chmod = $chmod & 0666; - chmod($file, $chmod); } return $result; } @@ -204,8 +216,10 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac { $directory = $this->generateDirectoryPath($config); $chmod = $config->get('Cache.SerializerPermissions'); - if (!$chmod) { - $chmod = 0755; // invalid config or simpletest + if ($chmod === null) { + // TODO: This races + if (is_dir($directory)) return true; + return mkdir($directory); } if (!is_dir($directory)) { $base = $this->generateBaseDirectoryPath($config); @@ -219,15 +233,16 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac } elseif (!$this->_testPermissions($base, $chmod)) { return false; } - mkdir($directory, $chmod); - if (!$this->_testPermissions($directory, $chmod)) { + if (!mkdir($directory, $chmod)) { trigger_error( - 'Base directory ' . $base . ' does not exist, - please create or change using %Cache.SerializerPath', + 'Could not create directory ' . $directory . '', E_USER_WARNING ); return false; } + if (!$this->_testPermissions($directory, $chmod)) { + return false; + } } elseif (!$this->_testPermissions($directory, $chmod)) { return false; } @@ -256,7 +271,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac ); return false; } - if (function_exists('posix_getuid')) { + if (function_exists('posix_getuid') && $chmod !== null) { // POSIX system, we can give more specific advice if (fileowner($dir) === posix_getuid()) { // we can chmod it ourselves diff --git a/extlib/HTMLPurifier/HTMLPurifier/Encoder.php b/extlib/HTMLPurifier/HTMLPurifier/Encoder.php index fef9b58906..b94f175423 100644 --- a/extlib/HTMLPurifier/HTMLPurifier/Encoder.php +++ b/extlib/HTMLPurifier/HTMLPurifier/Encoder.php @@ -101,6 +101,14 @@ class HTMLPurifier_Encoder * It will parse according to UTF-8 and return a valid UTF8 string, with * non-SGML codepoints excluded. * + * Specifically, it will permit: + * \x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF} + * Source: https://www.w3.org/TR/REC-xml/#NT-Char + * Arguably this function should be modernized to the HTML5 set + * of allowed characters: + * https://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream + * which simultaneously expand and restrict the set of allowed characters. + * * @param string $str The string to clean * @param bool $force_php * @return string @@ -122,15 +130,12 @@ class HTMLPurifier_Encoder * function that needs to be able to understand UTF-8 characters. * As of right now, only smart lossless character encoding converters * would need that, and I'm probably not going to implement them. - * Once again, PHP 6 should solve all our problems. */ public static function cleanUTF8($str, $force_php = false) { // UTF-8 validity is checked since PHP 4.3.5 // This is an optimization: if the string is already valid UTF-8, no // need to do PHP stuff. 99% of the time, this will be the case. - // The regexp matches the XML char production, as well as well as excluding - // non-SGML codepoints U+007F to U+009F if (preg_match( '/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str @@ -255,6 +260,7 @@ class HTMLPurifier_Encoder // 7F-9F is not strictly prohibited by XML, // but it is non-SGML, and thus we don't allow it (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) || + (0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) || (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4) ) ) { diff --git a/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php b/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php index 61529dcd9d..c372b5a6a6 100644 --- a/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php +++ b/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php @@ -16,6 +16,138 @@ class HTMLPurifier_EntityParser */ protected $_entity_lookup; + /** + * Callback regex string for entities in text. + * @type string + */ + protected $_textEntitiesRegex; + + /** + * Callback regex string for entities in attributes. + * @type string + */ + protected $_attrEntitiesRegex; + + /** + * Tests if the beginning of a string is a semi-optional regex + */ + protected $_semiOptionalPrefixRegex; + + public function __construct() { + // From + // http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon + $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml"; + + // NB: three empty captures to put the fourth match in the right + // place + $this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/"; + + $this->_textEntitiesRegex = + '/&(?:'. + // hex + '[#]x([a-fA-F0-9]+);?|'. + // dec + '[#]0*(\d+);?|'. + // string (mandatory semicolon) + // NB: order matters: match semicolon preferentially + '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'. + // string (optional semicolon) + "($semi_optional)". + ')/'; + + $this->_attrEntitiesRegex = + '/&(?:'. + // hex + '[#]x([a-fA-F0-9]+);?|'. + // dec + '[#]0*(\d+);?|'. + // string (mandatory semicolon) + // NB: order matters: match semicolon preferentially + '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'. + // string (optional semicolon) + // don't match if trailing is equals or alphanumeric (URL + // like) + "($semi_optional)(?![=;A-Za-z0-9])". + ')/'; + + } + + /** + * Substitute entities with the parsed equivalents. Use this on + * textual data in an HTML document (as opposed to attributes.) + * + * @param string $string String to have entities parsed. + * @return string Parsed string. + */ + public function substituteTextEntities($string) + { + return preg_replace_callback( + $this->_textEntitiesRegex, + array($this, 'entityCallback'), + $string + ); + } + + /** + * Substitute entities with the parsed equivalents. Use this on + * attribute contents in documents. + * + * @param string $string String to have entities parsed. + * @return string Parsed string. + */ + public function substituteAttrEntities($string) + { + return preg_replace_callback( + $this->_attrEntitiesRegex, + array($this, 'entityCallback'), + $string + ); + } + + /** + * Callback function for substituteNonSpecialEntities() that does the work. + * + * @param array $matches PCRE matches array, with 0 the entire match, and + * either index 1, 2 or 3 set with a hex value, dec value, + * or string (respectively). + * @return string Replacement string. + */ + + protected function entityCallback($matches) + { + $entity = $matches[0]; + $hex_part = @$matches[1]; + $dec_part = @$matches[2]; + $named_part = empty($matches[3]) ? @$matches[4] : $matches[3]; + if ($hex_part !== NULL && $hex_part !== "") { + return HTMLPurifier_Encoder::unichr(hexdec($hex_part)); + } elseif ($dec_part !== NULL && $dec_part !== "") { + return HTMLPurifier_Encoder::unichr((int) $dec_part); + } else { + if (!$this->_entity_lookup) { + $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); + } + if (isset($this->_entity_lookup->table[$named_part])) { + return $this->_entity_lookup->table[$named_part]; + } else { + // exact match didn't match anything, so test if + // any of the semicolon optional match the prefix. + // Test that this is an EXACT match is important to + // prevent infinite loop + if (!empty($matches[3])) { + return preg_replace_callback( + $this->_semiOptionalPrefixRegex, + array($this, 'entityCallback'), + $entity + ); + } + return $entity; + } + } + } + + // LEGACY CODE BELOW + /** * Callback regex string for parsing entities. * @type string @@ -144,7 +276,7 @@ class HTMLPurifier_EntityParser $entity; } else { return isset($this->_special_ent2dec[$matches[3]]) ? - $this->_special_ent2dec[$matches[3]] : + $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] : $entity; } } diff --git a/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php b/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php index 08e62c16bf..66f70b0fc0 100644 --- a/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php +++ b/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php @@ -95,7 +95,10 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter if ($tidy !== null) { $this->_tidy = $tidy; } - $html = preg_replace_callback('##isU', array($this, 'styleCallback'), $html); + // NB: this must be NON-greedy because if we have + // + // we must not grab foo