Sanitize HTML returned by oEmbed providers to protect Laconica from XSS attacks
This commit is contained in:
parent
b9cf19a2ee
commit
ac75772150
715
extlib/htmLawed/htmLawed.php
Normal file
715
extlib/htmLawed/htmLawed.php
Normal file
@ -0,0 +1,715 @@
|
||||
<?php
|
||||
|
||||
/*
|
||||
htmLawed 1.1.8.1, 16 July 2009
|
||||
Copyright Santosh Patnaik
|
||||
GPL v3 license
|
||||
A PHP Labware internal utility; www.bioinformatics.org/phplabware/internal_utilities/htmLawed
|
||||
|
||||
See htmLawed_README.txt/htm
|
||||
*/
|
||||
|
||||
function htmLawed($t, $C=1, $S=array()){
/*
 * Filter the markup in $t and return the filtered text.
 *
 * $t - input text.
 * $C - configuration array; any non-array value means "use the defaults
 *      set below".
 * $S - attribute spec, either an already-parsed array or a string that is
 *      handed to hl_spec().
 *
 * Side effects: the finalized config and spec are published as
 * $GLOBALS['C'] and $GLOBALS['S'] for the hl_* callbacks; values that were
 * there before the call are put back before returning. $GLOBALS['hl_Ids']
 * is created when 'unique_ids' is on, and $GLOBALS[<show_setting>] is
 * written when 'show_setting' holds a valid variable name.
 */
$C = is_array($C) ? $C : array();
if(!empty($C['valid_xhtml'])){
 $C['elements'] = empty($C['elements']) ? '*-center-dir-font-isindex-menu-s-strike-u' : $C['elements'];
 $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
 $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
}
// config eles
$e = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'applet'=>1, 'area'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'blockquote'=>1, 'br'=>1, 'button'=>1, 'caption'=>1, 'center'=>1, 'cite'=>1, 'code'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'del'=>1, 'dfn'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'dt'=>1, 'em'=>1, 'embed'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'isindex'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'object'=>1, 'ol'=>1, 'optgroup'=>1, 'option'=>1, 'p'=>1, 'param'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'table'=>1, 'tbody'=>1, 'td'=>1, 'textarea'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'tt'=>1, 'u'=>1, 'ul'=>1, 'var'=>1); // 86/deprecated+embed+ruby
if(!empty($C['safe'])){
 unset($e['applet'], $e['embed'], $e['iframe'], $e['object'], $e['script']);
}
// 'elements' is either '-*' (none), a plain comma list (exactly these), or
// '*' followed by +name / -name adjustments to the default set
$x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['elements']) : '*';
if($x == '-*'){$e = array();}
elseif(strpos($x, '*') === false){$e = array_flip(explode(',', $x));}
else{
 if(isset($x[1])){
  preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, PREG_SET_ORDER);
  for($i=count($m); --$i>=0;){$m[$i] = $m[$i][0];}
  foreach($m as $v){
   if($v[0] == '+'){$e[substr($v, 1)] = 1;}
   // an explicit +name anywhere in the list wins over -name
   if($v[0] == '-' && isset($e[($v = substr($v, 1))]) && !in_array('+'. $v, $m)){unset($e[$v]);}
  }
 }
}
$C['elements'] =& $e;
// config attrs
$x = !empty($C['deny_attribute']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['deny_attribute']) : '';
$x = array_flip((isset($x[0]) && $x[0] == '*') ? explode('-', $x) : explode(',', $x. (!empty($C['safe']) ? ',on*' : '')));
if(isset($x['on*'])){
 // expand the 'on*' shorthand to the individual event attributes
 unset($x['on*']);
 $x += array('onblur'=>1, 'onchange'=>1, 'onclick'=>1, 'ondblclick'=>1, 'onfocus'=>1, 'onkeydown'=>1, 'onkeypress'=>1, 'onkeyup'=>1, 'onmousedown'=>1, 'onmousemove'=>1, 'onmouseout'=>1, 'onmouseover'=>1, 'onmouseup'=>1, 'onreset'=>1, 'onselect'=>1, 'onsubmit'=>1);
}
$C['deny_attribute'] = $x;
// config URL
$x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https';
$C['schemes'] = array();
foreach(explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $x)) as $v){
 // pad so that an entry without ':' yields a null scheme list instead of
 // reading a missing offset
 list($x, $x2) = array_pad(explode(':', $v, 2), 2, null);
 if($x2){$C['schemes'][$x] = array_flip(explode(',', $x2));}
}
if(!isset($C['schemes']['*'])){$C['schemes']['*'] = array('file'=>1, 'http'=>1, 'https'=>1,);}
if(!empty($C['safe']) && empty($C['schemes']['style'])){$C['schemes']['style'] = array('nil'=>1);}
$C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
if(!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])){
 $C['base_url'] = $C['abs_url'] = 0;
}
// config rest
$C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
$C['anti_link_spam'] = (isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && count($C['anti_link_spam']) == 2 && (empty($C['anti_link_spam'][0]) or hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) or hl_regex($C['anti_link_spam'][1]))) ? $C['anti_link_spam'] : 0;
$C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
$C['balance'] = isset($C['balance']) ? (bool)$C['balance'] : 1;
$C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
$C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
$C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
$C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
$C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
$C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0;
$C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0;
$C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
$C['lc_std_val'] = isset($C['lc_std_val']) ? (bool)$C['lc_std_val'] : 1;
$C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
$C['named_entity'] = isset($C['named_entity']) ? (bool)$C['named_entity'] : 1;
$C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
$C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body';
$C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
$C['style_pass'] = empty($C['style_pass']) ? 0 : 1;
$C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
$C['unique_ids'] = isset($C['unique_ids']) ? $C['unique_ids'] : 1;
$C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;

// publish config/spec for the callbacks, remembering what was there before
if(isset($GLOBALS['C'])){$reC = $GLOBALS['C'];}
$GLOBALS['C'] = $C;
$S = is_array($S) ? $S : hl_spec($S);
if(isset($GLOBALS['S'])){$reS = $GLOBALS['S'];}
$GLOBALS['S'] = $S;

// drop control characters other than tab, LF and CR
$t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t);
if($C['clean_ms_char']){
 // Replacements are numeric character references so that the result does not
 // depend on the document encoding; they are validated by hl_ent() below.
 $x = array("\x7f"=>'', "\x80"=>'&#8364;', "\x81"=>'', "\x83"=>'&#402;', "\x85"=>'&#8230;', "\x86"=>'&#8224;', "\x87"=>'&#8225;', "\x88"=>'&#710;', "\x89"=>'&#8240;', "\x8a"=>'&#352;', "\x8b"=>'&#8249;', "\x8c"=>'&#338;', "\x8d"=>'', "\x8e"=>'&#381;', "\x8f"=>'', "\x90"=>'', "\x95"=>'&#8226;', "\x96"=>'&#8211;', "\x97"=>'&#8212;', "\x98"=>'&#732;', "\x99"=>'&#8482;', "\x9a"=>'&#353;', "\x9b"=>'&#8250;', "\x9c"=>'&#339;', "\x9d"=>'', "\x9e"=>'&#382;', "\x9f"=>'&#376;');
 // mode 1 keeps typographic quotes (as references); other modes use plain ASCII quotes
 $x = $x + ($C['clean_ms_char'] == 1 ? array("\x82"=>'&#8218;', "\x84"=>'&#8222;', "\x91"=>'&#8216;', "\x92"=>'&#8217;', "\x93"=>'&#8220;', "\x94"=>'&#8221;') : array("\x82"=>'\'', "\x84"=>'"', "\x91"=>'\'', "\x92"=>'\'', "\x93"=>'"', "\x94"=>'"'));
 $t = strtr($t, $x);
}
if($C['cdata'] or $C['comment']){$t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\[CDATA\[.*?\]\]))>`sm', 'hl_cmtcd', $t);}
// Escape every ampersand first, then let hl_ent() turn the ones that start a
// well-formed entity back into '&' (or the and_mark placeholder).
$t = preg_replace_callback('`&amp;([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'hl_ent', str_replace('&', '&amp;', $t));
if($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])){$GLOBALS['hl_Ids'] = array();}
if($C['hook']){$t = $C['hook']($t, $C, $S);}
if($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])){
 $GLOBALS[$C['show_setting']] = array('config'=>$C, 'spec'=>$S, 'time'=>microtime());
}
// main
$t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $t);
$t = $C['balance'] ? hl_bal($t, $C['keep_bad'], $C['parent']) : $t;
// turn the placeholder bytes written by hl_cmtcd() back into markup
$t = (($C['cdata'] or $C['comment']) && strpos($t, "\x01") !== false) ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t) : $t;
$t = $C['tidy'] ? hl_tidy($t, $C['tidy'], $C['parent']) : $t;
unset($C, $e);
if(isset($reC)){$GLOBALS['C'] = $reC;}
if(isset($reS)){$GLOBALS['S'] = $reS;}
return $t;
// eof
}
|
||||
|
||||
function hl_attrval($t, $p){
/*
 * Check attribute value $t against the rule set $p.
 *
 * $p maps rule names (maxlen, minlen, maxval, minval, match, nomatch, oneof,
 * noneof) to their arguments; unknown keys, including 'default', are ignored
 * as rules. Rules are tried in array order and checking stops at the first
 * one that fails.
 *
 * Returns $t when every rule passes, otherwise $p['default'] if present,
 * otherwise 0.
 */
$len = strlen($t);
foreach($p as $rule=>$arg){
 switch($rule){
  case 'maxlen':
   $failed = $len > $arg;
   break;
  case 'minlen':
   $failed = $len < $arg;
   break;
  case 'maxval':
   $failed = (float)($t) > $arg;
   break;
  case 'minval':
   $failed = (float)($t) < $arg;
   break;
  case 'match':
   $failed = !preg_match($arg, $t);
   break;
  case 'nomatch':
   $failed = (bool)preg_match($arg, $t);
   break;
  case 'oneof':
   // loose comparison against each '|'-separated choice
   $failed = !in_array($t, explode('|', $arg));
   break;
  case 'noneof':
   $failed = in_array($t, explode('|', $arg));
   break;
  default:
   $failed = false;
   break;
 }
 if($failed){
  return isset($p['default']) ? $p['default'] : 0;
 }
}
return $t;
// eof
}
|
||||
|
||||
function hl_bal($t, $do=1, $in='div'){
/*
 * Balance tags and enforce element nesting rules.
 *
 * $t  - markup in which tags are expected to be already normalized to
 *       '<name attrs>' / '</name>' form (presumably by hl_tag(); verify
 *       against caller).
 * $do - what to do with a tag that is not allowed where it appears and with
 *       text inside an element that may not hold text:
 *         1       echo the tag neutralized (&lt;...&gt;);
 *         3 or 5  echo it neutralized only if the parent allows #pcdata;
 *         < 3     keep orphaned text as is;
 *         > 4     keep only the whitespace of orphaned text.
 * $in - name of the element the markup will live in; falls back to 'div'
 *       when the name is not a known element.
 *
 * Returns the balanced markup. Output is collected through an output buffer.
 */
// by content
$cB = array('blockquote'=>1, 'form'=>1, 'map'=>1, 'noscript'=>1); // Block
$cE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty
$cF = array('button'=>1, 'del'=>1, 'div'=>1, 'dd'=>1, 'fieldset'=>1, 'iframe'=>1, 'ins'=>1, 'li'=>1, 'noscript'=>1, 'object'=>1, 'td'=>1, 'th'=>1); // Flow; later context-wise dynamic move of ins & del to $cI
$cI = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'caption'=>1, 'cite'=>1, 'code'=>1, 'dfn'=>1, 'dt'=>1, 'em'=>1, 'font'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'i'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'p'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rt'=>1, 's'=>1, 'samp'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); // Inline
$cN = array('a'=>array('a'=>1), 'button'=>array('a'=>1, 'button'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'fieldset'=>array('fieldset'=>1), 'form'=>array('form'=>1), 'label'=>array('label'=>1), 'noscript'=>array('script'=>1), 'pre'=>array('big'=>1, 'font'=>1, 'img'=>1, 'object'=>1, 'script'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1)); // Illegal
$cN2 = array_keys($cN);
$cR = array('blockquote'=>1, 'dir'=>1, 'dl'=>1, 'form'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1);
// 'dir' must be keyed by child name like every other entry, otherwise
// isset($ok['li']) is false and <li> is rejected inside <dir>
$cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child
$cO = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'blockquote'=>array('script'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'form'=>array('script'=>1), 'map'=>array('area'=>1), 'object'=>array('param'=>1, 'embed'=>1)); // Other
$cT = array('colgroup'=>1, 'dd'=>1, 'dt'=>1, 'li'=>1, 'option'=>1, 'p'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1); // Omitable closing
// block/inline type; ins & del both type; #pcdata: text
$eB = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'isindex'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'table'=>1, 'ul'=>1);
$eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'param'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1);
$eN = array('a'=>1, 'big'=>1, 'button'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'label'=>1, 'object'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1); // Exclude from specific ele; $cN values
$eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI
$eF = $eB + $eI;

// $in sets allowed child
$in = ((isset($eF[$in]) && $in != '#pcdata') or isset($eO[$in])) ? $in : 'div';
if(isset($cE[$in])){
 // an empty element cannot hold anything: drop or neutralize all markup
 return (!$do ? '' : str_replace(array('<', '>'), array('&lt;', '&gt;'), $t));
}
// start from an empty set so that a parent matched by none of the groups
// below (e.g. 'applet') can still receive its $cO children
$inOk = array();
if(isset($cS[$in])){$inOk = $cS[$in];}
elseif(isset($cI[$in])){$inOk = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
elseif(isset($cF[$in])){$inOk = $eF; unset($cI['del'], $cI['ins']);}
elseif(isset($cB[$in])){$inOk = $eB; unset($cI['del'], $cI['ins']);}
if(isset($cO[$in])){$inOk = $inOk + $cO[$in];}
if(isset($cN[$in])){$inOk = array_diff_assoc($inOk, $cN[$in]);}

$t = explode('<', $t);
$ok = $q = array(); // $q seq list of open non-empty ele
ob_start();

for($i=-1, $ci=count($t); ++$i<$ci;){
 // allowed $ok in parent $p
 if($ql = count($q)){
  $p = array_pop($q);
  $q[] = $p;
  if(isset($cS[$p])){$ok = $cS[$p];}
  elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
  elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);}
  elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);}
  if(isset($cO[$p])){$ok = $ok + $cO[$p];}
  if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);}
 }else{$ok = $inOk; unset($cI['del'], $cI['ins']);}
 // bad tags, & ele content
 // $e/$x still set here mean the previous piece was rejected; show its tag
 // as text, never as live markup
 if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){
  echo '&lt;', $s, $e, $a, '&gt;';
 }
 if(isset($x[0])){
  if($do < 3 or isset($ok['#pcdata'])){echo $x;}
  elseif(strpos($x, "\x02\x04")){
   // keep protected comment sections; strip (or reduce to whitespace) the rest
   foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){
    echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
   }
  }elseif($do > 4){echo preg_replace('`\S`', '', $x);}
 }
 // get markup
 if(!preg_match('`^(/?)([a-zA-Z1-6]+)([^>]*)>(.*)`sm', $t[$i], $r)){$x = $t[$i]; continue;}
 $s = null; $e = null; $a = null; $x = null; list($all, $s, $e, $a, $x) = $r;
 // close tag
 if($s){
  if(isset($cE[$e]) or !in_array($e, $q)){continue;} // Empty/unopen
  if($p == $e){array_pop($q); echo '</', $e, '>'; unset($e); continue;} // Last open
  $add = ''; // Nesting - close open tags that need to be
  for($j=-1, $cj=count($q); ++$j<$cj;){
   if(($d = array_pop($q)) == $e){break;}
   else{$add .= "</{$d}>";}
  }
  echo $add, '</', $e, '>'; unset($e); continue;
 }
 // open tag
 // $cB ele needs $eB ele as child
 if(isset($cB[$e]) && strlen(trim($x))){
  $t[$i] = "{$e}{$a}>";
  array_splice($t, $i+1, 0, 'div>'. $x); unset($e, $x); ++$ci; --$i; continue;
 }
 if((($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql)) && !isset($eB[$e]) && !isset($ok[$e])){
  array_splice($t, $i, 0, 'div>'); unset($e, $x); ++$ci; --$i; continue;
 }
 // if no open ele, $in = parent; mostly immediate parent-child relation should hold
 if(!$ql or !isset($eN[$e]) or !array_intersect($q, $cN2)){
  if(!isset($ok[$e])){
   // parent's closing tag may be omitted: close it and retry this piece
   if($ql && isset($cT[$p])){echo '</', array_pop($q), '>'; unset($e, $x); --$i;}
   continue;
  }
  if(!isset($cE[$e])){$q[] = $e;}
  echo '<', $e, $a, '>'; unset($e); continue;
 }
 // specific parent-child
 if(isset($cS[$p][$e])){
  if(!isset($cE[$e])){$q[] = $e;}
  echo '<', $e, $a, '>'; unset($e); continue;
 }
 // nesting
 $add = '';
 $q2 = array();
 for($k=-1, $kc=count($q); ++$k<$kc;){
  $d = $q[$k];
  $ok2 = array();
  if(isset($cS[$d])){$q2[] = $d; continue;}
  $ok2 = isset($cI[$d]) ? $eI : $eF;
  if(isset($cO[$d])){$ok2 = $ok2 + $cO[$d];}
  if(isset($cN[$d])){$ok2 = array_diff_assoc($ok2, $cN[$d]);}
  if(!isset($ok2[$e])){
   if(!$k && !isset($inOk[$e])){continue 2;}
   $add = "</{$d}>";
   for(;++$k<$kc;){$add = "</{$q[$k]}>{$add}";}
   break;
  }
  else{$q2[] = $d;}
 }
 $q = $q2;
 if(!isset($cE[$e])){$q[] = $e;}
 echo $add, '<', $e, $a, '>'; unset($e); continue;
}

// end
if($ql = count($q)){
 $p = array_pop($q);
 $q[] = $p;
 if(isset($cS[$p])){$ok = $cS[$p];}
 elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
 elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);}
 elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);}
 if(isset($cO[$p])){$ok = $ok + $cO[$p];}
 if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);}
}else{$ok = $inOk; unset($cI['del'], $cI['ins']);}
if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){
 echo '&lt;', $s, $e, $a, '&gt;';
}
if(isset($x[0])){
 if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){
  echo '<div>', $x, '</div>';
 }
 elseif($do < 3 or isset($ok['#pcdata'])){echo $x;}
 elseif(strpos($x, "\x02\x04")){
  foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){
   echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
  }
 }elseif($do > 4){echo preg_replace('`\S`', '', $x);}
}
// close whatever is still open
while(!empty($q) && ($e = array_pop($q))){echo '</', $e, '>';}
$o = ob_get_contents();
ob_end_clean();
return $o;
// eof
}
|
||||
|
||||
function hl_cmtcd($t){
/*
 * preg_replace_callback handler for comments and CDATA sections.
 *
 * $t[0] is the whole '<!--...-->' or '<![CDATA[...]]>' match. Behaviour is
 * selected by the global $C['comment'] / $C['cdata']:
 *   0 (falsy)  return the match untouched;
 *   1          remove it;
 *   2          keep it, with &, < and > inside turned into entities;
 *   otherwise  keep it as is.
 * Kept sections are returned wrapped in \x01..\x05 placeholder bytes with
 * every remaining &, < and > replaced by \x03, \x04, \x05, so that later
 * tag/entity passes do not touch them.
 */
$t = $t[0];
global $C;
if($t[3] == '-'){
 if(!$C['comment']){return $t;}
 if($C['comment'] == 1){return '';}
 // collapse runs of '-' and make sure the body ends with a space
 if(substr(($t = preg_replace('`--+`', '-', substr($t, 4, -3))), -1) != ' '){$t .= ' ';}
 $t = $C['comment'] == 2 ? str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $t) : $t;
 $t = "\x01\x02\x04!--$t--\x05\x02\x01";
}else{ // CDATA
 if(!$C['cdata']){return $t;}
 if($C['cdata'] == 1){return '';}
 $t = substr($t, 1, -1);
 $t = $C['cdata'] == 2 ? str_replace(array('&', '<', '>'), array('&amp;', '&lt;', '&gt;'), $t) : $t;
 $t = "\x01\x01\x04$t\x05\x01\x01";
}
return str_replace(array('&', '<', '>'), array("\x03", "\x04", "\x05"), $t);
// eof
}
|
||||
|
||||
function hl_ent($t){
/*
 * Entity handler (preg_replace_callback callback).
 *
 * $t[1] is the entity body without the leading ampersand and trailing
 * semicolon: a name, '#' + decimal digits, or '#x' + hex digits.
 * Returns the entity text to emit. The leading character is "\x06" instead
 * of '&' when the global $C['and_mark'] is set. Unknown names and numeric
 * references to disallowed code points come back with the ampersand escaped
 * ('amp;' inserted).
 */
global $C;
// entities that are always kept by name
static $keep = array('quot'=>1,'amp'=>1,'lt'=>1,'gt'=>1);
// named entity => decimal code point
static $codes = array('fnof'=>'402', 'Alpha'=>'913', 'Beta'=>'914', 'Gamma'=>'915', 'Delta'=>'916', 'Epsilon'=>'917', 'Zeta'=>'918', 'Eta'=>'919', 'Theta'=>'920', 'Iota'=>'921', 'Kappa'=>'922', 'Lambda'=>'923', 'Mu'=>'924', 'Nu'=>'925', 'Xi'=>'926', 'Omicron'=>'927', 'Pi'=>'928', 'Rho'=>'929', 'Sigma'=>'931', 'Tau'=>'932', 'Upsilon'=>'933', 'Phi'=>'934', 'Chi'=>'935', 'Psi'=>'936', 'Omega'=>'937', 'alpha'=>'945', 'beta'=>'946', 'gamma'=>'947', 'delta'=>'948', 'epsilon'=>'949', 'zeta'=>'950', 'eta'=>'951', 'theta'=>'952', 'iota'=>'953', 'kappa'=>'954', 'lambda'=>'955', 'mu'=>'956', 'nu'=>'957', 'xi'=>'958', 'omicron'=>'959', 'pi'=>'960', 'rho'=>'961', 'sigmaf'=>'962', 'sigma'=>'963', 'tau'=>'964', 'upsilon'=>'965', 'phi'=>'966', 'chi'=>'967', 'psi'=>'968', 'omega'=>'969', 'thetasym'=>'977', 'upsih'=>'978', 'piv'=>'982', 'bull'=>'8226', 'hellip'=>'8230', 'prime'=>'8242', 'Prime'=>'8243', 'oline'=>'8254', 'frasl'=>'8260', 'weierp'=>'8472', 'image'=>'8465', 'real'=>'8476', 'trade'=>'8482', 'alefsym'=>'8501', 'larr'=>'8592', 'uarr'=>'8593', 'rarr'=>'8594', 'darr'=>'8595', 'harr'=>'8596', 'crarr'=>'8629', 'lArr'=>'8656', 'uArr'=>'8657', 'rArr'=>'8658', 'dArr'=>'8659', 'hArr'=>'8660', 'forall'=>'8704', 'part'=>'8706', 'exist'=>'8707', 'empty'=>'8709', 'nabla'=>'8711', 'isin'=>'8712', 'notin'=>'8713', 'ni'=>'8715', 'prod'=>'8719', 'sum'=>'8721', 'minus'=>'8722', 'lowast'=>'8727', 'radic'=>'8730', 'prop'=>'8733', 'infin'=>'8734', 'ang'=>'8736', 'and'=>'8743', 'or'=>'8744', 'cap'=>'8745', 'cup'=>'8746', 'int'=>'8747', 'there4'=>'8756', 'sim'=>'8764', 'cong'=>'8773', 'asymp'=>'8776', 'ne'=>'8800', 'equiv'=>'8801', 'le'=>'8804', 'ge'=>'8805', 'sub'=>'8834', 'sup'=>'8835', 'nsub'=>'8836', 'sube'=>'8838', 'supe'=>'8839', 'oplus'=>'8853', 'otimes'=>'8855', 'perp'=>'8869', 'sdot'=>'8901', 'lceil'=>'8968', 'rceil'=>'8969', 'lfloor'=>'8970', 'rfloor'=>'8971', 'lang'=>'9001', 'rang'=>'9002', 'loz'=>'9674', 'spades'=>'9824', 'clubs'=>'9827', 'hearts'=>'9829', 'diams'=>'9830',
 'apos'=>'39', 'OElig'=>'338', 'oelig'=>'339', 'Scaron'=>'352', 'scaron'=>'353', 'Yuml'=>'376', 'circ'=>'710', 'tilde'=>'732', 'ensp'=>'8194', 'emsp'=>'8195', 'thinsp'=>'8201', 'zwnj'=>'8204', 'zwj'=>'8205', 'lrm'=>'8206', 'rlm'=>'8207', 'ndash'=>'8211', 'mdash'=>'8212', 'lsquo'=>'8216', 'rsquo'=>'8217', 'sbquo'=>'8218', 'ldquo'=>'8220', 'rdquo'=>'8221', 'bdquo'=>'8222', 'dagger'=>'8224', 'Dagger'=>'8225', 'permil'=>'8240', 'lsaquo'=>'8249', 'rsaquo'=>'8250', 'euro'=>'8364', 'nbsp'=>'160', 'iexcl'=>'161', 'cent'=>'162', 'pound'=>'163', 'curren'=>'164', 'yen'=>'165', 'brvbar'=>'166', 'sect'=>'167', 'uml'=>'168', 'copy'=>'169', 'ordf'=>'170', 'laquo'=>'171', 'not'=>'172', 'shy'=>'173', 'reg'=>'174', 'macr'=>'175', 'deg'=>'176', 'plusmn'=>'177', 'sup2'=>'178', 'sup3'=>'179', 'acute'=>'180', 'micro'=>'181', 'para'=>'182', 'middot'=>'183', 'cedil'=>'184', 'sup1'=>'185', 'ordm'=>'186', 'raquo'=>'187', 'frac14'=>'188', 'frac12'=>'189', 'frac34'=>'190', 'iquest'=>'191', 'Agrave'=>'192', 'Aacute'=>'193', 'Acirc'=>'194', 'Atilde'=>'195', 'Auml'=>'196', 'Aring'=>'197', 'AElig'=>'198', 'Ccedil'=>'199', 'Egrave'=>'200', 'Eacute'=>'201', 'Ecirc'=>'202', 'Euml'=>'203', 'Igrave'=>'204', 'Iacute'=>'205', 'Icirc'=>'206', 'Iuml'=>'207', 'ETH'=>'208', 'Ntilde'=>'209', 'Ograve'=>'210', 'Oacute'=>'211', 'Ocirc'=>'212', 'Otilde'=>'213', 'Ouml'=>'214', 'times'=>'215', 'Oslash'=>'216', 'Ugrave'=>'217', 'Uacute'=>'218', 'Ucirc'=>'219', 'Uuml'=>'220', 'Yacute'=>'221', 'THORN'=>'222', 'szlig'=>'223', 'agrave'=>'224', 'aacute'=>'225', 'acirc'=>'226', 'atilde'=>'227', 'auml'=>'228', 'aring'=>'229', 'aelig'=>'230', 'ccedil'=>'231', 'egrave'=>'232', 'eacute'=>'233', 'ecirc'=>'234', 'euml'=>'235', 'igrave'=>'236', 'iacute'=>'237', 'icirc'=>'238', 'iuml'=>'239', 'eth'=>'240', 'ntilde'=>'241', 'ograve'=>'242', 'oacute'=>'243', 'ocirc'=>'244', 'otilde'=>'245', 'ouml'=>'246', 'divide'=>'247', 'oslash'=>'248', 'ugrave'=>'249', 'uacute'=>'250', 'ucirc'=>'251', 'uuml'=>'252', 'yacute'=>'253',
 'thorn'=>'254', 'yuml'=>'255');

$body = $t[1];
$amp = $C['and_mark'] ? "\x06" : '&';

// named entity
if($body[0] != '#'){
 if(isset($keep[$body])){
  return $amp. $body. ';';
 }
 if(!isset($codes[$body])){
  // unknown name: show it as literal text
  return $amp. 'amp;'. $body. ';';
 }
 if($C['named_entity']){
  return $amp. $body. ';';
 }
 // named entities are turned off: emit the numeric form, hex if hexdec_entity > 1
 $num = $C['hexdec_entity'] > 1 ? 'x'. dechex($codes[$body]) : $codes[$body];
 return $amp. '#'. $num. ';';
}

// numeric entity: $ref is the digits, with a leading 'x' for hex input
$ref = substr($body, 1);
$isDec = ctype_digit($ref);
$n = $isDec ? intval($ref) : hexdec(substr($ref, 1));
$lowCtrl = ($n < 9) || ($n > 13 && $n < 32) || $n == 11 || $n == 12;
$c1Ctrl = ($n > 126 && $n < 160 && $n != 133);
$highBad = ($n > 55295 && ($n < 57344 || ($n > 64975 && $n < 64992) || $n == 65534 || $n == 65535 || $n > 1114111));
if($lowCtrl || $c1Ctrl || $highBad){
 return $amp. 'amp;#'. $ref. ';';
}
// decimal input stays decimal unless hexdec_entity >= 2; hex input stays hex
// unless hexdec_entity is 0
if(($isDec && $C['hexdec_entity'] < 2) || !$C['hexdec_entity']){
 return $amp. '#'. $n. ';';
}
return $amp. '#x'. dechex($n). ';';
// eof
}
|
||||
|
||||
function hl_prot($p, $c=null){
/*
 * Check the scheme (protocol) of a URL and optionally rewrite the URL as
 * absolute or relative.
 *
 * Called in two ways:
 *  - with a URL string $p and an attribute name $c;
 *  - with $c left empty and $p a match array, in which case $p[2] is the URL,
 *    $p[1] / $p[3] are kept around it, and the 'style' scheme list applies.
 *
 * The permitted schemes come from the global $C['schemes'][$c], falling back
 * to $C['schemes']['*']. A URL with a scheme outside that list is returned
 * prefixed with 'denied:'.
 */
global $C;
$lead = '';
$trail = '';
if($c == null){
 $lead = $p[1];
 $trail = $p[3];
 $p = trim($p[2]);
 $c = 'style';
}
$allowed = isset($C['schemes'][$c]) ? $C['schemes'][$c] : $C['schemes']['*'];

// All ok, frag, query, param
if(isset($allowed['*']) or !strcspn($p, '#?;')){
 return $lead. $p. $trail;
}

// Denied prot
$hasScheme = preg_match('`^([a-z\d\-+.&#; ]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m);
if($hasScheme && !isset($allowed[strtolower($m[1])])){
 return $lead. 'denied:'. $p. $trail;
}

if(!$C['abs_url']){
 return $lead. $p. $trail;
}

$base = $C['base_url'];
if($C['abs_url'] == -1 && strpos($p, $base) === 0){
 // Make url rel
 $p = substr($p, strlen($base));
}elseif(empty($m[1])){
 // Make URL abs
 if(substr($p, 0, 2) == '//'){
  // scheme-relative: borrow the scheme of the base URL
  $p = substr($base, 0, strpos($base, ':')+1). $p;
 }elseif($p[0] == '/'){
  // host-relative: prepend scheme://host of the base URL
  $p = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $base). $p;
 }elseif(strcspn($p, './')){
  // plain relative path
  $p = $base. $p;
 }else{
  // starts with '.' or '/': resolve './' and '../' against the base path
  preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $base, $parts);
  $path = preg_replace('`(?<=/)\./`', '', $parts[2]. $p);
  $upDir = '`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`';
  while(preg_match($upDir, $path)){
   $path = preg_replace($upDir, '', $path);
  }
  $p = $parts[1]. $path;
 }
}
return $lead. $p. $trail;
// eof
}
|
||||
|
||||
function hl_regex($p){
/*
 * Tell whether $p is a usable PCRE pattern (delimiters included).
 *
 * Returns 1 if preg_match() accepts the pattern, 0 otherwise (also for empty
 * or non-string input). The pattern is test-run against an empty subject;
 * warnings raised by a bad pattern are swallowed by a temporary error
 * handler, so nothing is displayed or logged and no ini setting is touched.
 *
 * The result is taken from preg_match()'s own return value (false on
 * failure) rather than from track_errors/$php_errormsg, which no longer
 * exist in current PHP.
 */
if(empty($p) or !is_string($p)){return 0;}
set_error_handler('hl_regex_quiet');
$r = preg_match($p, '');
restore_error_handler();
return ($r === false) ? 0 : 1;
// eof
}

function hl_regex_quiet(){
// error handler for hl_regex(): report every error as handled
return true;
// eof
}
|
||||
|
||||
function hl_spec($t){
/*
 * Parse a spec string into the array form used for attribute checks.
 *
 * Syntax handled here: rules separated by ';', each 'ele1,ele2=attr-list'.
 * The attr-list is comma separated; an item is
 *   -*            deny all attributes            -> ['n']['*'] = 1
 *   -name         deny one attribute             -> ['n'][name] = 1
 *   name          allow without conditions       -> [name] = 1
 *   name(k=v/k=v) allow with value rules         -> [name][k] = v
 * Double-quoted rule values may contain the separator characters; a literal
 * double quote inside is written as `" (backtick + quote). Rule keys shorter
 * than 5 characters are not accepted. 'match'/'nomatch' values that are not
 * valid regexes (per hl_regex) are dropped.
 *
 * Returns array(element => array(attr => 1|rules, 'n' => denied attrs)).
 *
 * The quoted sections are masked through preg_replace_callback(); the /e
 * modifier used previously is unsupported since PHP 7 (preg_replace() then
 * returns NULL and the whole spec is lost) and it evaluated spec text as
 * PHP code.
 */
$s = array();
$t = str_replace(array("\t", "\r", "\n", ' '), '', preg_replace_callback('/"(?>(`.|[^"])*)"/sm', 'hl_spec_mask', trim($t)));
for($i = count(($t = explode(';', $t))); --$i>=0;){
 $w = $t[$i];
 if(empty($w) or ($e = strpos($w, '=')) === false or !strlen(($a = substr($w, $e+1)))){continue;}
 $y = $n = array();
 foreach(explode(',', $a) as $v){
  if(!preg_match('`^([a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)){continue;}
  if(($x = strtolower($m[1])) == '-*'){$n['*'] = 1; continue;}
  if($x[0] == '-'){$n[substr($x, 1)] = 1; continue;}
  if(!isset($m[2])){$y[$x] = 1; continue;}
  foreach(explode('/', $m[2]) as $m){
   if(empty($m) or ($p = strpos($m, '=')) == 0 or $p < 5){$y[$x] = 1; continue;}
   // unmask the separator characters hidden by hl_spec_mask()
   $y[$x][strtolower(substr($m, 0, $p))] = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08"), array(";", "|", "~", " ", ",", "/", "(", ")"), substr($m, $p+1));
  }
  if(isset($y[$x]['match']) && !hl_regex($y[$x]['match'])){unset($y[$x]['match']);}
  if(isset($y[$x]['nomatch']) && !hl_regex($y[$x]['nomatch'])){unset($y[$x]['nomatch']);}
 }
 if(!count($y) && !count($n)){continue;}
 foreach(explode(',', substr($w, 0, $e)) as $v){
  if(!strlen(($v = strtolower($v)))){continue;}
  if(count($y)){$s[$v] = $y;}
  if(count($n)){$s[$v]['n'] = $n;}
 }
}
return $s;
// eof
}

function hl_spec_mask($m){
// hl_spec() callback: within one double-quoted section, swap the spec
// separator characters for placeholder bytes, turn `" into ", and drop the
// surrounding quotes
return substr(str_replace(array(";", "|", "~", " ", ",", "/", "(", ")", '`"'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", "\""), $m[0]), 1, -1);
// eof
}
|
||||
|
||||
function hl_tag($t){
// tag/attribute handler
//
// Receives a match array and works on element 0 only: either a lone '< ' or '>' or the text of one tag
// Returns the cleaned tag, the input with its angle brackets entity-escaped, or '' when the tag is dropped
// Reads the global config $C and the global per-element attribute spec $S; records used ids in $GLOBALS['hl_Ids']
// Calls hl_tag2(), hl_prot() and hl_attrval(), which are defined elsewhere in this file
global $C;
$t = $t[0];
// invalid < >
// A stray bracket must come back as an entity; returning it raw would let it reach the output as markup
if($t == '< '){return '&lt; ';}
if($t == '>'){return '&gt;';}
if(!preg_match('`^<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$`m', $t, $m)){
 return str_replace(array('<', '>'), array('&lt;', '&gt;'), $t);
}elseif(!isset($C['elements'][($e = strtolower($m[2]))])){
 // Element not permitted: odd keep_bad values keep it as escaped text, even values drop it
 return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '');
}
// attr string
// NOTE(review): "\xad" is replaced byte-wise, so a multibyte character containing byte 0xAD would also be altered - confirm expected input encoding
$a = str_replace(array("\xad", "\n", "\r", "\t"), ' ', trim($m[3]));
if(strpos($a, '&') !== false){
 // The result has to be assigned; the bare call left soft-hyphen entities in place
 $a = str_replace(array('&#xad;', '&#173;', '&shy;'), ' ', $a);
}
// tag transform
static $eD = array('applet'=>1, 'center'=>1, 'dir'=>1, 'embed'=>1, 'font'=>1, 'isindex'=>1, 'menu'=>1, 's'=>1, 'strike'=>1, 'u'=>1); // Deprecated
if($C['make_tag_strict'] && isset($eD[$e])){
 // hl_tag2 takes $e by reference: it renames the element, or sets it to 0 when the tag cannot be transformed
 $trt = hl_tag2($e, $a, $C['make_tag_strict']);
 if(!$e){return (($C['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : '');}
}
// close tag
static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty ele
if(!empty($m[1])){
 // A closing tag for an empty element is invalid and is treated like a bad tag
 return (!isset($eE[$e]) ? "</$e>" : (($C['keep_bad'])%2 ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $t) : ''));
}

// open tag & attr
static $aN = array('abbr'=>array('td'=>1, 'th'=>1), 'accept-charset'=>array('form'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accesskey'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'legend'=>1, 'textarea'=>1), 'action'=>array('form'=>1), 'align'=>array('caption'=>1, 'embed'=>1, 'applet'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'legend'=>1, 'table'=>1, 'hr'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'p'=>1, 'col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'tr'=>1, 'td'=>1, 'th'=>1), 'border'=>array('table'=>1, 'img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('input'=>1), 'cite'=>array('blockquote'=>1, 'q'=>1, 'del'=>1, 'ins'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('object'=>1, 'applet'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'coords'=>array('area'=>1, 'a'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1), 'declare'=>array('object'=>1), 'defer'=>array('script'=>1), 'dir'=>array('bdo'=>1), 'disabled'=>array('button'=>1, 'input'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'enctype'=>array('form'=>1), 'face'=>array('font'=>1), 'for'=>array('label'=>1), 'frame'=>array('table'=>1), 
'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('embed'=>1, 'iframe'=>1, 'td'=>1, 'th'=>1, 'img'=>1, 'object'=>1, 'applet'=>1), 'href'=>array('a'=>1, 'area'=>1), 'hreflang'=>array('a'=>1), 'hspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'label'=>array('option'=>1, 'optgroup'=>1), 'language'=>array('script'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'maxlength'=>array('input'=>1), 'method'=>array('form'=>1), 'model'=>array('embed'=>1), 'multiple'=>array('select'=>1), 'name'=>array('button'=>1, 'embed'=>1, 'textarea'=>1, 'applet'=>1, 'select'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'a'=>1, 'input'=>1, 'object'=>1, 'map'=>1, 'param'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'onblur'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onchange'=>array('input'=>1, 'select'=>1, 'textarea'=>1), 'onfocus'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onreset'=>array('form'=>1), 'onselect'=>array('input'=>1, 'textarea'=>1), 'onsubmit'=>array('form'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'prompt'=>array('isindex'=>1), 'readonly'=>array('textarea'=>1, 'input'=>1), 'rel'=>array('a'=>1), 'rev'=>array('a'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scrolling'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('area'=>1, 'a'=>1), 'size'=>array('hr'=>1, 'font'=>1, 'input'=>1, 'select'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('embed'=>1, 'script'=>1, 'input'=>1, 'iframe'=>1, 'img'=>1), 'standby'=>array('object'=>1), 'start'=>array('ol'=>1), 'summary'=>array('table'=>1), 'tabindex'=>array('a'=>1, 'area'=>1, 
'button'=>1, 'input'=>1, 'object'=>1, 'select'=>1, 'textarea'=>1), 'target'=>array('a'=>1, 'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'embed'=>1, 'object'=>1, 'param'=>1, 'script'=>1, 'input'=>1, 'li'=>1, 'ol'=>1, 'ul'=>1, 'button'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('input'=>1, 'option'=>1, 'param'=>1, 'button'=>1, 'li'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'width'=>array('embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'object'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'applet'=>1, 'col'=>1, 'colgroup'=>1, 'pre'=>1), 'wmode'=>array('embed'=>1), 'xml:space'=>array('pre'=>1, 'script'=>1, 'style'=>1)); // Ele-specific
static $aNE = array('checked'=>1, 'compact'=>1, 'declare'=>1, 'defer'=>1, 'disabled'=>1, 'ismap'=>1, 'multiple'=>1, 'nohref'=>1, 'noresize'=>1, 'noshade'=>1, 'nowrap'=>1, 'readonly'=>1, 'selected'=>1); // Empty
static $aNP = array('action'=>1, 'cite'=>1, 'classid'=>1, 'codebase'=>1, 'data'=>1, 'href'=>1, 'longdesc'=>1, 'model'=>1, 'pluginspage'=>1, 'pluginurl'=>1, 'usemap'=>1); // Need scheme check; excludes style, on* & src
static $aNU = array('class'=>array('param'=>1, 'script'=>1), 'dir'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'id'=>array('script'=>1), 'lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'xml:lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'onclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'ondblclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeydown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeypress'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeyup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousedown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousemove'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseout'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseover'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'style'=>array('param'=>1, 'script'=>1), 'title'=>array('param'=>1, 'script'=>1)); // Univ & exceptions

if($C['lc_std_val']){
 // predef attr vals for $eAL & $aNE ele
 static $aNL = array('all'=>1, 'baseline'=>1, 'bottom'=>1, 'button'=>1, 'center'=>1, 'char'=>1, 'checkbox'=>1, 'circle'=>1, 'col'=>1, 'colgroup'=>1, 'cols'=>1, 'data'=>1, 'default'=>1, 'file'=>1, 'get'=>1, 'groups'=>1, 'hidden'=>1, 'image'=>1, 'justify'=>1, 'left'=>1, 'ltr'=>1, 'middle'=>1, 'none'=>1, 'object'=>1, 'password'=>1, 'poly'=>1, 'post'=>1, 'preserve'=>1, 'radio'=>1, 'rect'=>1, 'ref'=>1, 'reset'=>1, 'right'=>1, 'row'=>1, 'rowgroup'=>1, 'rows'=>1, 'rtl'=>1, 'submit'=>1, 'text'=>1, 'top'=>1);
 static $eAL = array('a'=>1, 'area'=>1, 'bdo'=>1, 'button'=>1, 'col'=>1, 'form'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'xml:space'=>1);
 $lcase = isset($eAL[$e]) ? 1 : 0;
}

$depTr = 0;
if($C['no_deprecated_attr']){
 // dep attr:applicable ele
 static $aND = array('align'=>array('caption'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'object'=>1, 'p'=>1, 'table'=>1), 'bgcolor'=>array('table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1), 'border'=>array('img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'clear'=>array('br'=>1), 'compact'=>array('dl'=>1, 'ol'=>1, 'ul'=>1), 'height'=>array('td'=>1, 'th'=>1), 'hspace'=>array('img'=>1, 'object'=>1), 'language'=>array('script'=>1), 'name'=>array('a'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'map'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'size'=>array('hr'=>1), 'start'=>array('ol'=>1), 'type'=>array('li'=>1, 'ol'=>1, 'ul'=>1), 'value'=>array('li'=>1), 'vspace'=>array('img'=>1, 'object'=>1), 'width'=>array('hr'=>1, 'pre'=>1, 'td'=>1, 'th'=>1));
 static $eAD = array('a'=>1, 'br'=>1, 'caption'=>1, 'div'=>1, 'dl'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'object'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'script'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1, 'ul'=>1);
 $depTr = isset($eAD[$e]) ? 1 : 0;
}

// attr name-vals
if(strpos($a, "\x01") !== false){$a = preg_replace('`\x01[^\x01]*\x01`', '', $a);} // No comment/CDATA sec
// Small state machine over the attribute string: mode 0 expects a name, 1 an optional '=', 2 a value
$mode = 0; $a = trim($a, ' /'); $aA = array();
while(strlen($a)){
 $w = 0; // set to 1 when the current mode consumed something
 switch($mode){
  case 0: // Name
   if(preg_match('`^[a-zA-Z][\-a-zA-Z:]+`', $a, $m)){
    $nm = strtolower($m[0]);
    $w = $mode = 1; $a = ltrim(substr_replace($a, '', 0, strlen($m[0])));
   }
  break; case 1:
   if($a[0] == '='){ // =
    $w = 1; $mode = 2; $a = ltrim($a, '= ');
   }else{ // No val
    $w = 1; $mode = 0; $a = ltrim($a);
    $aA[$nm] = '';
   }
  break; case 2: // Val
   if(preg_match('`^"[^"]*"`', $a, $m) or preg_match("`^'[^']*'`", $a, $m) or preg_match("`^\s*[^\s\"']+`", $a, $m)){
    $m = $m[0]; $w = 1; $mode = 0; $a = ltrim(substr_replace($a, '', 0, strlen($m)));
    $aA[$nm] = trim(($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m);
   }
  break;
 }
 if($w == 0){ // Parse errs, deal with space, " & '
  $a = preg_replace('`^(?:"[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*`', '', $a);
  $mode = 0;
 }
}
if($mode == 1){$aA[$nm] = '';}

// clean attrs
global $S;
$rl = isset($S[$e]) ? $S[$e] : array();
$a = array(); $nfr = 0; // $nfr: a 'nofollow' must be appended to rel once all attributes are processed
foreach($aA as $k=>$v){
 // Keep the attribute only if it is not denied (or the spec re-allows it) and it is valid for this element
 if(((isset($C['deny_attribute']['*']) ? isset($C['deny_attribute'][$k]) : !isset($C['deny_attribute'][$k])) or isset($rl[$k])) && ((!isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e])))){
  if(isset($aNE[$k])){$v = $k;}
  elseif(!empty($lcase) && (($e != 'button' or $e != 'input') or $k == 'type')){ // Rather loose but ?not cause issues
   $v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v;
  }
  if($k == 'style' && !$C['style_pass']){
   if(false !== strpos($v, '&#')){
    // Numeric character references (hex and decimal) that could hide 'expression', 'url(', quotes or comment markers in CSS
    // The colon is &#x3a; / &#58;; keying it on the space reference overrode the space mapping
    static $sC = array('&#x20;'=>' ', '&#32;'=>' ', '&#x45;'=>'e', '&#69;'=>'e', '&#x65;'=>'e', '&#101;'=>'e', '&#x58;'=>'x', '&#88;'=>'x', '&#x78;'=>'x', '&#120;'=>'x', '&#x50;'=>'p', '&#80;'=>'p', '&#x70;'=>'p', '&#112;'=>'p', '&#x53;'=>'s', '&#83;'=>'s', '&#x73;'=>'s', '&#115;'=>'s', '&#x49;'=>'i', '&#73;'=>'i', '&#x69;'=>'i', '&#105;'=>'i', '&#x4f;'=>'o', '&#79;'=>'o', '&#x6f;'=>'o', '&#111;'=>'o', '&#x4e;'=>'n', '&#78;'=>'n', '&#x6e;'=>'n', '&#110;'=>'n', '&#x55;'=>'u', '&#85;'=>'u', '&#x75;'=>'u', '&#117;'=>'u', '&#x52;'=>'r', '&#82;'=>'r', '&#x72;'=>'r', '&#114;'=>'r', '&#x4c;'=>'l', '&#76;'=>'l', '&#x6c;'=>'l', '&#108;'=>'l', '&#x28;'=>'(', '&#40;'=>'(', '&#x29;'=>')', '&#41;'=>')', '&#x3a;'=>':', '&#58;'=>':', '&#x22;'=>'"', '&#34;'=>'"', '&#x27;'=>"'", '&#39;'=>"'", '&#x2f;'=>'/', '&#47;'=>'/', '&#x2a;'=>'*', '&#42;'=>'*', '&#x5c;'=>'\\', '&#92;'=>'\\');
    $v = strtr($v, $sC);
   }
   $v = preg_replace_callback('`(url(?:\()(?: )*(?:\'|"|&(?:quot|apos);)?)(.+)((?:\'|"|&(?:quot|apos);)?(?: )*(?:\)))`iS', 'hl_prot', $v);
   $v = !$C['css_expression'] ? preg_replace('`expression`i', ' ', preg_replace('`\\\\\S|(/|(%2f))(\*|(%2a))`i', ' ', $v)) : $v;
  }elseif(isset($aNP[$k]) or strpos($k, 'src') !== false or $k[0] == 'o'){
   $v = hl_prot($v, $k);
   if($k == 'href'){ // X-spam
    if($C['anti_mail_spam'] && strpos($v, 'mailto:') === 0){
     $v = str_replace('@', htmlspecialchars($C['anti_mail_spam']), $v);
    }elseif($C['anti_link_spam']){
     $r1 = $C['anti_link_spam'][1];
     if(!empty($r1) && preg_match($r1, $v)){continue;}
     $r0 = $C['anti_link_spam'][0];
     if(!empty($r0) && preg_match($r0, $v)){
      if(isset($a['rel'])){
       if(!preg_match('`\bnofollow\b`i', $a['rel'])){$a['rel'] .= ' nofollow';}
      }elseif(isset($aA['rel'])){
       if(!preg_match('`\bnofollow\b`i', $aA['rel'])){$nfr = 1;}
      }else{$a['rel'] = 'nofollow';}
     }
    }
   }
  }
  if(isset($rl[$k]) && is_array($rl[$k]) && ($v = hl_attrval($v, $rl[$k])) === 0){continue;}
  // Values are emitted inside double quotes below, so a literal double quote must become an entity
  $a[$k] = str_replace('"', '&quot;', $v);
 }
}
if($nfr){$a['rel'] = isset($a['rel']) ? $a['rel']. ' nofollow' : 'nofollow';}

// rqd attr
static $eAR = array('area'=>array('alt'=>'area'), 'bdo'=>array('dir'=>'ltr'), 'form'=>array('action'=>''), 'img'=>array('src'=>'', 'alt'=>'image'), 'map'=>array('name'=>''), 'optgroup'=>array('label'=>''), 'param'=>array('name'=>''), 'script'=>array('type'=>'text/javascript'), 'textarea'=>array('rows'=>'10', 'cols'=>'50'));
if(isset($eAR[$e])){
 foreach($eAR[$e] as $k=>$v){
  // An empty default means the attribute name itself is used as the value
  if(!isset($a[$k])){$a[$k] = isset($v[0]) ? $v : $k;}
 }
}

// depr attrs
if($depTr){
 $c = array(); // CSS declarations that replace the removed attributes
 foreach($a as $k=>$v){
  if($k == 'style' or !isset($aND[$k][$e])){continue;}
  if($k == 'align'){
   unset($a['align']);
   if($e == 'img' && ($v == 'left' or $v == 'right')){$c[] = 'float: '. $v;}
   elseif(($e == 'div' or $e == 'table') && $v == 'center'){$c[] = 'margin: auto';}
   else{$c[] = 'text-align: '. $v;}
  }elseif($k == 'bgcolor'){
   unset($a['bgcolor']);
   $c[] = 'background-color: '. $v;
  }elseif($k == 'border'){
   unset($a['border']); $c[] = "border: {$v}px";
  }elseif($k == 'bordercolor'){
   unset($a['bordercolor']); $c[] = 'border-color: '. $v;
  }elseif($k == 'clear'){
   unset($a['clear']); $c[] = 'clear: '. ($v != 'all' ? $v : 'both');
  }elseif($k == 'compact'){
   unset($a['compact']); $c[] = 'font-size: 85%';
  }elseif($k == 'height' or $k == 'width'){
   unset($a[$k]); $c[] = $k. ': '. ($v[0] != '*' ? $v. (ctype_digit($v) ? 'px' : '') : 'auto');
  }elseif($k == 'hspace'){
   unset($a['hspace']); $c[] = "margin-left: {$v}px; margin-right: {$v}px";
  }elseif($k == 'language' && !isset($a['type'])){
   unset($a['language']);
   $a['type'] = 'text/'. strtolower($v);
  }elseif($k == 'name'){
   if($C['no_deprecated_attr'] == 2 or ($e != 'a' && $e != 'map')){unset($a['name']);}
   if(!isset($a['id']) && preg_match('`[a-zA-Z][a-zA-Z\d.:_\-]*`', $v)){$a['id'] = $v;}
  }elseif($k == 'noshade'){
   unset($a['noshade']); $c[] = 'border-style: none; border: 0; background-color: gray; color: gray';
  }elseif($k == 'nowrap'){
   unset($a['nowrap']); $c[] = 'white-space: nowrap';
  }elseif($k == 'size'){
   unset($a['size']); $c[] = 'size: '. $v. 'px';
  }elseif($k == 'start' or $k == 'value'){
   unset($a[$k]);
  }elseif($k == 'type'){
   unset($a['type']);
   static $ol_type = array('i'=>'lower-roman', 'I'=>'upper-roman', 'a'=>'lower-latin', 'A'=>'upper-latin', '1'=>'decimal');
   $c[] = 'list-style-type: '. (isset($ol_type[$v]) ? $ol_type[$v] : 'decimal');
  }elseif($k == 'vspace'){
   unset($a['vspace']); $c[] = "margin-top: {$v}px; margin-bottom: {$v}px";
  }
 }
 if(count($c)){
  $c = implode('; ', $c);
  $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $c. ';': $c. ';';
 }
}
// unique ID
if($C['unique_ids'] && isset($a['id'])){
 // unique_ids == 1 drops a malformed or repeated id; any other truthy value is used as a prefix until the id is unused
 if(!preg_match('`^[A-Za-z][A-Za-z0-9_\-.:]*$`', ($id = $a['id'])) or (isset($GLOBALS['hl_Ids'][$id]) && $C['unique_ids'] == 1)){unset($a['id']);
 }else{
  while(isset($GLOBALS['hl_Ids'][$id])){$id = $C['unique_ids']. $id;}
  $GLOBALS['hl_Ids'][($a['id'] = $id)] = 1;
 }
}
// xml:lang
if($C['xml:lang'] && isset($a['lang'])){
 $a['xml:lang'] = isset($a['xml:lang']) ? $a['xml:lang'] : $a['lang'];
 if($C['xml:lang'] == 2){unset($a['lang']);}
}
// for transformed tag
if(!empty($trt)){
 $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $trt : $trt;
}
// return with empty ele /
if(empty($C['hook_tag'])){
 $aA = '';
 foreach($a as $k=>$v){$aA .= " {$k}=\"{$v}\"";}
 return "<{$e}{$aA}". (isset($eE[$e]) ? ' /' : ''). '>';
}
else{return $C['hook_tag']($e, $a);}
// eof
}
|
||||
|
||||
function hl_tag2(&$e, &$a, $t=1){
// transform tag
//
// Maps a deprecated element onto a replacement element plus inline CSS
// $e (by reference): element name; rewritten to the replacement name, or set to 0 when $t == 2 and no transformation exists
// $a: attribute string of the tag; only read here, and only for 'font'
// $t: strictness level; 2 makes an untransformable element invalid
// Returns the CSS declarations to merge into the style attribute ('' if none), or 0 together with $e = 0
if($e == 'center'){$e = 'div'; return 'text-align: center;';}
if($e == 'dir' or $e == 'menu'){$e = 'ul'; return '';}
if($e == 's' or $e == 'strike'){$e = 'span'; return 'text-decoration: line-through;';}
if($e == 'u'){$e = 'span'; return 'text-decoration: underline;';}
static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%');
if($e == 'font'){
 $a2 = '';
 // Quoted value first, then an unquoted one
 // The unquoted pattern must capture the whole value; capturing the first character in its own group dropped it ('arial' became 'rial')
 $face = null;
 if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m)){
  $face = $m[2];
 }elseif(preg_match('`face\s*=\s*([^"\s]\S*)`i', $a, $m)){
  $face = $m[1];
 }
 if($face !== null){
  // The declaration ends up inside a double-quoted style attribute, hence the quote swap
  $a2 .= ' font-family: '. str_replace('"', '\'', trim($face)). ';';
 }
 if(preg_match('`color\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m)){
  $a2 .= ' color: '. trim($m[2]). ';';
 }
 if(preg_match('`size\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m) && isset($fs[($m = trim($m[2]))])){
  $a2 .= ' font-size: '. $fs[$m]. ';';
 }
 $e = 'span'; return ltrim($a2);
}
if($t == 2){$e = 0; return 0;}
return '';
// eof
}
|
||||
|
||||
function hl_tidy($t, $w, $p){
// Tidy/compact HTM
//
// $t: markup to reformat
// $w: -1 to only compact white-space; otherwise a string of option characters read below ('t' tab indent, a digit for indent width, 'r'/'n' for the line-break style)
// $p: name of the parent element the markup sits in
// Returns the reformatted markup; content of pre, script and textarea, comments and CDATA sections is kept verbatim
if(strpos(' pre,script,textarea', "$p,")){return $t;}
// Shield characters that must survive compaction by swapping them for control characters; they are restored before returning
// A closure replaces create_function(), which evaluated code from a string and no longer exists as of PHP 8.0
$shield = function($m){
 return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];
};
$t = preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea).*?>)(.+?)(</\2>)`sm'), $shield, $t);
$t = preg_replace(array('`(<\w[^>]*(?<!/)>)\s+`', '`\s+`', '`(<\w[^>]*(?<!/)>) `'), array(' $1', ' ', '$1'), $t);
$t = str_replace(' </', '</', $t);
if(($w = strtolower($w)) == -1){
 return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
}
// Indent unit: tab or space, repeated per the digit in $w (default 1 tab or 2 spaces)
$s = strpos(" $w", 't') ? "\t" : ' ';
$s = preg_match('`\d`', $w, $m) ? str_repeat($s, $m[0]) : str_repeat($s, ($s == "\t" ? 1 : 2));
// Starting indent depth
$n = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0;
// Element groups with different line-break/indent treatment in the loop below
$a = array('br'=>1);
$b = array('button'=>1, 'input'=>1, 'option'=>1);
$c = array('caption'=>1, 'dd'=>1, 'dt'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'isindex'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'object'=>1, 'p'=>1, 'pre'=>1, 'td'=>1, 'textarea'=>1, 'th'=>1);
$d = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'form'=>1, 'hr'=>1, 'iframe'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1);
ob_start();
if(isset($d[$p])){echo str_repeat($s, ++$n);}
$t = explode('<', $t);
echo ltrim(array_shift($t));
for($i=-1, $j=count($t); ++$i<$j;){
 // $e: tag content without brackets; $r: text that follows the tag
 $r = ''; list($e, $r) = explode('>', $t[$i]);
 // $x: 0 closing tag, 1 self-closed tag, 2 opening tag, -1 declaration/comment
 $x = $e[0] == '/' ? 0 : (substr($e, -1) == '/' ? 1 : ($e[0] != '!' ? 2 : -1));
 // $y: element name, or 0 for declarations/comments
 $y = !$x ? ltrim($e, '/') : ($x > 0 ? substr($e, 0, strcspn($e, ' ')) : 0);
 $e = "<$e>";
 if(isset($d[$y])){
  if(!$x){echo "\n", str_repeat($s, --$n), "$e\n", str_repeat($s, $n);}
  else{echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));}
  echo ltrim($r); continue;
 }
 $f = "\n". str_repeat($s, $n);
 if(isset($c[$y])){
  if(!$x){echo $e, $f, ltrim($r);}
  else{echo $f, $e, $r;}
 }elseif(isset($b[$y])){echo $f, $e, $r;
 }elseif(isset($a[$y])){echo $e, $f, ltrim($r);
 }elseif(!$y){echo $f, $e, $f, ltrim($r);
 }else{echo $e, $r;}
}
// Collapse runs of blank lines
$t = preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents());
ob_end_clean();
if(($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)){
 $t = str_replace("\n", $l, $t);
}
return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t);
// eof
}
|
||||
|
||||
function hl_version(){
// rel
// Reports the release number of this copy of the library as a string
 $release = '1.1.8.1';
 return $release;
// eof
}
|
||||
|
||||
function kses($t, $h, $p=array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto')){
// kses compat
//
// $t: text to filter
// $h: array keyed by element name; its keys become the allowed element list and the array itself is passed on as the spec
// $p: permitted URL schemes
// Returns whatever htmLawed() returns for the derived configuration
 foreach($h as $tag=>$rules){
  $h[$tag]['n']['*'] = 1;
 }
 if(count($h)){
  $elements = strtolower(implode(',', array_keys($h)));
 }else{
  $elements = '-*';
 }
 $config = array(
  'unique_ids'=>0,
  'no_deprecated_attr'=>0,
  'make_tag_strict'=>0,
  'comment'=>0,
  'cdata'=>0,
  'keep_bad'=>1,
  'elements'=>$elements,
  'hook'=>'kses_hook',
  'schemes'=>'*:'. implode(',', $p),
 );
 return htmLawed($t, $config, $h);
// eof
}
|
||||
|
||||
function kses_hook($t, &$C, &$S){
// kses compat
// Hook stub: hands the text back untouched and leaves the $C and $S references unmodified
 $unchanged = $t;
 return $unchanged;
// eof
}
|
592
extlib/htmLawed/htmLawedTest.php
Normal file
592
extlib/htmLawed/htmLawedTest.php
Normal file
@ -0,0 +1,592 @@
|
||||
<?php
|
||||
|
||||
/*
|
||||
htmLawedTest.php, 16 July 2009
|
||||
htmLawed 1.1.8.1, 16 July 2009
|
||||
Copyright Santosh Patnaik
|
||||
GPL v3 license
|
||||
A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed
|
||||
|
||||
Test htmLawed; user provides text input; input and processed input are shown as highlighted code and rendered HTML; also shown are execution time and peak memory usage
|
||||
*/
|
||||
|
||||
// config
$_errs = 0; // display PHP errors
$_limit = 8000; // input character limit

// more config
$_hlimit = 1000; // input character limit for showing hexdumps
$_hilite = 1; // 0 turns off slow Javascript-based code-highlighting, e.g., if $_limit is high
$_w3c_validate = 1; // 1 to show buttons to send input/output to w3c validator
$_sid = 'sid'; // session name; alphanum.
$_slife = 30; // session life in min.

// errors
error_reporting(E_ALL | (defined('E_STRICT') ? E_STRICT : 1));
ini_set('display_errors', $_errs);

// session
session_name($_sid);
session_cache_limiter('private');
session_cache_expire($_slife);
ini_set('session.gc_maxlifetime', $_slife * 60);
ini_set('session.use_only_cookies', 1);
ini_set('session.cookie_lifetime', 0);
session_start();
if(!isset($_SESSION['token'])){
 $_SESSION['token'] = md5(uniqid(rand(), 1));
}

// slashes
// The magic-quotes functions have been removed from PHP, so only call them where they still exist
if(function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()){
 foreach($_POST as $k => $v){
  $_POST[$k] = stripslashes($v);
 }
 ini_set('magic_quotes_gpc', 0);
}
if(function_exists('set_magic_quotes_runtime')){
 set_magic_quotes_runtime(0);
}

// Only a plain charset-like name is accepted because the value is echoed into the page
$_POST['enc'] = (isset($_POST['enc']) and preg_match('`^[-\w]+$`', $_POST['enc'])) ? $_POST['enc'] : 'utf-8';

// token for anti-CSRF
// The posted session id must always match; outside 'pre' mode the posted token and the session cookie must match too
// On failure the posted data is discarded
if(count($_POST)){
 if((empty($_GET['pre']) and ((!empty($_POST['token']) and !empty($_SESSION['token']) and $_POST['token'] != $_SESSION['token']) or empty($_POST[$_sid]) or $_POST[$_sid] != session_id() or empty($_COOKIE[$_sid]) or $_COOKIE[$_sid] != session_id())) or (!isset($_POST[$_sid]) or $_POST[$_sid] != session_id())){
  $_POST = array('enc'=>'utf-8');
 }
}
if(empty($_GET['pre'])){
 // Rotate token and session id on every regular page load
 $_SESSION['token'] = md5(uniqid(rand(), 1));
 $token = $_SESSION['token'];
 session_regenerate_id(1);
}else{
 // 'pre' requests keep the current token; $token still has to be defined because the page template echoes it
 $token = $_SESSION['token'];
}

// compress
if(function_exists('gzencode') && isset($_SERVER['HTTP_ACCEPT_ENCODING']) && preg_match('`gzip|deflate`i', $_SERVER['HTTP_ACCEPT_ENCODING']) && !ini_get('zlib.output_compression')){
 ob_start('ob_gzhandler');
}

// HTM for unprocessed
// NOTE(review): the posted text is echoed unescaped on purpose (this view renders the raw input); it is reached only after the session check above
if(isset($_POST['inputH'])){
 echo '<html><head><title>htmLawed test: HTML view of unprocessed input</title></head><body style="margin:0; padding: 0;"><p style="background-color: black; color: white; padding: 2px;"> Rendering of unprocessed input without an HTML doctype or charset declaration <small><a style="color: white; text-decoration: none;" href="1" onclick="javascript:window.close(this); return false;">close window</a> | <a style="color: white; text-decoration: none;" href="htmLawedTest.php" onclick="javascript: window.open(\'htmLawedTest.php\', \'hlmain\'); window.close(this); return false;">htmLawed test page</a></small></p><div>', $_POST['inputH'], '</div></body></html>';
 exit;
}

// main
$_POST['text'] = isset($_POST['text']) ? $_POST['text'] : 'text to process; < '. $_limit. ' characters'. ($_hlimit ? ' (for binary hexdump view, < '. $_hlimit. ')' : '');
// Process only when the form carried the session field and the text is non-empty and within the limit
$do = (!empty($_POST[$_sid]) && isset($_POST['text'][0]) && !isset($_POST['text'][$_limit])) ? 1 : 0;
$limit_exceeded = isset($_POST['text'][$_limit]) ? 1 : 0;
$pre_mem = memory_get_usage();
$validation = (!empty($_POST[$_sid]) and isset($_POST['w3c_validate'][0])) ? 1 : 0;
include './htmLawed.php';
|
||||
|
||||
function format($t){
 // Prepare raw text for display as HTML source code
 // $t: text to show; returns HTML in which markup characters are escaped and each line break is marked visibly
 // '&' is escaped before '<' and '>' so the entities produced for those are not escaped again
 // The newline marker is written as a numeric reference so it does not depend on the page charset
 $t = "\n". str_replace(array("\t", "\r\n", "\r", '&', '<', '>', "\n"), array(' ', "\n", "\n", '&amp;', '&lt;', '&gt;', "<span class=\"newline\">&#172;</span><br />\n"), $t);
 // Leading and repeated spaces are turned into non-breaking spaces so indentation survives HTML white-space collapsing
 return str_replace(array('<br />', "\n ", '  '), array("\n<br />\n", "\n&nbsp;", ' &nbsp;'), $t);
}
|
||||
|
||||
function hexdump($d){
 // Mainly by Aidan Lister <aidan@php.net>, Peter Waller <iridum@php.net>
 // Renders the bytes of $d as rows of up to 16 bytes (offset, hex values, printable characters) inside a <pre> element
 // Returns the HTML as a string
 $hexi = '';
 $ascii = '';
 $out = '<pre>';
 $offset = 0;
 $len = strlen($d);
 for($i=$j=0; $i<$len; $i++){
  $byte = ord($d[$i]);
  // Convert to hexadecimal
  $hexi .= sprintf("%02X ", $byte);
  // Replace non-viewable bytes with '.'
  // Only printable ASCII is shown: a lone byte >= 0x80 is not valid UTF-8 and htmlspecialchars() would return '' for it, shifting the column
  if($byte >= 32 && $byte < 127){
   $ascii .= htmlspecialchars($d[$i]);
  }else{
   $ascii .= '.';
  }
  // Add extra column spacing
  if($j == 7){
   $hexi .= ' ';
   $ascii .= ' ';
  }
  // Add row
  if(++$j == 16 || $i == $len-1){
   // Join the hexi / ascii output
   $out .= sprintf("%04X %-49s %s", $offset, $hexi, $ascii);
   // Reset vars
   $hexi = $ascii = '';
   $offset += 16;
   $j = 0;
   // Add newline
   if($i !== $len-1){
    $out .= "\n";
   }
  }
 }
 return $out. '</pre>';
}
|
||||
?>
|
||||
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html lang="en" xml:lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=<?php echo htmlspecialchars($_POST['enc']); ?>" />
|
||||
<meta name="description" content="htmLawed <?php echo hl_version();?> test page" />
|
||||
<style type="text/css"><!--/*--><![CDATA[/*><!--*/
|
||||
a, a.resizer{text-decoration:none;}
|
||||
a:hover, a.resizer:hover{color:red;}
|
||||
a.resizer{color:green; float:right;}
|
||||
body{background-color:#efefef;}
|
||||
body, button, div, html, input, p{font-size:13px; font-family:'Lucida grande', Verdana, Arial, Helvetica, sans-serif;}
|
||||
button, input{font-size: 85%;}
|
||||
div.help{border-top: 1px dotted gray; margin-top: 15px; padding-top: 15px; color:#999999;}
|
||||
#inputC, #inputD, #inputF, #inputR, #outputD, #outputF, #outputH, #outputR, #settingF{display:block;}
|
||||
#inputC, #settingF{background-color:white; border:1px gray solid; padding:3px;}
|
||||
#inputC li{margin: 0; padding: 0;}
|
||||
#inputC ul{margin: 0; padding: 0; margin-left: 14px;}
|
||||
#inputC input{margin: 0; margin-left: 2px; margin-right: 2px; padding: 1px; vertical-align: middle;}
|
||||
#inputD{overflow:auto; background-color:#ffff99; border:1px #cc9966 solid; padding:3px;}
|
||||
#inputR{overflow:auto; background-color:#ffffcc; border:1px #ffcc99 solid; padding:3px;}
|
||||
#inputC, #settingF, #inputD, #inputR, #outputD, #outputR, textarea{font-size:100%; font-family:'Bitstream vera sans mono', 'courier new', 'courier', monospace;}
|
||||
#outputD{overflow:auto; background-color: #99ffcc; border:1px #66cc99 solid; padding:3px;}
|
||||
#outputH{overflow:auto; background-color:white; padding:3px; border:1px #dcdcdc solid;}
|
||||
#outputR{overflow:auto; background-color: #ccffcc; border:1px #99cc99 solid; padding:3px;}
|
||||
span.cmtcdata{color: orange;}
|
||||
span.ctag{color:red;}
|
||||
span.ent{border-bottom:1px dotted #999999;}
|
||||
span.etag{color:purple;}
|
||||
span.help{color:#999999;}
|
||||
span.newline{color:#dcdcdc;}
|
||||
span.notice{color:green;}
|
||||
span.otag{color:blue;}
|
||||
#topmost{margin:auto; width:98%;}
|
||||
/*]]>*/--></style>
|
||||
<script type="text/javascript"><!--//--><![CDATA[//><!--
|
||||
window.name = 'hlmain';
|
||||
function hl(i){
|
||||
<?php if(!$_hilite){echo 'return;'; }?>
|
||||
var e = document.getElementById(i);
|
||||
if(!e){return;}
|
||||
run(e, '</[a-z1-6]+>', 'ctag');
|
||||
run(e, '<[a-z]+(?:[^>]*)/>', 'etag');
|
||||
run(e, '<[a-z1-6]+(?:[^>]*)>', 'otag');
|
||||
run(e, '&[#a-z0-9]+;', 'ent');
|
||||
run(e, '<!(?:(?:--(?:.|\n)*?--)|(?:\\[CDATA\\[(?:.|\n)*?\\]\\]))>', 'cmtcdata');
|
||||
}
|
||||
function sndProc(){
 // Submit the form with id 'testform' after adding a hidden field that carries the session cookie value
 var form = document.getElementById('testform');
 if(!form){return;}
 var sidName = '<?php echo htmlspecialchars($_sid); ?>';
 var field = document.createElement('input');
 field.type = 'hidden';
 field.name = sidName;
 field.id = sidName;
 field.value = readCookie(sidName);
 form.appendChild(field);
 form.submit();
}
|
||||
function readCookie(n){
 // Return the value of the cookie named n from document.cookie, or null when there is no such cookie
 var prefix = n + '=';
 var parts = document.cookie.split(';');
 for(var idx = 0; idx < parts.length; idx++){
  // Entries are separated by '; ', so drop the leading spaces before comparing
  var entry = parts[idx].replace(/^ +/, '');
  if(entry.indexOf(prefix) == 0){
   return entry.substring(prefix.length, entry.length);
  }
 }
 return null;
}
|
||||
function run(e, q, c){
 // Wrap text matching pattern q in span elements with class c
 // e: a text node, or an element whose direct children are scanned; q: pattern source string; c: class name
 var re = new RegExp(q);
 // Split node t around the first match of re and move the matched part into a new span
 function wrapFirst(t){
  var hit = re.exec(t.data);
  if(!hit){return;}
  var matched = t.splitText(hit.index);
  matched.splitText(hit[0].length);
  var span = t.ownerDocument.createElement('span');
  t.parentNode.replaceChild(span, matched);
  span.className = c;
  span.appendChild(matched);
 }
 if(e.firstChild == null){wrapFirst(e);}
 // Splitting adds the span and the remaining text as following siblings, so later matches are reached as the walk continues
 for(var k = e.firstChild; k != null; k = k.nextSibling){
  if(k.nodeType == 3){
   wrapFirst(k);
  }
  else if(c == 'ent' && k.nodeType == 1){
   // For entities also look at the first child of element children
   var inner = k.firstChild;
   if(inner){wrapFirst(inner);}
  }
 }
}
|
||||
// Toggle the visibility of the element whose id is i.
// With a style object: a display of 'none' becomes 'block'; any other value
// (including the empty string) becomes 'none'.
// Without a style object: the element's own visibility property is flipped
// between 'hidden' and 'show'; any other value is left alone.
// Does nothing when no element has that id.
function toggle(i){
  var el = document.getElementById(i);
  if(!el){
    return;
  }
  if(el.style){
    el.style.display = (el.style.display == 'none') ? 'block' : 'none';
    return;
  }
  var vis = el.visibility;
  if(vis == 'hidden'){
    el.visibility = 'show';
  }
  else if(vis == 'show'){
    el.visibility = 'hidden';
  }
}
|
||||
function sndUnproc(){
|
||||
var i = document.getElementById('text');
|
||||
if(!i){return;}
|
||||
i = i.value;
|
||||
i = i.replace(/>/g, '>');
|
||||
i = i.replace(/</g, '<');
|
||||
i = i.replace(/"/g, '"');
|
||||
var w = window.open('htmLawedTest.php?pre=1', 'hlprehtm');
|
||||
var f = document.createElement('form');
|
||||
f.enctype = 'application/x-www-form-urlencoded';
|
||||
f.method = 'post';
|
||||
f.acceptCharset = '<?php echo htmlspecialchars($_POST['enc']); ?>';
|
||||
if(f.style){f.style.display = 'none';}
|
||||
else{f.visibility = 'hidden';}
|
||||
f.innerHTML = '<p style="display:none;"><input style="display:none;" type="hidden" name="token" id="token" value="<?php echo $token; ?>" /><input style="display:none;" type="hidden" name="<?php echo htmlspecialchars($_sid); ?>" id="<?php echo htmlspecialchars($_sid); ?>" value="' + readCookie('<?php echo htmlspecialchars($_sid); ?>') + '" /><input style="display:none;" type="hidden" name="inputH" id="inputH" value="'+ i+ '" /></p>';
|
||||
f.action = 'htmLawedTest.php?pre=1';
|
||||
f.target = 'hlprehtm';
|
||||
f.method = 'post';
|
||||
var b = document.getElementsByTagName('body')[0];
|
||||
b.appendChild(f);
|
||||
f.submit();
|
||||
w.focus;
|
||||
}
|
||||
function sndValidn(id, type){
|
||||
var i = document.getElementById(id);
|
||||
if(!i){return;}
|
||||
i = i.value;
|
||||
i = i.replace(/>/g, '>');
|
||||
i = i.replace(/</g, '<');
|
||||
i = i.replace(/"/g, '"');
|
||||
var w = window.open('http://validator.w3.org/check', 'validate'+id+type);
|
||||
var f = document.createElement('form');
|
||||
f.enctype = 'application/x-www-form-urlencoded';
|
||||
f.method = 'post';
|
||||
f.acceptCharset = '<?php echo htmlspecialchars($_POST['enc']); ?>';
|
||||
if(f.style){f.style.display = 'none';}
|
||||
else{f.visibility = 'hidden';}
|
||||
f.innerHTML = '<p style="display:none;"><input style="display:none;" type="hidden" name="fragment" id="fragment" value="'+ i+ '" /><input style="display:none;" type="hidden" name="prefill" id="prefill" value="1" /><input style="display:none;" type="hidden" name="prefill_doctype" id="prefill_doctype" value="'+ type+ '" /><input style="display:none;" type="hidden" name="group" id="group" value="1" /><input type="hidden" name="ss" id="ss" value="1" /></p>';
|
||||
f.action = 'http://validator.w3.org/check';
|
||||
f.target = 'validate'+id+type;
|
||||
var b = document.getElementsByTagName('body')[0];
|
||||
b.appendChild(f);
|
||||
f.submit();
|
||||
w.focus;
|
||||
}
|
||||
// Click-and-drag vertical resizing for textareas.
// adBtn() appends a small handle (an <a class="resizer">) after each textarea
// whose class name starts or ends with resizeClass; pressing the mouse on a
// handle starts a resize of the element just before it, mouse movement
// changes that element's height, and releasing the mouse ends the resize.
var tRs = {
  // form field currently being resized (set by initResize)
  formEl: null,
  // class-name prefix/suffix that marks a textarea as resizable
  resizeClass: 'textarea',

  // Attach handler fn for event ev on target t (attachEvent fallback for
  // browsers without addEventListener).
  adEv: function(t,ev,fn){
    if(typeof document.addEventListener != 'undefined'){
      t.addEventListener(ev,fn,false);
    }else{
      t.attachEvent('on' + ev, fn);
    }
  },

  // Detach handler fn for event ev from target t (counterpart of adEv).
  rmEv: function(t,ev,fn){
    if(typeof document.removeEventListener != 'undefined'){
      t.removeEventListener(ev,fn,false);
    }else{
      t.detachEvent('on' + ev, fn);
    }
  },

  // Add a resize handle after every textarea whose class matches resizeClass.
  adBtn: function(){
    var textareas = document.getElementsByTagName('textarea');
    for(var i = 0; i < textareas.length; i++){
      var txtclass = textareas[i].className;
      if(txtclass.substring(0,tRs.resizeClass.length)==tRs.resizeClass ||
         txtclass.substring(txtclass.length -tRs.resizeClass.length)==tRs.resizeClass){
        var a = document.createElement('a');
        a.appendChild(document.createTextNode("\u2195"));
        a.style.cursor = 'n-resize';
        a.className= 'resizer';
        a.title = 'click-drag to resize';
        tRs.adEv(a, 'mousedown', tRs.initResize);
        textareas[i].parentNode.appendChild(a);
      }
    }
  },

  // mousedown on a handle: remember the field before the handle together with
  // its starting height and the pointer position, then track the mouse on the
  // document until the button is released.
  initResize: function(event){
    if(typeof event == 'undefined'){
      event = window.event;
    }
    var target = (event.srcElement || event.target).previousSibling;
    // the handle may have no preceding sibling; nothing to resize then
    if(!target){
      return;
    }
    if(target.nodeName.toLowerCase() == 'textarea' || (target.nodeName.toLowerCase() == 'input' && target.type == 'text')){
      tRs.formEl = target;
      tRs.formEl.startHeight = tRs.formEl.clientHeight;
      tRs.formEl.startY = event.clientY;
      tRs.adEv(document, 'mousemove', tRs.resize);
      tRs.adEv(document, 'mouseup', tRs.stopResize);
      tRs.formEl.parentNode.style.cursor = 'n-resize';
      tRs.formEl.style.cursor = 'n-resize';
      try{
        event.preventDefault();
      }catch(e){
        // event objects without preventDefault(): ignore
      }
    }
  },

  // mousemove during a drag: new height = start height + vertical pointer travel.
  resize: function(event){
    if(typeof event == 'undefined'){
      event = window.event;
    }
    if(tRs.formEl.nodeName.toLowerCase() == 'textarea'){
      tRs.formEl.style.height = event.clientY - tRs.formEl.startY + tRs.formEl.startHeight + 'px';
    }
  },

  // mouseup: end the drag. Both document-level listeners installed by
  // initResize are removed, so they do not keep firing after the drag.
  stopResize: function(event){
    tRs.rmEv(document, 'mousemove', tRs.resize);
    tRs.rmEv(document, 'mouseup', tRs.stopResize);
    tRs.formEl.style.cursor = 'text';
    tRs.formEl.parentNode.style.cursor = 'auto';
    return false;
  }
};
|
||||
// Once the window has loaded, add the resize handles to the matching textareas.
tRs.adEv(window, 'load', tRs.adBtn);
|
||||
//--><!]]></script>
|
||||
<title>htmLawed (<?php echo hl_version();?>) test</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="topmost">
|
||||
|
||||
<h5 style="float: left; display: inline; margin-top: 0; margin-bottom: 5px;"><a href="http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/index.php" title="htmLawed home">HTM<big><big>L</big></big>AWED</a> <?php echo hl_version();?> <a href="htmLawedTest.php" title="test home">TEST</a></h5>
|
||||
<span style="float: right;" class="help"><a href="htmLawed_README.htm"><span class="notice">htm</span></a> / <a href="htmLawed_README.txt"><span class="notice">txt</span></a> documentation</span><br style="clear:both;" />
|
||||
|
||||
<a href="htmLawedTest.php" title="[toggle visibility] type or copy-paste" onclick="javascript:toggle('inputF'); return false;"><span class="notice">Input »</span> <span class="help" title="limit lower with multibyte characters<?php echo (($_hlimit < $_limit && $_hlimit)? '; limit is '. $_hlimit. ' for viewing binaries' : ''); ?>"><small>(max. <?php echo htmlspecialchars($_limit);?> chars)</small></span></a>
|
||||
|
||||
<form id="testform" name="testform" action="htmLawedTest.php" method="post" accept-charset="<?php echo htmlspecialchars($_POST['enc']); ?>" style="padding:0; margin: 0; display:inline;">
|
||||
|
||||
<div id="inputF" style="display: block;">
|
||||
|
||||
<input type="hidden" name="token" id="token" value="<?php echo $token; ?>" />
|
||||
<div><textarea id="text" class="textarea" name="text" rows="5" cols="100" style="width: 100%;"><?php echo htmlspecialchars($_POST['text']);?></textarea></div>
|
||||
<input type="submit" id="submitF" name="submitF" value="Process" style="float:left;" title="filter using htmLawed" onclick="javascript: sndProc(); return false;" onkeypress="javascript: sndProc(); return false;" />
|
||||
|
||||
<?php
|
||||
if($do){
|
||||
if($validation){
|
||||
echo '<input type="hidden" value="1" name="w3c_validate" id="w3c_validate" />';
|
||||
}
|
||||
?>
|
||||
|
||||
<button type="button" title="rendered as web-page without a doctype or charset declaration" style="float: right;" onclick="javascript: sndUnproc(); return false;" onkeypress="javascript: sndUnproc(); return false;">View unprocessed</button>
|
||||
<button type="button" onclick="javascript:document.getElementById('text').focus();document.getElementById('text').select()" title="select all to copy" style="float:right;">Select all</button>
|
||||
|
||||
<?php
|
||||
if($_w3c_validate && $validation){
|
||||
?>
|
||||
|
||||
<button type="button" title="HTML 4.01 W3C online validation" style="float: right;" onclick="javascript: sndValidn('text', 'html401'); return false;" onkeypress="javascript: sndValidn('text', 'html401'); return false;">Check HTML</button>
|
||||
<button type="button" title="XHTML 1.1 W3C online validation" style="float: right;" onclick="javascript: sndValidn('text', 'xhtml110'); return false;" onkeypress="javascript: sndValidn('text', 'xhtml110'); return false;">Check XHTML</button>
|
||||
|
||||
<?php
|
||||
}
|
||||
}
|
||||
else{
|
||||
if($_w3c_validate){
|
||||
echo '<span style="float: right;" class="help" title="for direct submission of input or output code to W3C validator for (X)HTML validation"><span style="font-size: 85%;"> Validator tools: </span><input type="checkbox" value="1" name="w3c_validate" id="w3c_validate" style="vertical-align: middle;"', ($validation ? ' checked="checked"' : ''), ' /></span>';
|
||||
}
|
||||
}
|
||||
?>
|
||||
|
||||
<span style="float:right;" class="help"><span style="font-size: 85%;">Encoding: </span><input type="text" size="8" id="enc" name="enc" style="vertical-align: middle;" value="<?php echo htmlspecialchars($_POST['enc']); ?>" title="IANA-recognized name of the input character-set; can be multiple ;- or space-separated values; may not work in some browsers" /></span>
|
||||
|
||||
</div>
|
||||
<br style="clear:both;" />
|
||||
|
||||
<?php
|
||||
if($limit_exceeded){
|
||||
echo '<br /><strong>Input text is too long!</strong><br />';
|
||||
}
|
||||
?>
|
||||
|
||||
<br />
|
||||
|
||||
<a href="htmLawedTest.php" title="[toggle visibility] htmLawed configuration" onclick="javascript:toggle('inputC'); return false;"><span class="notice">Settings »</span></a>
|
||||
|
||||
<div id="inputC" style="display: none;">
|
||||
<table summary="none">
|
||||
<tr>
|
||||
<td><span class="help" title="$config argument">Config:</span></td>
|
||||
<td><ul>
|
||||
|
||||
<?php
|
||||
$cfg = array(
|
||||
'abs_url'=>array('3', '0', 'absolute/relative URL conversion', '-1'),
|
||||
'and_mark'=>array('2', '0', 'mark original <em>&</em> chars', '0', 'd'=>1), // 'd' to disable
|
||||
'anti_link_spam'=>array('1', '0', 'modify <em>href</em> values as an anti-link spam measure', '0', array(array('30', '1', '', 'regex for extra <em>rel</em>'), array('30', '2', '', 'regex for no <em>href</em>'))),
|
||||
'anti_mail_spam'=>array('1', '0', 'replace <em>@</em> in <em>mailto:</em> URLs', '0', '8', 'NO@SPAM', 'replacement'),
|
||||
'balance'=>array('2', '1', 'fix nestings and balance tags', '0'),
|
||||
'base_url'=>array('', '', 'base URL', '25'),
|
||||
'cdata'=>array('4', 'nil', 'allow <em>CDATA</em> sections', 'nil'),
|
||||
'clean_ms_char'=>array('3', '0', 'replace bad characters introduced by Microsoft apps. like <em>Word</em>', '0'),
|
||||
'comment'=>array('4', 'nil', 'allow HTML comments', 'nil'),
|
||||
'css_expression'=>array('2', 'nil', 'allow dynamic expressions in CSS style properties', 'nil'),
|
||||
'deny_attribute'=>array('1', '0', 'denied attributes', '0', '50', '', 'these'),
|
||||
'elements'=>array('', '', 'allowed elements', '50'),
|
||||
'hexdec_entity'=>array('3', '1', 'convert hexadecimal numeric entities to decimal ones, or vice versa', '0'),
|
||||
'hook'=>array('', '', 'name of hook function', '25'),
|
||||
'hook_tag'=>array('', '', 'name of custom function to further check attribute values', '25'),
|
||||
'keep_bad'=>array('7', '6', 'keep, or remove <em>bad</em> tag content', '0'),
|
||||
'lc_std_val'=>array('2', '1', 'lower-case std. attribute values like <em>radio</em>', '0'),
|
||||
'make_tag_strict'=>array('3', 'nil', 'transform deprecated elements', 'nil'),
|
||||
'named_entity'=>array('2', '1', 'allow named entities, or convert numeric ones', '0'),
|
||||
'no_deprecated_attr'=>array('3', '1', 'allow deprecated attributes, or transform them', '0'),
|
||||
'parent'=>array('', 'div', 'name of parent element', '25'),
|
||||
'safe'=>array('2', '0', 'for most <em>safe</em> HTML', '0'),
|
||||
'schemes'=>array('', 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https', 'allowed URL protocols', '50'),
|
||||
'show_setting'=>array('', 'htmLawed_setting', 'variable name to record <em>finalized</em> htmLawed settings', '25', 'd'=>1),
|
||||
'style_pass'=>array('2', 'nil', 'do not look at <em>style</em> attribute values', 'nil'),
|
||||
'tidy'=>array('3', '0', 'beautify/compact', '-1', '8', '1t1', 'format'),
|
||||
'unique_ids'=>array('2', '1', 'unique <em>id</em> values', '0', '8', 'my_', 'prefix'),
|
||||
'valid_xhtml'=>array('2', 'nil', 'auto-set various parameters for most valid XHTML', 'nil'),
|
||||
'xml:lang'=>array('3', 'nil', 'auto-add <em>xml:lang</em> attribute', '0'),
|
||||
);
|
||||
foreach($cfg as $k=>$v){
|
||||
echo '<li>', $k, ': ';
|
||||
if(!empty($v[0])){ // input radio
|
||||
$j = $v[3];
|
||||
for($i = $j-1; ++$i < $v[0]+$v[3];++$j){
|
||||
echo '<input type="radio" name="h', $k, '" value="', $i, '"', (!isset($_POST['h'. $k]) ? ($v[1] == $i ? ' checked="checked"' : '') : ($_POST['h'. $k] == $i ? ' checked="checked"' : '')), (isset($v['d']) ? ' disabled="disabled"' : ''), ' />', $i, ' ';
|
||||
}
|
||||
if($v[1] == 'nil'){
|
||||
echo '<input type="radio" name="h', $k, '" value="nil"', ((!isset($_POST['h'. $k]) or $_POST['h'. $k] == 'nil') ? ' checked="checked"' : ''), (isset($v['d']) ? ' disabled="disabled"' : ''), ' />not set ';
|
||||
}
|
||||
if(!empty($v[4])){ // + input text box
|
||||
echo '<input type="radio" name="h', $k, '" value="', $j, '"', (((isset($_POST['h'. $k]) && $_POST['h'. $k] == $j) or (!isset($_POST['h'. $k]) && $j == $v[1])) ? ' checked="checked"' : ''), (isset($v['d']) ? ' disabled="disabled"' : ''), ' />';
|
||||
if(!is_array($v[4])){
|
||||
echo $v[6], ': <input type="text" size="', $v[4], '" name="h', $k. $j, '" value="', htmlspecialchars(isset($_POST['h'. $k. $j][0]) ? $_POST['h'. $k. $j] : $v[5]), '"', (isset($v['d']) ? ' disabled="disabled"' : ''), ' />';
|
||||
}
|
||||
else{
|
||||
foreach($v[4] as $z){
|
||||
echo ' ', $z[3], ': <input type="text" size="', $z[0], '" name="h', $k. $j. $z[1], '" value="', htmlspecialchars(isset($_POST['h'. $k. $j. $z[1]][0]) ? $_POST['h'. $k. $j. $z[1]] : $z[2]), '"', (isset($v['d']) ? ' disabled="disabled"' : ''), ' />';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
elseif(ctype_digit($v[3])){ // input text
|
||||
echo '<input type="text" size="', $v[3], '" name="h', $k, '" value="', htmlspecialchars(isset($_POST['h'. $k][0]) ? $_POST['h'. $k] : $v[1]), '"', (isset($v['d']) ? ' disabled="disabled"' : ''), ' />';
|
||||
}
|
||||
else{} // text-area
|
||||
echo ' <span class="help">', $v[2], '</span></li>';
|
||||
}
|
||||
echo '</ul></td></tr><tr><td><span style="vertical-align: top;" class="help" title="$spec argument: element-specific attribute rules">Spec:</span></td><td><textarea name="spec" id="spec" cols="70" rows="3" style="width:80%;">', htmlspecialchars((isset($_POST['spec']) ? $_POST['spec'] : '')), '</textarea></td></tr></table>';
|
||||
?>
|
||||
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<?php
|
||||
if($do){
|
||||
// Build the htmLawed $config array from the submitted form.
// Every POST field whose name starts with 'h' is a setting (the prefix is
// stripped); the value 'nil' means "not set" and is skipped.
$cfg = array();
foreach($_POST as $k=>$v){
  $k = (string)$k; // PHP turns numeric field names into integer keys
  if(isset($k[0]) && $k[0] == 'h' && $v !== 'nil'){
    $cfg[substr($k, 1)] = $v;
  }
}

// Some settings have a radio option that is completed by one or more text
// boxes; those boxes are posted as <setting><option value>[<n>]. Fold them
// into the setting itself and drop the helper fields so that they are not
// passed on to htmLawed().

// anti_link_spam: option 1 takes two regular expressions (fields ...11, ...12)
if(!empty($cfg['anti_link_spam']) && (!empty($cfg['anti_link_spam11']) or !empty($cfg['anti_link_spam12']))){
  $cfg['anti_link_spam'] = array(
    isset($cfg['anti_link_spam11']) ? $cfg['anti_link_spam11'] : '',
    isset($cfg['anti_link_spam12']) ? $cfg['anti_link_spam12'] : ''
  );
}
unset($cfg['anti_link_spam11'], $cfg['anti_link_spam12']);

// anti_mail_spam: option 1 takes the replacement text (field ...1)
if(isset($cfg['anti_mail_spam']) && $cfg['anti_mail_spam'] == 1){
  $cfg['anti_mail_spam'] = isset($cfg['anti_mail_spam1'][0]) ? $cfg['anti_mail_spam1'] : 0;
}
unset($cfg['anti_mail_spam1']);

// deny_attribute: option 1 takes the list of denied attributes (field ...1)
if(isset($cfg['deny_attribute']) && $cfg['deny_attribute'] == 1){
  $cfg['deny_attribute'] = isset($cfg['deny_attribute1'][0]) ? $cfg['deny_attribute1'] : 0;
}
unset($cfg['deny_attribute1']);

// tidy: option 2 takes a format string (field ...2)
if(isset($cfg['tidy']) && $cfg['tidy'] == 2){
  $cfg['tidy'] = isset($cfg['tidy2'][0]) ? $cfg['tidy2'] : 0;
}
unset($cfg['tidy2']);

// unique_ids: option 2 takes an id prefix (field ...2)
if(isset($cfg['unique_ids']) && $cfg['unique_ids'] == 2){
  $cfg['unique_ids'] = isset($cfg['unique_ids2'][0]) ? $cfg['unique_ids2'] : 1;
}
unset($cfg['unique_ids2']);

unset($cfg['and_mark']); // disabling and_mark
|
||||
|
||||
$cfg['show_setting'] = 'hlcfg';
|
||||
$st = microtime();
|
||||
$out = htmLawed($_POST['text'], $cfg, str_replace(array('$', '{'), '', $_POST['spec']));
|
||||
$et = microtime();
|
||||
echo '<br /><a href="htmLawedTest.php" title="[toggle visibility] syntax-highlighted" onclick="javascript:toggle(\'inputR\'); return false;"><span class="notice">Input code »</span></a> <span class="help" title="tags estimated as half of total > and < chars; values may be inaccurate for non-ASCII text"><small><big>', strlen($_POST['text']), '</big> chars, ~<big>', round((substr_count($_POST['text'], '>') + substr_count($_POST['text'], '<'))/2), '</big> tags</small> </span><div id="inputR" style="display: none;">', format($_POST['text']), '</div><script type="text/javascript">hl(\'inputR\');</script>', (!isset($_POST['text'][$_hlimit]) ? ' <a href="htmLawedTest.php" title="[toggle visibility] hexdump; non-viewable characters like line-returns are shown as dots" onclick="javascript:toggle(\'inputD\'); return false;"><span class="notice">Input binary » </span></a><div id="inputD" style="display: none;">'. hexdump($_POST['text']). '</div>' : ''), ' <a href="htmLawedTest.php" title="[toggle visibility] finalized internal settings as interpreted by htmLawed; for developers" onclick="javascript:toggle(\'settingF\'); return false;"><span class="notice">Finalized internal settings » </span></a> <div id="settingF" style="display: none;">', str_replace(array(' ', "\t", ' '), array(' ', ' ', ' '), nl2br(htmlspecialchars(print_r($GLOBALS['hlcfg']['config'], true)))), '</div><script type="text/javascript">hl(\'settingF\');</script>', '<br /><a href="htmLawedTest.php" title="[toggle visibility] suitable for copy-paste" onclick="javascript:toggle(\'outputF\'); return false;"><span class="notice">Output »</span></a> <span class="help" title="approx., server-specific value excluding the \'include()\' call"><small>htmLawed processing time <big>', number_format(((substr($et,0,9)) + (substr($et,-10)) - (substr($st,0,9)) - (substr($st,-10))),4), '</big> s</small></span>', (($mem = memory_get_peak_usage()) !== false ? '<span class="help"><small>, peak memory usage <big>'. 
round(($mem-$pre_mem)/1048576, 2). '</big> <small>MB</small>' : ''), '</small></span><div id="outputF" style="display: block;"><div><textarea id="text2" class="textarea" name="text2" rows="5" cols="100" style="width: 100%;">', htmlspecialchars($out), '</textarea></div><button type="button" onclick="javascript:document.getElementById(\'text2\').focus();document.getElementById(\'text2\').select()" title="select all to copy" style="float:right;">Select all</button>';
|
||||
if($_w3c_validate && $validation)
|
||||
{
|
||||
?>
|
||||
|
||||
<button type="button" title="HTML 4.01 W3C online validation" style="float: right;" onclick="javascript: sndValidn('text2', 'html401'); return false;" onkeypress="javascript: sndValidn('text2', 'html401'); return false;">Check HTML</button>
|
||||
<button type="button" title="XHTML 1.1 W3C online validation" style="float: right;" onclick="javascript: sndValidn('text2', 'xhtml110'); return false;" onkeypress="javascript: sndValidn('text2', 'xhtml110'); return false;">Check XHTML</button>
|
||||
|
||||
<?php
|
||||
}
|
||||
echo '</div><br /><a href="htmLawedTest.php" title="[toggle visibility] syntax-highlighted" onclick="javascript:toggle(\'outputR\'); return false;"><span class="notice">Output code »</span></a><div id="outputR" style="display: block;">', format($out), '</div><script type="text/javascript">hl(\'outputR\');</script>', (!isset($_POST['text'][$_hlimit]) ? '<br /><a href="htmLawedTest.php" title="[toggle visibility] hexdump; non-viewable characters like line-returns are shown as dots" onclick="javascript:toggle(\'outputD\'); return false;"><span class="notice">Output binary »</span></a><div id="outputD" style="display: none;">'. hexdump($out). '</div>' : ''), '<br /><a href="htmLawedTest.php" title="[toggle visibility] XHTML 1 Transitional doctype" onclick="javascript:toggle(\'outputH\'); return false;"><span class="notice">Output rendered »</span></a><div id="outputH" style="display: block;">', $out, '</div>';
|
||||
}
|
||||
else{
|
||||
?>
|
||||
|
||||
<br />
|
||||
|
||||
<div class="help">Use with a Javascript- and cookie-enabled, relatively new version of a common browser. <em>Submitted input will also be HTML-rendered (XHTML 1) after htmLawed-filtering.</em>
|
||||
|
||||
<?php echo (file_exists('./htmLawed_TESTCASE.txt') ? '<br /><br />You can use text from <a href="htmLawed_TESTCASE.txt"><span class="notice">this collection of test-cases</span></a> in the input. Set the character encoding of the browser to Unicode/utf-8 before copying.' : ''); ?>
|
||||
|
||||
<br /><br />For anti-XSS tests, try the <a href="http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawedSafeModeTest.php"><span class="notice">special test-page</span></a> or see <a href="http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/rsnake/RSnakeXSSTest.htm"><span class="notice">these results</span></a>.
|
||||
|
||||
<br /><br /><small>Change <em>Encoding</em> to reflect the character encoding of the input text. Even then, it may not work or some characters may not display properly because of variable browser support and because of the form interface. Developers can write some PHP code to capture the filtered input to a file if this is important.
|
||||
<br /><br />Refer to the htmLawed documentation (<a href="htmLawed_README.htm"><span class="notice">htm</span></a>/<a href="htmLawed_README.txt"><span class="notice">txt</span></a>) for details about <em>Settings</em>, and htmLawed's behavior and limitations. For <em>Settings</em>, incorrectly-specified values like regular expressions are silently ignored. One or more settings form-fields may have been disabled. Some characters are not allowed in the <em>Spec</em> field.
|
||||
|
||||
|
||||
<br /><br />Hovering the mouse over some of the text can provide additional information in some browsers.</small>
|
||||
|
||||
<?php
|
||||
if($_w3c_validate){
|
||||
?>
|
||||
|
||||
<small><br /><br />Because of character-encoding issues, the W3C validator (anyway not perfect) may reject validation requests or invalidate otherwise-valid code, esp. if text was copy-pasted in the input box. Local applications like the <em>HTML Validator</em> Firefox browser add-on may be useful in such cases.</small>
|
||||
|
||||
<?php
|
||||
}
|
||||
?>
|
||||
|
||||
</div>
|
||||
|
||||
<?php
|
||||
}
|
||||
?>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
1979
extlib/htmLawed/htmLawed_README.htm
Normal file
1979
extlib/htmLawed/htmLawed_README.htm
Normal file
File diff suppressed because it is too large
Load Diff
1600
extlib/htmLawed/htmLawed_README.txt
Normal file
1600
extlib/htmLawed/htmLawed_README.txt
Normal file
File diff suppressed because it is too large
Load Diff
370
extlib/htmLawed/htmLawed_TESTCASE.txt
Normal file
370
extlib/htmLawed/htmLawed_TESTCASE.txt
Normal file
@ -0,0 +1,370 @@
|
||||
/*
|
||||
htmLawed_TESTCASE.txt, 23 April 2009
|
||||
htmLawed 1.1.8.1, 16 July 2009
|
||||
Copyright Santosh Patnaik
|
||||
GPL v3 license
|
||||
A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed
|
||||
*/
|
||||
|
||||
This file has UTF-8-encoded text with both correct and incorrect/malformed HTML/XHTML code snippets to test htmLawed (test cases/samples). The entire text may also be used as a unit.
|
||||
|
||||
************************************************
|
||||
when viewing this file in a web browser, set the
|
||||
character encoding to Unicode/UTF-8
|
||||
************************************************
|
||||
|
||||
--------------------- start --------------------
|
||||
|
||||
<em>Try different $config and $spec values. Some text even when filtered in will not be displayed in a rendered web-page</em><br />
|
||||
|
||||
<h6>Attributes</h6>
|
||||
|
||||
<strong>Xml:lang:</strong><a lang="en" xml:lang="en"></a>, <a lang="en"></a>, <a xml:lang="en"></a><br />
|
||||
<strong>Standard, predefined value, or empty attribute:</strong> <input type="text" disabled />, <input type="text" disabled="DISABLED" />, <input type="text" disabled="1" /><br />
|
||||
<strong>Required:</strong> <img />, <img alt="image" /><br />
|
||||
<strong>Quote & space variation:</strong> <a id=id1 name=xy>a</a>, <a id='id2' name="xy">a</a>, <a id=' id3 ' name = "n" >a</a><br />
|
||||
<strong>Invalid:</strong> <a id="id4" src="s">a</a><br />
|
||||
<strong>Duplicated:</strong> <a id="id5" id="id6">a</a><br />
|
||||
<strong>Deprecated:</strong> <a id="id7" target="self" name="n">a</a>, <hr noshade="noshade" /><br />
|
||||
<strong>Casing:</strong> <a HREF=""></a><br />
|
||||
<strong>Admin-restricted?:</strong> <a href="x" onclick="alert();"></a>
|
||||
|
||||
<h6>Attribute values</h6>
|
||||
|
||||
<strong>Duplicate ID value:</strong><a id="id8"></a>, <a id="my_id8"></a>, <a id="id8"></a><br />
|
||||
(try 'my_' for prefix)<br />
|
||||
<strong>Double-quotes in value:</strong><a title=ab"c"></a>, <a title="ab"c"></a>, <a title='ab"c'></a><br />
|
||||
(try filter for CSS expression)<br />
|
||||
<strong>CSS expression</strong>: <div style="prop:expression();"></div><div style="prop:expression()"></div><div style="prop: expression();"></div><div style="prop : expression()"></div><div style="prop:expression(js);"></div><div style="prop:expression(js;)"></div><div style="prop: expression('js');"></div><div style="prop : expr ession('js':)"></div><div style="prop:expression( 'js@ );"></div><br />
|
||||
<strong>Other:</strong> <input size="50" class="my" value="an input an input an input" />, <input size="5" class="your" value="an input" /><br />
|
||||
(try 'maxlen', 'maxval', etc., for 'input' in '$spec')
|
||||
|
||||
<h6>Blockquotes</h6>
|
||||
|
||||
<blockquote>abc</blockquote><br />
|
||||
<blockquote>abc<div>def</div></blockquote><br />
|
||||
<blockquote><div>abc</div>def</blockquote><br />
|
||||
<blockquote>abc<div>def</div>ghi</blockquote><br />
|
||||
abc<div>def</div>ghi<br />
|
||||
(try with blockquote parent)
|
||||
|
||||
<h6>CDATA sections</h6>
|
||||
|
||||
<strong>Special characters inside:</strong> <![CDATA[ ]]> ]]>, <![CDATA[ 3 < 4 > 3.5, & 4 > 4 ]]><br />
|
||||
<strong>Normal:</strong> <![CDATA[ check ]]>, <em>CDATA follows:<![CDATA[ check ]]></em><br />
|
||||
<strong>Malformed:</strong> <![cdata check ]]>, < ![CDATA check ]]>, <![CDATA check ]]>, < ![CDATA check ] ]><br />
|
||||
<strong>Invalid:</strong> <em <![CDATA[ check ]]>>CDATA in tag content</em>, <table><![CDATA[ check ]]><tr><td>text not allowed</td></tr></table>
|
||||
|
||||
<h6>Complex-1: deprecated elements</h6>
|
||||
|
||||
<center>
|
||||
The PHP <s>software</s> script used for this <strike>web-page</strike> webpage is <font style="font-weight: bold " face=arial size='+3' color = "red ">htmLawedTest.php</font>, from <u style= 'color:green'>PHP Labware</u>.
|
||||
</center>
|
||||
|
||||
<h6>Complex-2: deprecated attributes</h6>
|
||||
|
||||
<img src="s" alt="a" name="n" /><img src="s" alt="a" id="id9" name="n" />
|
||||
<br clear="left" />
|
||||
<hr noshade size="1" />
|
||||
<img name="id10" src="s" align="left" alt="image" hspace="10" vspace="10" width="10em" height="20" border="1" style="padding:5px;" />
|
||||
<table width="50em" align="center" bgcolor="red">
|
||||
<tr>
|
||||
<td width="20%">
|
||||
<div align="center">
|
||||
<h3 align="right">Section</h3>
|
||||
<p align="right">Para</p>
|
||||
<ol type="a" start="e"><li value="x"><a name="x">First</a> <a name="x" id="id11">item</a></li></ol>
|
||||
</div>
|
||||
</td>
|
||||
<td width="*">
|
||||
<ol type="1"><li>First item</li></ol>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br clear="all" />
|
||||
|
||||
<h6>Complex-3: embed, object, area</h6>
|
||||
|
||||
<object width="425" height="350"><param name="movie" value="http://www.youtube.com/v/ls7gi1VwdIQ"></param><embed src="http://www.youtube.com/v/ls7gi1VwdIQ" type="application/x-shockwave-flash" width="425" height="350"></embed></object><br />
|
||||
|
||||
<embed src="http://www.youtube.com/v/ls7gi1VwdIQ" type="application/x-shockwave-flash" width="425" height="350"></embed><br />
|
||||
|
||||
<object data="1.gif" type="image/gif" usemap="#map1"><map name="map1">
|
||||
<p>navigate the site: <a href="1" shape="REct" coOrds="0,0,118,28">1</a> | <a href="3" shape="circle" coords="184,200,60">3</a> | <a href="4" shape="poly" coords="276,0,276,28,100,200,50,50,276,0">4</a></p>
|
||||
<area href="5" shape="Rect" coords="0,0,118,28">
|
||||
</map></object>
|
||||
|
||||
<h6>Complex-4: nested and other tables</h6>
|
||||
|
||||
<table border="1" bgcolor="red"> <tr> <td> Cell </td> <td colspan="2" rowspan="2"> <table border="1" bgcolor="green"> <tr> <td> Cell </td> <td colspan="2" rowspan="2"> </td> </tr> <tr> <td> Cell </td> </tr> <tr> <td> Cell </td> <td> Cell </td> <td> Cell </td> </tr> </table> </td> </tr> <tr> <td> Cell </td> </tr> <tr> <td> Cell </td> <td> Cell </td> <td> Cell </td> </tr> </table><br />
|
||||
<strong>PCDATA wrong:</strong> <table>Well<caption>Hello</caption></table><br />
|
||||
<strong>Missing tr:</strong> <table><td>Well</td></table><br />
|
||||
|
||||
<h6>Complex-5: pseudo, disallowed or non-HTML tags</h6>
|
||||
|
||||
(Try different 'keep_bad' values)
|
||||
<*> Pseudotags <*>
|
||||
<xml>Non-HTML tag xml</xml>
|
||||
<p>
|
||||
Disallowed tag p
|
||||
</p>
|
||||
<ul>Bad<li>OK</li></ul>
|
||||
|
||||
<h6>Elements</h6>
|
||||
|
||||
<strong>Unbalanced:</strong> <a href="h"><em>check</a></em><br />
|
||||
<strong>Non-XHTML:</strong> <div><center><dir></dir></center></div><br />
|
||||
<strong>Malformed:</strong> < a href=""></a>, <a href="" ></a>, <a href="" ></a>, <a href=""
|
||||
></a>, <a href="">< /a>, < a href=""></a >, <img src="s" alt="a" />, <img src="s" alt="a"/ >, <imgsrc="s" alt="a" /><br />
|
||||
<strong>Invalid:</strong> <image src="s" alt="a" /><br />
|
||||
<strong>Empty:</strong> <img src="s" alt="a" />, <img src="s" alt="a"></img>, <img src="s" alt="a">text</img><br />
|
||||
<strong>Content invalid:</strong> <a href="h">1<a>2</a></a><br />
|
||||
<strong>Content invalid?:</strong> <form></form><br /> (try setting 'form' as parent)
|
||||
<strong>Casing:</strong> <A href=""></a>
|
||||
|
||||
<h6>Entities</h6>
|
||||
|
||||
<strong>Special:</strong> & 3 < 2 & 5>4 and j >i >a & i<j>a<br />
|
||||
<strong>Padding:</strong> B B f f  <br />
|
||||
<strong>Malformed:</strong> & #x27;, &x27;, ' &TILDE;, &tilde<br />
|
||||
<strong>Invalid:</strong> , �, , �, , &bad;<br />
|
||||
<strong>Discouraged characters:</strong> , „, , <br />
|
||||
<strong>Context:</strong> '>', <?<br />
|
||||
<strong>Casing:</strong> ', ', &TILDE;, ˜
|
||||
<br />
|
||||
(also check named-to-numeric and hexdec-to-decimal, and vice versa, conversions)
|
||||
|
||||
<h6>Format</h6>
|
||||
|
||||
<strong>Valid but ill-formatted:</strong> text <!-- comment -->
|
||||
text <!--
|
||||
A c o m m e n t -->
|
||||
<script>
|
||||
<![CDATA[
|
||||
code
|
||||
]]>
|
||||
</script><!-- comment --><![CDATA[ cdata ]]> <a>text</b> text<pre id="none">p r e</pre>
|
||||
<textarea>text</textarea> <textarea>
|
||||
text text
|
||||
</textarea> text text <br /><hr />
|
||||
text <img src="none" alt="none" /> t<em class="none">e<strong>x</strong>t</em>
|
||||
text <img src="none" alt="none" /> <b>t<em> e <strong> x </strong> t</em></b>
|
||||
<a href="a"> text <img src="none" alt="none" /> <b>t <em> e <strong> x </strong> t</em></b>
|
||||
</a>
|
||||
<span style="background-color: yellow;">text <img src="none" alt="none" /> <b> <em> t e <strong> x </strong> t</em></b></span>
|
||||
<script>script</script>
|
||||
<div>
|
||||
<pre id="none">p <a>r</a> e <!-- comment --> </pre>
|
||||
<pre>
|
||||
pre
|
||||
</pre>
|
||||
</div>
|
||||
<div><div><table border="1" style="background-color: red;"><tr><td>Cell</td><td colspan="2" rowspan="2"><table border="1" style="background-color: green;"><tr><td>Cell</td><td colspan="2" rowspan="2"></td></tr><tr><td>Cell</td></tr><tr><td>Cell</td><td>Cell</td><td>Cell</td></tr></table></td></tr><tr><td>Cell</td></tr><tr><td>Cell</td><td>Cell</td><td>Cell</td></tr></table></div></div>
|
||||
(try to compact or beautify)
|
||||
|
||||
<h6>Forms</h6>
|
||||
|
||||
(note nesting of 'form', missing required attributes, etc.)<br />
|
||||
<form>
|
||||
<script type="text/javascript">s</script>
|
||||
<fieldset><legend>p</legend>l <input name="personal_lastname" type="text" tabindex="1"></fieldset>
|
||||
<input name="h" type="checkbox" value="h" tabindex="20"> h
|
||||
<textarea name="t">t</textarea>
|
||||
<form action="a" method="get"></form></form><br />
|
||||
<form action="b" method="get"><p><input type="text" value="i" /></form><br />
|
||||
<form>B:<input type="text" value="b" />C:<input type="text" value="c" /></form><br />
|
||||
(try each of these lines separately)<br />
|
||||
<form action="a">what<br />
|
||||
<form action="a">what
|
||||
(try with container as div and as form)<br />
|
||||
<form>c <a>a</a> <b>b</b><input /><script>s</script>
|
||||
|
||||
<h6>HTML comments (also CDATA)</h6>
|
||||
|
||||
Special characters inside: <!-- <![CDATA check ]]> -->, <!-- 3 < 4 > 3.5, & 4 > 4 -->, <!-- che--ck -->, <!--[if !IE]> <--><a>c</a><!--> <![endif]--><br />
|
||||
Normal: <!-- check -->, <!--check -->, <em>comment:<!-- check --></em><!-- check -->, <table><!-- check --><tr><td>text not allowed</td></tr></table><br />
|
||||
Malformed: <![cdata check ]]>, < ![CDATA check ]]>, < ![CDATA check ] ]><br />
|
||||
Invalid: <em <!-- check -->>comment in tag content</em>, <!--check-->
|
||||
|
||||
<h6>Ins-Del</h6>
|
||||
|
||||
(depending on context, these elements can be of either block or inline type)<br />
|
||||
<p><ins datetime="d" cite="c"><div>block</div></ins></p><br />
|
||||
<p><del>d</del></p><br />
|
||||
<p><ins><del>d</del></ins></p><div><ins><p><del><div>d</div></del></p></ins></div><ins><div>d</div></ins>
|
||||
|
||||
<h6>Lists</h6>
|
||||
|
||||
<strong>Invalid character data</strong>: <ul><li>(item</li>)</ul><br />
|
||||
<strong>Definition list</strong>: <dl><dt>a</dt>bad<dd>first <em>one</em></dd><dt>b</dt><dd>second</dd></dl><br />
|
||||
<strong>Definition list, close-tags omitted</strong>: <dl><dt>a</dt>bad<dd>first <em>one</em></dd><dt>b<dd>second</dl><br />
|
||||
<strong>Definition lists, nested</strong>: <dl>
|
||||
<dt>T1</dt>
|
||||
<dd>D1</dd>
|
||||
<dt>T2</dt>
|
||||
<dd>D2<dl><dt>t1</dt><dd>d1</dd><dt>t2</dt><dd>d2</dd></dl></dd>
|
||||
<dt>T3</dt>
|
||||
<dd>D3</dd>
|
||||
<dt>T4</dt>
|
||||
<dd>D4<dl><dt>t1</dt><dd>d1</dd></dl></dd>
|
||||
</dl><br />
|
||||
<strong>Definition lists, nested, close-tags omitted</strong>: <dl>
|
||||
<dt>T1
|
||||
<dd>D1</dd>
|
||||
<dt>T2</dt>
|
||||
<dd>D2<dl><dt>t1<dd>d1<dt>t2</dt><dd>d2</dd></dl></dd>
|
||||
<dt>T3
|
||||
<dd>D3
|
||||
<dt>T4
|
||||
<dd>D4<dl><dt>t1<dd>d1</dl></dd>
|
||||
</dl><br />
|
||||
<strong>Nested</strong>: <ul>
|
||||
<li>l1</li>
|
||||
<li>l2<ol><li>lo1</li><li>lo2</li></ol></li>
|
||||
<li>l3</li>
|
||||
<li>l4<ol><li>lo3</li><li>lo4<ol><li>lo5</li></ol></li></ol></li>
|
||||
</ul><br />
|
||||
<strong>Nested, close-tags omitted</strong>: <ul>
|
||||
<li>l1</li>
|
||||
<li>l2<ol><li>lo1<li>lo2</ol>
|
||||
<li>l3
|
||||
<li>l4<ol><li>lo3<li>lo4<ol><li>lo5</ol></ol>
|
||||
</ul><br />
|
||||
<strong>Complex</strong>:
|
||||
<ol><script></script><li><table><tr><td>
|
||||
<ul><li id="search" class="widget widget_search"> <form id="searchform" method="get" action="http://kohei.us">
|
||||
<div>
|
||||
|
||||
<input type="text" name="s" id="s" size="15" /><br />
|
||||
<input type="submit" value="Search" />
|
||||
</div>
|
||||
</form>
|
||||
</li></ul>
|
||||
</td></tr></table></li></ol>
|
||||
|
||||
<h6>Non-English text-1</h6>
|
||||
|
||||
Inscrieţi-vă acum la a Zecea Conferinţă Internaţională<br />
|
||||
გთხოვთ ახლავე გაიაროთ რეგისტრაცია<br />
|
||||
večjezično računalništvo<br />
|
||||
<a title="הירשמו
|
||||
כעת לכנס ">Зарегистрируйтесь сейчас
|
||||
на Десятую Международную Конференцию по</a><br />
|
||||
(this file should have utf-8 encoding; some characters may not be displayed because of missing fonts, etc.)
|
||||
|
||||
<h6>Non-English text-2: entities</h6>
|
||||
|
||||
用统一码<br />
|
||||
გთხოვთ<br />
|
||||
Inscreva-se agora para a Décima Conferência Internacional Sobre O Unicode, realizada entre os dias 10 e 12 de março de 1997 em Mainz
|
||||
na Alemanha.
|
||||
|
||||
<h6>Ruby</h6>
|
||||
|
||||
(need compatible browser)<br />
|
||||
<ruby xml:lang="ja">
|
||||
<rbc>
|
||||
<rb>斎</rb>
|
||||
<rb>藤</rb>
|
||||
<rb>信</rb>
|
||||
<rb>男</rb>
|
||||
</rbc>
|
||||
<rtc class="reading">
|
||||
<rt>さい</rt>
|
||||
<rt>とう</rt>
|
||||
<rt>のぶ</rt>
|
||||
<rt>お</rt>
|
||||
</rtc>
|
||||
<rtc class="annotation">
|
||||
<rt rbspan="4" xml:lang="en">W3C Associate Chairman</rt>
|
||||
</rtc>
|
||||
</ruby><br />
|
||||
<ruby>
|
||||
<rb>WWW</rb>
|
||||
<rp>(</rp><rt>World Wide Web</rt><rp>)</rp>
|
||||
</ruby><br />
|
||||
<ruby>
|
||||
A
|
||||
<rp>(</rp><rt>aaa</rt><rp>)</rp>
|
||||
</ruby>
|
||||
|
||||
<h6>Tables</h6>
|
||||
|
||||
<strong>Omitted closing tags:</strong> <table>
|
||||
<colgroup><col style="x" /><col style="y" />
|
||||
<thead>
|
||||
<tr><th>h1c1<th>h1c2
|
||||
<tbody>
|
||||
<tr><td>r1c1<td>r1c2
|
||||
<tr><td>r2c1<td>r2c2
|
||||
</table><br />
|
||||
<strong>Nested, omitted closing tags:</strong> <table>
|
||||
<colgroup><col style="x" /><col style="y" />
|
||||
<thead>
|
||||
<tr><th>h1c1<th>h1c2
|
||||
<tbody>
|
||||
<tr><td>r1c1<td>r1c2<table>
|
||||
<colgroup><col style="x" /><col style="y" />
|
||||
<thead>
|
||||
<tr><th>h1c1<th>h1c2
|
||||
<tbody>
|
||||
<tr><td>r1c1<td>r1c2
|
||||
<tr><td>r2c1<td>r2c2
|
||||
</table>
|
||||
<tr><td>r2c1<td>r2c2
|
||||
</table><br />
|
||||
|
||||
<h6>URLs</h6>
|
||||
|
||||
<strong>Relative and absolute:</strong> <a href="mailto:x"></a>, <a href="http://a.com/b/c/d.f"></a>, <a href="./../d.f"></a>, <a href="./d.f"></a>, <a href="d.f"></a>, <a href="#s"></a>, <a href="./../../d.f#s"></a><br />
|
||||
(try base URL value of 'http://a.com/b/')<br />
|
||||
<strong>CSS URLs:</strong> <div style="background-image: url('a.gif');"></div>, <div style="background-image: URL("a.gif");"></div>, <div style="background-image: url('http://a.com/a.gif');"></div>, <div style="background-image: url('./../a.gif');"></div>, <div style="background-image: url('js:xss')"></div><br />
|
||||
<strong>Anti-spam:</strong> (try regex for 'http://a.com', etc.) <a href="mailto:x@y.com"></a>, <a href="http://a.com/b@d.f"></a>, <a href="a.com/d.f" rel="nofollow"></a>, <a href="a.com/d.f" rel="1, 2"></a>, <a href="a.com/d.f"></a>, <a href="b.com/d.f"></a>, <a href="c.com/d.f"></a><br />
|
||||
|
||||
<h6>XSS</h6>
|
||||
|
||||
'';!--"<xss>=&{()}<br />
|
||||
<img src="javascript%3Aalert('xss');" /><br />
|
||||
<img src="javascript:alert('xss');" /><br />
|
||||
<img src="java script:alert('xss');" /><br />
|
||||
<img
|
||||
src=javascript:alert('XSS') /><br />
|
||||
<div style="javascript:alert('xss');"></div><br />
|
||||
<div style="background-image:url(javascript:alert('xss'));"></div><br />
|
||||
<div style="background-image:url("javascript:alert('xss')" );"></div><br />
|
||||
<!--[if gte IE 4]><script>alert('xss');</script><![endif]--><br />
|
||||
<script a=">" src="http://ha.ckers.org/xss.js"></script><br />
|
||||
<div style="background-image: url('js:xss')"></div><br />
|
||||
<a style=";-moz-binding:url(http://lukasz.pilorz.net/xss/xss.xml#xss)" href="http://example.com">test</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="http://x&x=%22+style%3d%22background-image%3a+expression%28alert
|
||||
%28%27xss%3f%29%29">x</a><br />
|
||||
<strong>Bad IE7:</strong> <a style=color:expr/*comment*/ession(alert(document.domain))>xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background: expression(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background: expression(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background: %45xpression(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background:/**/expression(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background:/**/Expression(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background:/**/Expression(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background: expr%45ssion(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background: exp/* */ression(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background: exp /* */ression(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background: exp/ * * /ression(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background:/* x */expression(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="xxx" style="background:/* */ */expression(alert('xss'));">xxx</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="x" style="width: /****/**;;;;;;*/expression/**/(alert('xss'));">x</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="x" style="padding:10px; background:/**/expression(alert('xss'));">x</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="x" style="background: huh /* */ */expression(alert('xss'));">x</a><br />
|
||||
<strong>Bad IE7:</strong> <a href="x" style="background:/**/expression(alert('xss'));background:/**/expression(alert('xss'));">x</a><br />
|
||||
<strong>Bad IE7:</strong> exp/*<a style='no\xss:noxss("*//*");xss:ex/*XSS*//*/*/pression(alert("XSS"))'>x</a><br />
|
||||
<strong>Bad IE7:</strong> <a style="background:Expre\ssion(alert('xss'));">hi</a><br />
|
||||
<strong>Bad IE7:</strong> <a style="background:expre\ssion(alert('xss'));">hi</a><br />
|
||||
<strong>Bad IE7:</strong> <a style="color: \0065 \0078 \0070 \0072 \0065 \0073 \0073 \0069 \006f \006e \0028 \0061 \006c \0065 \0072 \0074 \0028 \0031 \0029 \0029">test</a><br />
|
||||
<strong>Bad IE7:</strong> <a style="xss:e\0078pression(window.x?0:(alert(/XSS/),window.x=1));">hi</a><br />
|
||||
<strong>Bad IE7:</strong> <a style="background:url('java
|
||||
script:eval(document.all.mycode.expr)')">hi</a><br />
|
||||
|
||||
<h6>Other</h6>
|
||||
|
||||
3 < 4 <br />
|
||||
3 > 4 <br />
|
||||
> 3 <br />
|
@ -340,7 +340,12 @@ class Attachment extends AttachmentListItem
|
||||
case 'video':
|
||||
case 'link':
|
||||
if (!empty($this->oembed->html)) {
|
||||
$this->out->raw($this->oembed->html);
|
||||
require_once INSTALLDIR.'/extlib/htmLawed/htmLawed.php';
|
||||
$config = array(
|
||||
'safe'=>1,
|
||||
'elements'=>'*+object+embed');
|
||||
$this->out->raw(htmLawed($this->oembed->html,$config));
|
||||
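// Unsanitized form kept for reference only; oEmbed provider HTML is untrusted, so it is passed through htmLawed above instead: $this->out->raw($this->oembed->html);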
|
||||
}
|
||||
break;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user