231 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
		
		
			
		
	
	
			231 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php

/**
 * Abstract class for a set of proprietary modules that clean up (tidy)
 * poorly written HTML.
 *
 * Subclasses describe their fixes by overriding makeFixes(); setup() then
 * filters that list against the HTML.TidyLevel, HTML.TidyAdd and
 * HTML.TidyRemove configuration directives and registers the surviving
 * fixes through populate().
 *
 * @todo Figure out how to protect some of these methods/properties
 */
class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
{
    /**
     * List of supported levels, ordered from least to most aggressive.
     * Index zero is a special case "no fixes" level.
     * @type array
     */
    public $levels = array(0 => 'none', 'light', 'medium', 'heavy');

    /**
     * Default level to place all fixes in.
     * Disabled by default (null), in which case makeFixesForLevel()
     * leaves $fixesForLevel untouched.
     * @type string|null
     */
    public $defaultLevel = null;

    /**
     * Lists of fixes used by getFixesForLevel().
     * Format is:
     *      HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
     * @type array
     */
    public $fixesForLevel = array(
        'light' => array(),
        'medium' => array(),
        'heavy' => array()
    );

    /**
     * Lazy load constructs the module by determining the necessary
     * fixes to create and then delegating to the populate() function.
     *
     * A fix survives when it is not listed in HTML.TidyRemove and is
     * either enabled by the configured HTML.TidyLevel or explicitly
     * listed in HTML.TidyAdd.
     *
     * @param HTMLPurifier_Config $config
     * @todo Wildcard matching and error reporting when an added or
     *       subtracted fix has no effect.
     */
    public function setup($config)
    {
        // create fixes, initialize fixesForLevel
        $fixes = $this->makeFixes();
        $this->makeFixesForLevel($fixes);

        // figure out which fixes to use
        $level = $config->get('HTML.TidyLevel');
        $fixes_lookup = $this->getFixesForLevel($level);

        // get custom fix declarations: these need namespace processing
        // (both are used as lookup tables keyed by fix name)
        $add_fixes = $config->get('HTML.TidyAdd');
        $remove_fixes = $config->get('HTML.TidyRemove');

        foreach ($fixes as $name => $fix) {
            // needs to be refactored a little to implement globbing
            if (isset($remove_fixes[$name]) ||
                (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name]))) {
                unset($fixes[$name]);
            }
        }

        // populate this module with necessary fixes
        $this->populate($fixes);
    }

    /**
     * Retrieves all fixes per a level, returning fixes for that specific
     * level as well as all levels below it.
     *
     * Emits an E_USER_WARNING and returns an empty array when $level is
     * not one of the entries in $levels.
     *
     * @param string $level level identifier, see $levels for valid values
     * @return array Lookup table of fixes (fix name => true)
     */
    public function getFixesForLevel($level)
    {
        // the special "no fixes" level short-circuits everything
        if ($level == $this->levels[0]) {
            return array();
        }
        // collect every level up to and including the requested one
        $activated_levels = array();
        for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
            $activated_levels[] = $this->levels[$i];
            if ($this->levels[$i] == $level) {
                break;
            }
        }
        // the loop ran to completion without a match: unknown level
        if ($i == $c) {
            trigger_error(
                'Tidy level ' . htmlspecialchars($level) . ' not recognized',
                E_USER_WARNING
            );
            return array();
        }
        $ret = array();
        // a dedicated loop variable is used so that the $level argument
        // is not overwritten while iterating
        foreach ($activated_levels as $activated_level) {
            foreach ($this->fixesForLevel[$activated_level] as $fix) {
                $ret[$fix] = true;
            }
        }
        return $ret;
    }

    /**
     * Dynamically populates the $fixesForLevel member variable using
     * the fixes array. It may be custom overloaded, used in conjunction
     * with $defaultLevel, or not used at all.
     *
     * When $defaultLevel is set, every fix name in $fixes is assigned to
     * that level, replacing whatever was listed there before. When it is
     * set to a level missing from $fixesForLevel, an E_USER_ERROR is
     * triggered instead.
     *
     * @param array $fixes Associative array of fix name to implementation
     */
    public function makeFixesForLevel($fixes)
    {
        if (!isset($this->defaultLevel)) {
            return;
        }
        if (!isset($this->fixesForLevel[$this->defaultLevel])) {
            trigger_error(
                'Default level ' . $this->defaultLevel . ' does not exist',
                E_USER_ERROR
            );
            return;
        }
        $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
    }

    /**
     * Populates the module with transforms and other special-case code
     * based on a list of fixes passed to it.
     *
     * Each fix name is decoded with getFixType() and the implementation
     * is stored on either this module or on the element definition the
     * fix targets. Unsupported fix types trigger an E_USER_ERROR.
     *
     * @param array $fixes Lookup table of fixes to activate
     *                     (fix name => fix implementation)
     */
    public function populate($fixes)
    {
        foreach ($fixes as $name => $fix) {
            // determine what the fix is for
            list($type, $params) = $this->getFixType($name);
            switch ($type) {
                case 'attr_transform_pre':
                case 'attr_transform_post':
                    $attr = $params['attr'];
                    if (isset($params['element'])) {
                        // element-specific transform: stored on the
                        // element definition, created on demand
                        $element = $params['element'];
                        if (empty($this->info[$element])) {
                            $e = $this->addBlankElement($element);
                        } else {
                            $e = $this->info[$element];
                        }
                    } else {
                        // global transform: stored on the module itself
                        // under info_attr_transform_pre/post
                        $type = "info_$type";
                        $e = $this;
                    }
                    // PHP does some weird parsing when I do
                    // $e->$type[$attr], so I have to assign a ref.
                    $f =& $e->$type;
                    $f[$attr] = $fix;
                    break;
                case 'tag_transform':
                    $this->info_tag_transform[$params['element']] = $fix;
                    break;
                case 'child':
                case 'content_model_type':
                    $element = $params['element'];
                    if (empty($this->info[$element])) {
                        $e = $this->addBlankElement($element);
                    } else {
                        $e = $this->info[$element];
                    }
                    // the fix type doubles as the property name on the
                    // element definition
                    $e->$type = $fix;
                    break;
                default:
                    trigger_error("Fix type $type not supported", E_USER_ERROR);
                    break;
            }
        }
    }

    /**
     * Parses a fix name and determines what kind of fix it is, as well
     * as other information defined by the fix.
     *
     * Fix names have the form "element@attr#property", where every part
     * is optional:
     *  - "font"                 => array('tag_transform', element font)
     *  - "td@width"             => array('attr_transform_pre', element td, attr width)
     *  - "@lang"                => array('attr_transform_pre', attr lang)
     *  - "img@alt#post"         => array('attr_transform_post', element img, attr alt)
     *  - "ol#content_model_type" => array('content_model_type', element ol)
     *
     * @param string $name Name of fix
     * @return array(string $fix_type, array $fix_parameters)
     * @note $fix_parameters is type dependent, see populate() for usage
     *       of these parameters
     */
    public function getFixType($name)
    {
        // parse it: the property suffix is split off first, then the
        // attribute; only the first two pieces of each split are used
        $property = $attr = null;
        if (strpos($name, '#') !== false) {
            list($name, $property) = explode('#', $name);
        }
        if (strpos($name, '@') !== false) {
            list($name, $attr) = explode('@', $name);
        }

        // figure out the parameters
        $params = array();
        if ($name !== '') {
            $params['element'] = $name;
        }
        if (!is_null($attr)) {
            $params['attr'] = $attr;
        }

        // special case: attribute transform
        if (!is_null($attr)) {
            if (is_null($property)) {
                $property = 'pre';
            }
            $type = 'attr_transform_' . $property;
            return array($type, $params);
        }

        // special case: tag transform
        if (is_null($property)) {
            return array('tag_transform', $params);
        }

        return array($property, $params);
    }

    /**
     * Defines all fixes the module will perform in a compact
     * associative array of fix name to fix implementation.
     *
     * The base implementation defines no fixes and returns an empty
     * array, so that setup(), makeFixesForLevel() and populate() always
     * receive an array; subclasses override this to supply their fixes.
     *
     * @return array
     */
    public function makeFixes()
    {
        return array();
    }
}

// vim: et sw=4 sts=4