<?php

	/*

	hKit Library for PHP5 - a generic library for parsing Microformats
	Copyright (C) 2006  Drew McLellan

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

	Author:
		Drew McLellan - http://allinthehead.com/

	Contributors:
		Scott Reynen - http://www.randomchaos.com/

	Version 0.5, 22-Jul-2006
		fixed by-ref issue cropping up in PHP 5.0.5
		fixed a bug with a@title
		added support for new fn=n optimisation
		added support for new a.include include-pattern
	Version 0.4, 23-Jun-2006
		prevented nested includes from causing infinite loops
		returns false if URL can't be fetched
		added pre-flight check for base support level
		added deduping of once-only classnames
		prevented accumulation of multiple 'value' values
		tuned whitespace handling and treatment of DEL elements
	Version 0.3, 21-Jun-2006
		added post-processor callback method into profiles
		fixed minor problems raised by hcard testsuite
		added support for include-pattern
		added support for td@headers pattern
		added implied-n optimization into default hcard profile
	Version 0.2, 20-Jun-2006
		added class callback mechanism
		added resolvePath & resolveEmail
		added basic BASE support
	Version 0.1.1, 19-Jun-2006 (different timezone, no time machine)
		added external Tidy option
	Version 0.1, 20-Jun-2006
		initial release

	*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									class hKit
							 | 
						||
| 
								 | 
							
									{
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										public $tidy_mode	= 'proxy'; // 'proxy', 'exec', 'php' or 'none'
							 | 
						||
| 
								 | 
							
										public $tidy_proxy	= 'http://cgi.w3.org/cgi-bin/tidy?forceXML=on&docAddr='; // required only for tidy_mode=proxy
							 | 
						||
| 
								 | 
							
										public $tmp_dir		= '/path/to/writable/dir/'; // required only for tidy_mode=exec
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										private $root_class = '';
							 | 
						||
| 
								 | 
							
										private $classes	= '';
							 | 
						||
| 
								 | 
							
										private $singles	= '';
							 | 
						||
| 
								 | 
							
										private $required	= '';
							 | 
						||
| 
								 | 
							
										private $att_map	= '';
							 | 
						||
| 
								 | 
							
										private $callbacks	= '';
							 | 
						||
| 
								 | 
							
										private $processor 	= '';
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										private $url		= '';
							 | 
						||
| 
								 | 
							
										private $base 		= '';
							 | 
						||
| 
								 | 
							
										private $doc		= '';
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Constructor: pre-flight check that every PHP function hKit relies on
										 * is available in this installation.
										 *
										 * Terminates the script via die() with an HTML error message listing
										 * the missing functions when any are unavailable.
										 *
										 * Declared as __construct() because PHP 8 no longer treats a method
										 * named after its class as the constructor, which would silently skip
										 * this check on `new hKit`.
										 */
										public function __construct()
										{
											$required	= array('dom_import_simplexml', 'file_get_contents', 'simplexml_load_string');
											$missing	= array();

											foreach ($required as $f) {
												if (!function_exists($f)) {
													$missing[]	= $f . '()';
												}
											}

											if ($missing) {
												die('hKit error: these required functions are not available: <strong>' . implode(', ', $missing) . '</strong>');
											}
										}

										/**
										 * Legacy PHP 4-style constructor name, kept so code that calls
										 * hKit() explicitly continues to work. Re-runs the pre-flight check.
										 */
										public function hKit()
										{
											$this->__construct();
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										/**
										 * Fetch a URL and extract the microformat described by a profile.
										 *
										 * If the URL carries a "#fragment", only the element with that id is
										 * searched for microformat roots.
										 *
										 * @param string $profile	profile name handed to loadProfile()
										 * @param string $url		address of the document to fetch
										 * @return mixed	the post-processed data, or false when either argument
										 *					is empty or the URL could not be fetched
										 */
										public function getByURL($profile='', $url='')
										{
											if ($profile=='' || $url == '') return false;

											$this->loadProfile($profile);

											$source		= $this->loadURL($url);

											if (!$source) {
												return false;
											}

											$tidy_xhtml	= $this->tidyThis($source);

											$fragment	= false;

											if (strrchr($url, '#')) {
												// array_pop() takes its argument by reference, so the exploded
												// parts must live in a real variable first.
												$url_parts	= explode('#', $url);
												$fragment	= array_pop($url_parts);
											}

											$doc		= $this->loadDoc($tidy_xhtml, $fragment);
											$s			= $this->processNodes($doc, $this->classes);
											$s			= $this->postProcess($profile, $s);

											return $s;
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Extract the microformat described by a profile from markup that
										 * is already held in a string.
										 *
										 * @param string $profile	profile name handed to loadProfile()
										 * @param string $input_xml	markup to parse; passed to loadDoc() as-is
										 * @return mixed	the post-processed data, or false when either
										 *					argument is empty
										 */
										public function getByString($profile='', $input_xml='')
										{
											if ($profile != '' && $input_xml != '') {
												$this->loadProfile($profile);

												$roots	= $this->loadDoc($input_xml);
												$parsed	= $this->processNodes($roots, $this->classes);

												return $this->postProcess($profile, $parsed);
											}

											return false;
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Walk a set of nodes and collect the values of the elements whose
										 * class attribute names one of the given classes.
										 *
										 * $classes is a flat list of class names; an array entry directly after
										 * a name is the list of child classes to look for beneath that name.
										 * Besides plain class lookups this also follows the td@headers pattern
										 * and, unless disabled, the include-pattern (object.include / a.include).
										 *
										 * @param mixed $items			iterable of SimpleXMLElement nodes (an array,
										 *								or an element whose children are iterated)
										 * @param array $classes		class names, with optional nested child lists
										 * @param bool  $allow_includes	false while processing an include target,
										 *								so nested includes cannot loop forever
										 * @return array	one data array when a single item was processed, a list
										 *					of data arrays for several items, or an empty array
										 */
										private function processNodes($items, $classes, $allow_includes=true)
										{
											$out	= array();

											// xpath() and simplexml_load_string() report failure as false;
											// treat anything that cannot be iterated as "no nodes".
											if (!is_array($items) && !($items instanceof Traversable)) {
												return $out;
											}

											$class_count	= sizeof($classes);

											foreach ($items as $item) {
												$data	= array();

												for ($i = 0; $i < $class_count; $i++) {

													// array entries are child lists belonging to the previous name
													if (is_array($classes[$i])) continue;

													$name		= $classes[$i];
													$children	= (isset($classes[$i+1]) && is_array($classes[$i+1])) ? $classes[$i+1] : null;
													$results	= $item->xpath(".//*[contains(concat(' ',normalize-space(@class),' '),' " . $name . " ')]");

													if (!$results) continue;

													foreach ($results as $result) {

														if ($children !== null) {
															$nodes	= $this->processNodes($results, $children);
															if (sizeof($nodes) > 0) {
																// keep the element's own text next to its child values
																$data[$name]	= array_merge(array('text'=>$this->getNodeValue($result, $name)), $nodes);
															} else {
																$data[$name]	= $this->getNodeValue($result, $name);
															}
														} else if (!isset($data[$name])) {
															// first occurrence: set as normal value
															$data[$name]	= $this->getNodeValue($result, $name);
														} else if (is_array($data[$name])) {
															// is already an array - append
															$data[$name][]	= $this->getNodeValue($result, $name);
														} else if ($name == 'value') {
															// the 'value' of a type/value pattern is concatenated, not listed
															$data[$name]	.= $this->getNodeValue($result, $name);
														} else {
															// second occurrence: make it an array
															$data[$name]	= array($data[$name], $this->getNodeValue($result, $name));
														}

														// td@headers pattern
														if (strtoupper(dom_import_simplexml($result)->tagName) == "TD" && $result['headers']) {
															foreach (explode(' ', (string) $result['headers']) as $id) {
																// the parent of each referenced header cell is processed
																foreach ($this->findNodesById($id, '/..') as $include) {
																	$tmp	= $this->processNodes($include, $this->classes);
																	if (is_array($tmp)) $data = array_merge($data, $tmp);
																}
															}
														}
													}
												}

												// include-pattern
												if ($allow_includes) {
													$results	= $item->xpath(".//*[contains(concat(' ',normalize-space(@class),' '),' include ')]");

													if ($results) {
														foreach ($results as $result) {
															$tagName	= strtoupper(dom_import_simplexml($result)->tagName);
															$is_object	= ($tagName == "OBJECT" && $result['data']);
															$is_link	= ($tagName == "A" && $result['href']);

															if (($is_object || $is_link) && preg_match('/\binclude\b/', (string) $result['class'])) {
																$att	= ($tagName == "OBJECT" ? 'data' : 'href');
																$id		= str_replace('#', '', (string) $result[$att]);

																foreach ($this->findNodesById($id) as $include) {
																	// Two wrapper levels: iterating the outer element yields the
																	// inner wrapper as the item, whose descendants then include
																	// the include target itself.
																	$include	= simplexml_load_string('<root1><root2>'.$include->asXML().'</root2></root1>');
																	$tmp		= $this->processNodes($include, $this->classes, false);
																	if (is_array($tmp)) $data = array_merge($data, $tmp);
																}
															}
														}
													}
												}

												$out[]	= $data;
											}

											if (sizeof($out) > 1) {
												return $out;
											} else if (isset($data)) {
												return $data;
											} else {
												return array();
											}
										}

										/**
										 * Look up elements of the loaded document by id.
										 *
										 * The id is embedded as a properly quoted XPath string literal, so ids
										 * containing quote characters cannot break or alter the expression.
										 *
										 * @param string $id		value of the id attribute to match
										 * @param string $suffix	XPath steps appended after the id match (e.g. '/..')
										 * @return array	matching SimpleXMLElement nodes; empty when there is no
										 *					document, no match, or the query fails
										 */
										private function findNodesById($id, $suffix = '')
										{
											if (!is_object($this->doc)) {
												return array();
											}

											$id	= (string) $id;

											if (strpos($id, "'") === false) {
												$literal	= "'" . $id . "'";
											} else if (strpos($id, '"') === false) {
												$literal	= '"' . $id . '"';
											} else {
												// XPath 1.0 has no escape syntax: splice the pieces with concat()
												$literal	= "concat('" . str_replace("'", "',\"'\",'", $id) . "')";
											}

											$found	= $this->doc->xpath('//*[@id=' . $literal . ']' . $suffix);

											return $found ? $found : array();
										}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										/**
										 * Work out the value a node contributes for a given class name.
										 *
										 * Lookup order: attribute named by the profile's att_map for this
										 * class and tag, then OBJECT@data, IMG@alt, AREA@alt, @title (except
										 * on A), and finally the node's non-blank child nodes joined by spaces.
										 * The result is passed through the class callback, if one is registered,
										 * and whitespace is collapsed unless the node is a PRE.
										 *
										 * @param SimpleXMLElement $node	element to read
										 * @param string $className			class the element was matched on
										 * @return mixed	the extracted string, or false for DEL elements
										 */
										private function getNodeValue($node, $className)
										{
											$tag_name	= strtoupper(dom_import_simplexml($node)->tagName);
											$s			= false;

											// ignore DEL tags
											if ($tag_name == 'DEL') return $s;

											// look up att map values; entries look like "TAG|attribute"
											if (is_array($this->att_map) && array_key_exists($className, $this->att_map)) {
												foreach ($this->att_map[$className] as $map) {
													$map_parts	= explode('|', $map);
													$att		= array_pop($map_parts);

													// compare whole tag names so that "A" does not match "AREA|..."
													if (in_array($tag_name, $map_parts, true)) {
														$s	= ''.$node[$att];
													}
												}
											}

											// if nothing and OBJ, try data.
											if (!$s && $tag_name=='OBJECT' && $node['data'])	$s	= ''.$node['data'];

											// if nothing and IMG, try alt.
											if (!$s && $tag_name=='IMG' && $node['alt'])		$s	= ''.$node['alt'];

											// if nothing and AREA, try alt.
											if (!$s && $tag_name=='AREA' && $node['alt'])		$s	= ''.$node['alt'];

											// if nothing and not A, try title.
											if (!$s && $tag_name!='A' && $node['title'])		$s	= ''.$node['title'];

											// if nothing found, go with node text
											if (!$s) {
												$child_nodes	= $node->xpath('child::node()');
												if (!is_array($child_nodes)) $child_nodes = array();

												// separator first: the (array, separator) order is rejected by PHP 8
												$s	= implode(' ', array_filter($child_nodes, array($this, 'filterBlankValues')));
											}

											// callbacks receive a preg match array, the whole value being index 0
											if (is_array($this->callbacks) && array_key_exists($className, $this->callbacks)) {
												$s	= preg_replace_callback('/.*/', $this->callbacks[$className], $s, 1);
											}

											// trim and remove line breaks
											if ($tag_name != 'PRE') {
												$s	= trim(preg_replace('/[\r\n\t]+/', '', $s));
												$s	= trim(preg_replace('/(\s{2})+/', ' ', $s));
											}

											return $s;
										}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										/**
										 * array_filter() callback: tells whether a value contains at least
										 * one word character, i.e. is not blank.
										 *
										 * @param mixed $s	value to inspect (used as a string)
										 * @return mixed	the preg_match() result: 1 on a match, 0 otherwise
										 */
										private function filterBlankValues($s)
										{
											$has_word_chars	= preg_match("/\w+/", $s);

											return $has_word_chars;
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Clean up fetched markup according to $this->tidy_mode.
										 *
										 * 'exec' writes the markup to a temporary file in $this->tmp_dir and
										 * runs the command-line tidy on it; 'php' uses the tidy extension;
										 * every other mode returns the markup untouched.
										 *
										 * @param string $source	markup to clean
										 * @return string	the tidied markup, or $source unchanged when no tidying
										 *					applies or the tidy step could not be started
										 */
										private function tidyThis($source)
										{
											switch ($this->tidy_mode) {

												case 'exec':
													$tmp_file	= $this->tmp_dir . md5($source) . '.txt';

													// without the temp file there is nothing for tidy to read
													if (file_put_contents($tmp_file, $source) === false) {
														return $source;
													}

													$tidy	= array();
													// quote the path so directories containing spaces or shell
													// metacharacters cannot break or alter the command
													exec('tidy -utf8 -indent -asxhtml -numeric -bare -quiet ' . escapeshellarg($tmp_file), $tidy);
													unlink($tmp_file);

													return implode("\n", $tidy);

												case 'php':
													// same output settings as the command line used in 'exec' mode
													$config	= array(
														'output-xhtml'		=> true,
														'numeric-entities'	=> true,
														'indent'			=> true,
														'bare'				=> true,
													);
													$tidy	= tidy_parse_string($source, $config, 'utf8');

													if (!$tidy) {
														return $source;
													}

													// tidy_clean_repair() only reports success; the repaired
													// markup has to be read back with tidy_get_output()
													tidy_clean_repair($tidy);

													return tidy_get_output($tidy);

												default:
													return $source;
											}
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Load the profile definition file "<profile>.profile.php".
										 *
										 * The file is included inside this method, so it runs with $this
										 * in scope. It is included at most once per request.
										 *
										 * NOTE(review): the file name is built directly from $profile, so
										 * callers must never pass untrusted input here. Because of
										 * require_once, a second hKit instance asking for the same profile
										 * does not execute the file again - confirm that is intended.
										 *
										 * @param string $profile	profile name, without the ".profile.php" suffix
										 */
										private function loadProfile($profile)
										{
											$profile_file	= $profile . '.profile.php';

											require_once($profile_file);
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Parse markup and return the microformat root elements inside it.
										 *
										 * Stores the parsed document in $this->doc and records a base URL in
										 * $this->base when the document declares one through <base href> or
										 * an xml:base attribute (xml:base wins when both are present).
										 *
										 * @param string $input_xml	well-formed XML/XHTML markup
										 * @param mixed  $fragment	id of the element to restrict the search to,
										 *							or false to search the whole document
										 * @return array	elements whose class contains $this->root_class; empty
										 *					when the markup cannot be parsed, the fragment does
										 *					not exist, or nothing matches
										 */
										private function loadDoc($input_xml, $fragment=false)
										{
											$xml		= simplexml_load_string($input_xml);

											$this->doc	= $xml;

											// unparseable markup: nothing to search
											if ($xml === false) {
												return array();
											}

											if ($fragment) {
												// embed the id as a quoted XPath literal; an id holding both
												// quote characters cannot belong to any element we could match
												if (strpos($fragment, "'") === false) {
													$id_literal	= "'" . $fragment . "'";
												} else if (strpos($fragment, '"') === false) {
													$id_literal	= '"' . $fragment . '"';
												} else {
													return array();
												}

												$doc	= $xml->xpath('//*[@id=' . $id_literal . ']');

												if (!$doc) {
													return array();
												}

												// re-parse the fragment so it becomes a document of its own
												$xml	= simplexml_load_string($doc[0]->asXML());
												$doc	= null;

												if ($xml === false) {
													return array();
												}
											}

											// base tag - stored as a plain string, not as a SimpleXML node
											if (isset($xml->head->base['href']) && (string) $xml->head->base['href'] != '') {
												$this->base	= (string) $xml->head->base['href'];
											}

											// xml:base attribute - PITA with SimpleXML
											// [^"]* stops at the closing quote instead of running on to the
											// last quote on the line
											if (preg_match('/xml:base="([^"]*)"/', (string) $xml->asXML(), $matches)) {
												$this->base	= $matches[1];
											}

											$roots	= $xml->xpath("//*[contains(concat(' ',normalize-space(@class),' '),' $this->root_class ')]");

											return $roots ? $roots : array();
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Fetch the contents of a URL, remembering the URL in $this->url.
										 *
										 * In 'proxy' tidy mode with a proxy configured, the request goes to
										 * $this->tidy_proxy with the URL appended to it.
										 *
										 * NOTE(review): the URL is appended to the proxy address verbatim,
										 * without URL-encoding - confirm the configured proxy expects that.
										 *
										 * @param string $url	address of the document
										 * @return mixed	the response body, or false when the fetch fails
										 *					(warnings from the fetch are suppressed)
										 */
										private function loadURL($url)
										{
											$this->url	= $url;

											$use_proxy	= ($this->tidy_mode == 'proxy' && $this->tidy_proxy != '');
											$target		= $use_proxy ? $this->tidy_proxy . $url : $url;

											return @file_get_contents($target);
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Final clean-up of the extracted data before it is returned.
										 *
										 * Steps, in order: wrap a single record in a list (a record is
										 * recognised by having the first required class as a key), dedupe
										 * once-only classes, run the profile's optional
										 * hKit_<profile>_post() function, and strip the helper text values.
										 *
										 * @param string $profile	profile name, used to find the post function
										 * @param mixed  $s			data produced by processNodes()
										 * @return mixed	the cleaned-up data
										 */
										private function postProcess($profile, $s)
										{
											$post_function	= 'hKit_' . $profile . '_post';

											// a lone record becomes a one-element list of records
											$is_single_record	= is_array($s) && array_key_exists($this->required[0], $s);
											if ($is_single_record) {
												$s	= array($s);
											}

											$s	= $this->dedupeSingles($s);

											if (function_exists($post_function)) {
												$s	= call_user_func($post_function, $s);
											}

											return $this->removeTextVals($s);
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Class callback: turn a path found in the document into an absolute URL.
										 *
										 * The path is resolved against $this->base when that is an absolute
										 * URL, otherwise against $this->url. Values that already contain
										 * "://" or "data:" are returned untouched, as is any value when the
										 * reference URL has no scheme and host to resolve against.
										 *
										 * @param array $filepath	preg match array; index 0 holds the path
										 * @return string	the resolved URL
										 */
										private function resolvePath($filepath)
										{
											$filepath	= $filepath[0];

											// already absolute, or inline data: nothing to resolve
											if (strpos($filepath, '://') !== false || strpos($filepath, 'data:') !== false) {
												return $filepath;
											}

											$base	= (string) $this->base;
											$url	= $this->url;

											if ($base != '' && strpos($base, '://') !== false) {
												$url	= $base;
											}

											$r	= parse_url($url);

											if (!is_array($r) || !isset($r['scheme']) || !isset($r['host'])) {
												return $filepath;
											}

											$domain	= $r['scheme'] . '://' . $r['host'];

											// keep an explicit port, otherwise links on e.g. :8080 point elsewhere
											if (isset($r['port'])) {
												$domain	.= ':' . $r['port'];
											}

											$file	= explode('/', $filepath);

											if ($file[0] == '') {
												// absolute path
												return $domain . $filepath;
											}

											// relative path
											$path	= explode('/', isset($r['path']) ? $r['path'] : '/');

											// drop a trailing slash, then a trailing file name, to get the directory
											if ($path && end($path) == '') array_pop($path);
											if ($path && strpos(end($path), '.') !== false) array_pop($path);

											// leading empty element makes implode() start with a slash
											$new	= array('');

											foreach ($file as $segment) {
												if ($segment == '..') {
													array_pop($path);
												} else {
													$new[]	= $segment;
												}
											}

											return $domain . implode('/', $path) . implode('/', $new);
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Extract the path component of a URL, as reported by parse_url().
										 * For a 'mailto:user@example.com?subject=x' style value that is the
										 * address without the scheme or the query string.
										 *
										 * @param array $v only element 0 is read, as the URL to parse
										 *                 (presumably a regex match array -- verify against
										 *                 the caller)
										 * @return string the path component, or '' when parse_url() fails or
										 *                reports no path
										 */
										private function resolveEmail($v)
										{
											$parts	= parse_url($v[0]);

											// parse_url() returns false on malformed input and omits 'path' when there is none
											if (!is_array($parts) || !isset($parts['path'])){
												return '';
											}

											return $parts['path'];
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Collapse list values to their first element for every class name
										 * listed in $this->singles.
										 *
										 * For each item in $s, a value stored under one of those class names
										 * is replaced by its element 0 when it is an array that has one.
										 * Items that are not arrays are left untouched.
										 *
										 * @param array $s list of items to process
										 * @return array $s with the listed values reduced to their first element
										 */
										private function dedupeSingles($s)
										{
											$singles	= $this->singles;

											// write back through the key rather than a by-reference loop variable,
											// so no reference to the last item outlives the loop
											foreach ($s as $idx => $item){
												if (!is_array($item)) continue;

												foreach ($singles as $classname){
													if (isset($s[$idx][$classname]) && is_array($s[$idx][$classname])){
														if (isset($s[$idx][$classname][0])) $s[$idx][$classname]	= $s[$idx][$classname][0];
													}
												}
											}

											return $s;
										}
							 | 
						||
| 
								 | 
							
										
							 | 
						||
| 
								 | 
							
										/**
										 * Recursively blank every non-array value stored under the key 'text'
										 * and drop all entries that end up empty.
										 *
										 * Nested arrays are processed first; array_filter() then removes every
										 * falsy entry at this level, including the blanked 'text' values and
										 * any nested array that became empty.
										 *
										 * @param array $s structure to clean
										 * @return array the cleaned structure, original keys preserved
										 */
										private function removeTextVals($s)
										{
											foreach ($s as $name => $entry){
												if (is_array($entry)){
													$s[$name]	= $this->removeTextVals($entry);
													continue;
												}

												// only the string key 'text' qualifies; integer keys never do
												if ($name === 'text'){
													$s[$name]	= '';
												}
											}

											return array_filter($s);
										}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								?>
							 |