| // +-----------------------------------------------------------------------+ // // $Id: URL2.php,v 1.10 2008/04/26 21:57:08 schmidt Exp $ // // Net_URL2 Class (PHP5 Only) // This code is released under the BSD License - http://www.opensource.org/licenses/bsd-license.php /** * @license BSD License */ class Net_URL2 { /** * Do strict parsing in resolve() (see RFC 3986, section 5.2.2). Default * is true. */ const OPTION_STRICT = 'strict'; /** * Represent arrays in query using PHP's [] notation. Default is true. */ const OPTION_USE_BRACKETS = 'use_brackets'; /** * URL-encode query variable keys. Default is true. */ const OPTION_ENCODE_KEYS = 'encode_keys'; /** * Query variable separators when parsing the query string. Every character * is considered a separator. Default is specified by the * arg_separator.input php.ini setting (this defaults to "&"). */ const OPTION_SEPARATOR_INPUT = 'input_separator'; /** * Query variable separator used when generating the query string. Default * is specified by the arg_separator.output php.ini setting (this defaults * to "&"). */ const OPTION_SEPARATOR_OUTPUT = 'output_separator'; /** * Default options corresponds to how PHP handles $_GET. */ private $options = array( self::OPTION_STRICT => true, self::OPTION_USE_BRACKETS => true, self::OPTION_ENCODE_KEYS => true, self::OPTION_SEPARATOR_INPUT => 'x&', self::OPTION_SEPARATOR_OUTPUT => 'x&', ); /** * @var string|bool */ private $scheme = false; /** * @var string|bool */ private $userinfo = false; /** * @var string|bool */ private $host = false; /** * @var int|bool */ private $port = false; /** * @var string */ private $path = ''; /** * @var string|bool */ private $query = false; /** * @var string|bool */ private $fragment = false; /** * @param string $url an absolute or relative URL * @param array $options */ public function __construct($url, $options = null) { $this->setOption(self::OPTION_SEPARATOR_INPUT, ini_get('arg_separator.input')); $this->setOption(self::OPTION_SEPARATOR_OUTPUT, ini_get('arg_separator.output')); if (is_array($options)) { foreach ($options as $optionName => $value) { $this->setOption($optionName); } } if (preg_match('@^([a-z][a-z0-9.+-]*):@i', $url, $reg)) { $this->scheme = $reg[1]; $url = substr($url, strlen($reg[0])); } if (preg_match('@^//([^/#?]+)@', $url, $reg)) { $this->setAuthority($reg[1]); $url = substr($url, strlen($reg[0])); } $i = strcspn($url, '?#'); $this->path = substr($url, 0, $i); $url = substr($url, $i); if (preg_match('@^\?([^#]*)@', $url, $reg)) { $this->query = $reg[1]; $url = substr($url, strlen($reg[0])); } if ($url) { $this->fragment = substr($url, 1); } } /** * Returns the scheme, e.g. "http" or "urn", or false if there is no * scheme specified, i.e. if this is a relative URL. * * @return string|bool */ public function getScheme() { return $this->scheme; } /** * @param string|bool $scheme * * @return void * @see getScheme() */ public function setScheme($scheme) { $this->scheme = $scheme; } /** * Returns the user part of the userinfo part (the part preceding the first * ":"), or false if there is no userinfo part. * * @return string|bool */ public function getUser() { return $this->userinfo !== false ? preg_replace('@:.*$@', '', $this->userinfo) : false; } /** * Returns the password part of the userinfo part (the part after the first * ":"), or false if there is no userinfo part (i.e. the URL does not * contain "@" in front of the hostname) or the userinfo part does not * contain ":". * * @return string|bool */ public function getPassword() { return $this->userinfo !== false ? substr(strstr($this->userinfo, ':'), 1) : false; } /** * Returns the userinfo part, or false if there is none, i.e. if the * authority part does not contain "@". * * @return string|bool */ public function getUserinfo() { return $this->userinfo; } /** * Sets the userinfo part. If two arguments are passed, they are combined * in the userinfo part as username ":" password. * * @param string|bool $userinfo userinfo or username * @param string|bool $password * * @return void */ public function setUserinfo($userinfo, $password = false) { $this->userinfo = $userinfo; if ($password !== false) { $this->userinfo .= ':' . $password; } } /** * Returns the host part, or false if there is no authority part, e.g. * relative URLs. * * @return string|bool */ public function getHost() { return $this->host; } /** * @param string|bool $host * * @return void */ public function setHost($host) { $this->host = $host; } /** * Returns the port number, or false if there is no port number specified, * i.e. if the default port is to be used. * * @return int|bool */ public function getPort() { return $this->port; } /** * @param int|bool $port * * @return void */ public function setPort($port) { $this->port = intval($port); } /** * Returns the authority part, i.e. [ userinfo "@" ] host [ ":" port ], or * false if there is no authority none. * * @return string|bool */ public function getAuthority() { if (!$this->host) { return false; } $authority = ''; if ($this->userinfo !== false) { $authority .= $this->userinfo . '@'; } $authority .= $this->host; if ($this->port !== false) { $authority .= ':' . $this->port; } return $authority; } /** * @param string|false $authority * * @return void */ public function setAuthority($authority) { $this->user = false; $this->pass = false; $this->host = false; $this->port = false; if (preg_match('@^(([^\@]+)\@)?([^:]+)(:(\d*))?$@', $authority, $reg)) { if ($reg[1]) { $this->userinfo = $reg[2]; } $this->host = $reg[3]; if (isset($reg[5])) { $this->port = intval($reg[5]); } } } /** * Returns the path part (possibly an empty string). * * @return string */ public function getPath() { return $this->path; } /** * @param string $path * * @return void */ public function setPath($path) { $this->path = $path; } /** * Returns the query string (excluding the leading "?"), or false if "?" * isn't present in the URL. * * @return string|bool * @see self::getQueryVariables() */ public function getQuery() { return $this->query; } /** * @param string|bool $query * * @return void * @see self::setQueryVariables() */ public function setQuery($query) { $this->query = $query; } /** * Returns the fragment name, or false if "#" isn't present in the URL. * * @return string|bool */ public function getFragment() { return $this->fragment; } /** * @param string|bool $fragment * * @return void */ public function setFragment($fragment) { $this->fragment = $fragment; } /** * Returns the query string like an array as the variables would appear in * $_GET in a PHP script. * * @return array */ public function getQueryVariables() { $pattern = '/[' . preg_quote($this->getOption(self::OPTION_SEPARATOR_INPUT), '/') . ']/'; $parts = preg_split($pattern, $this->query, -1, PREG_SPLIT_NO_EMPTY); $return = array(); foreach ($parts as $part) { if (strpos($part, '=') !== false) { list($key, $value) = explode('=', $part, 2); } else { $key = $part; $value = null; } if ($this->getOption(self::OPTION_ENCODE_KEYS)) { $key = rawurldecode($key); } $value = rawurldecode($value); if ($this->getOption(self::OPTION_USE_BRACKETS) && preg_match('#^(.*)\[([0-9a-z_-]*)\]#i', $key, $matches)) { $key = $matches[1]; $idx = $matches[2]; // Ensure is an array if (empty($return[$key]) || !is_array($return[$key])) { $return[$key] = array(); } // Add data if ($idx === '') { $return[$key][] = $value; } else { $return[$key][$idx] = $value; } } elseif (!$this->getOption(self::OPTION_USE_BRACKETS) && !empty($return[$key]) ) { $return[$key] = (array) $return[$key]; $return[$key][] = $value; } else { $return[$key] = $value; } } return $return; } /** * @param array $array (name => value) array * * @return void */ public function setQueryVariables(array $array) { if (!$array) { $this->query = false; } else { foreach ($array as $name => $value) { if ($this->getOption(self::OPTION_ENCODE_KEYS)) { $name = rawurlencode($name); } if (is_array($value)) { foreach ($value as $k => $v) { $parts[] = $this->getOption(self::OPTION_USE_BRACKETS) ? sprintf('%s[%s]=%s', $name, $k, $v) : ($name . '=' . $v); } } elseif (!is_null($value)) { $parts[] = $name . '=' . $value; } else { $parts[] = $name; } } $this->query = implode($this->getOption(self::OPTION_SEPARATOR_OUTPUT), $parts); } } /** * @param string $name * @param mixed $value * * @return array */ public function setQueryVariable($name, $value) { $array = $this->getQueryVariables(); $array[$name] = $value; $this->setQueryVariables($array); } /** * @param string $name * * @return void */ public function unsetQueryVariable($name) { $array = $this->getQueryVariables(); unset($array[$name]); $this->setQueryVariables($array); } /** * Returns a string representation of this URL. * * @return string */ public function getURL() { // See RFC 3986, section 5.3 $url = ""; if ($this->scheme !== false) { $url .= $this->scheme . ':'; } $authority = $this->getAuthority(); if ($authority !== false) { $url .= '//' . $authority; } $url .= $this->path; if ($this->query !== false) { $url .= '?' . $this->query; } if ($this->fragment !== false) { $url .= '#' . $this->fragment; } return $url; } /** * Returns a normalized string representation of this URL. This is useful * for comparison of URLs. * * @return string */ public function getNormalizedURL() { $url = clone $this; $url->normalize(); return $url->getUrl(); } /** * Returns a normalized Net_URL2 instance. * * @return Net_URL2 */ public function normalize() { // See RFC 3886, section 6 // Schemes are case-insensitive if ($this->scheme) { $this->scheme = strtolower($this->scheme); } // Hostnames are case-insensitive if ($this->host) { $this->host = strtolower($this->host); } // Remove default port number for known schemes (RFC 3986, section 6.2.3) if ($this->port && $this->scheme && $this->port == getservbyname($this->scheme, 'tcp')) { $this->port = false; } // Normalize case of %XX percentage-encodings (RFC 3986, section 6.2.2.1) foreach (array('userinfo', 'host', 'path') as $part) { if ($this->$part) { $this->$part = preg_replace('/%[0-9a-f]{2}/ie', 'strtoupper("\0")', $this->$part); } } // Path segment normalization (RFC 3986, section 6.2.2.3) $this->path = self::removeDotSegments($this->path); // Scheme based normalization (RFC 3986, section 6.2.3) if ($this->host && !$this->path) { $this->path = '/'; } } /** * Returns whether this instance represents an absolute URL. * * @return bool */ public function isAbsolute() { return (bool) $this->scheme; } /** * Returns an Net_URL2 instance representing an absolute URL relative to * this URL. * * @param Net_URL2|string $reference relative URL * * @return Net_URL2 */ public function resolve($reference) { if (is_string($reference)) { $reference = new self($reference); } if (!$this->isAbsolute()) { throw new Exception('Base-URL must be absolute'); } // A non-strict parser may ignore a scheme in the reference if it is // identical to the base URI's scheme. if (!$this->getOption(self::OPTION_STRICT) && $reference->scheme == $this->scheme) { $reference->scheme = false; } $target = new self(''); if ($reference->scheme !== false) { $target->scheme = $reference->scheme; $target->setAuthority($reference->getAuthority()); $target->path = self::removeDotSegments($reference->path); $target->query = $reference->query; } else { $authority = $reference->getAuthority(); if ($authority !== false) { $target->setAuthority($authority); $target->path = self::removeDotSegments($reference->path); $target->query = $reference->query; } else { if ($reference->path == '') { $target->path = $this->path; if ($reference->query !== false) { $target->query = $reference->query; } else { $target->query = $this->query; } } else { if (substr($reference->path, 0, 1) == '/') { $target->path = self::removeDotSegments($reference->path); } else { // Merge paths (RFC 3986, section 5.2.3) if ($this->host !== false && $this->path == '') { $target->path = '/' . $this->path; } else { $i = strrpos($this->path, '/'); if ($i !== false) { $target->path = substr($this->path, 0, $i + 1); } $target->path .= $reference->path; } $target->path = self::removeDotSegments($target->path); } $target->query = $reference->query; } $target->setAuthority($this->getAuthority()); } $target->scheme = $this->scheme; } $target->fragment = $reference->fragment; return $target; } /** * Removes dots as described in RFC 3986, section 5.2.4, e.g. * "/foo/../bar/baz" => "/bar/baz" * * @param string $path a path * * @return string a path */ private static function removeDotSegments($path) { $output = ''; // Make sure not to be trapped in an infinite loop due to a bug in this // method $j = 0; while ($path && $j++ < 100) { // Step A if (substr($path, 0, 2) == './') { $path = substr($path, 2); } elseif (substr($path, 0, 3) == '../') { $path = substr($path, 3); // Step B } elseif (substr($path, 0, 3) == '/./' || $path == '/.') { $path = '/' . substr($path, 3); // Step C } elseif (substr($path, 0, 4) == '/../' || $path == '/..') { $path = '/' . substr($path, 4); $i = strrpos($output, '/'); $output = $i === false ? '' : substr($output, 0, $i); // Step D } elseif ($path == '.' || $path == '..') { $path = ''; // Step E } else { $i = strpos($path, '/'); if ($i === 0) { $i = strpos($path, '/', 1); } if ($i === false) { $i = strlen($path); } $output .= substr($path, 0, $i); $path = substr($path, $i); } } return $output; } /** * Returns a Net_URL2 instance representing the canonical URL of the * currently executing PHP script. * * @return string */ public static function getCanonical() { if (!isset($_SERVER['REQUEST_METHOD'])) { // ALERT - no current URL throw new Exception('Script was not called through a webserver'); } // Begin with a relative URL $url = new self($_SERVER['PHP_SELF']); $url->scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http'; $url->host = $_SERVER['SERVER_NAME']; $port = intval($_SERVER['SERVER_PORT']); if ($url->scheme == 'http' && $port != 80 || $url->scheme == 'https' && $port != 443) { $url->port = $port; } return $url; } /** * Returns the URL used to retrieve the current request. * * @return string */ public static function getRequestedURL() { return self::getRequested()->getUrl(); } /** * Returns a Net_URL2 instance representing the URL used to retrieve the * current request. * * @return Net_URL2 */ public static function getRequested() { if (!isset($_SERVER['REQUEST_METHOD'])) { // ALERT - no current URL throw new Exception('Script was not called through a webserver'); } // Begin with a relative URL $url = new self($_SERVER['REQUEST_URI']); $url->scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http'; // Set host and possibly port $url->setAuthority($_SERVER['HTTP_HOST']); return $url; } /** * Sets the specified option. * * @param string $optionName a self::OPTION_ constant * @param mixed $value option value * * @return void * @see self::OPTION_STRICT * @see self::OPTION_USE_BRACKETS * @see self::OPTION_ENCODE_KEYS */ function setOption($optionName, $value) { if (!array_key_exists($optionName, $this->options)) { return false; } $this->options[$optionName] = $value; } /** * Returns the value of the specified option. * * @param string $optionName The name of the option to retrieve * * @return mixed */ function getOption($optionName) { return isset($this->options[$optionName]) ? $this->options[$optionName] : false; } }