2010-03-31 07:42:18 +01:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace Symfony\Components\CssSelector;
|
|
|
|
|
|
|
|
/*
|
2010-04-07 01:51:29 +01:00
|
|
|
* This file is part of the Symfony package.
|
2010-03-31 07:42:18 +01:00
|
|
|
*
|
|
|
|
* (c) Fabien Potencier <fabien.potencier@symfony-project.com>
|
|
|
|
*
|
|
|
|
* For the full copyright and license information, please view the LICENSE
|
|
|
|
* file that was distributed with this source code.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parser is the main entry point of the component and can convert CSS
|
|
|
|
* selectors to XPath expressions.
|
|
|
|
*
|
|
|
|
* $xpath = Parser::cssToXpath('h1.foo');
|
|
|
|
*
|
|
|
|
* This component is a port of the Python lxml library,
|
|
|
|
* which is copyright Infrae and distributed under the BSD license.
|
|
|
|
*
|
2010-04-07 01:51:29 +01:00
|
|
|
* @package Symfony
|
|
|
|
* @subpackage Components_CssSelector
|
2010-03-31 07:42:18 +01:00
|
|
|
* @author Fabien Potencier <fabien.potencier@symfony-project.com>
|
|
|
|
*/
|
|
|
|
class Parser
|
|
|
|
{
|
2010-04-04 18:11:32 +01:00
|
|
|
/**
|
|
|
|
* @throws SyntaxError When got None for xpath expression
|
|
|
|
*/
|
2010-03-31 07:42:18 +01:00
|
|
|
static public function cssToXpath($cssExpr, $prefix = 'descendant-or-self::')
|
|
|
|
{
|
|
|
|
if (is_string($cssExpr))
|
|
|
|
{
|
|
|
|
if (preg_match('#^\w+\s*$#u', $cssExpr, $match))
|
|
|
|
{
|
|
|
|
return $prefix.trim($match[0]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (preg_match('~^(\w*)#(\w+)\s*$~u', $cssExpr, $match))
|
|
|
|
{
|
|
|
|
return sprintf("%s%s[@id = '%s']", $prefix, $match[1] ? $match[1] : '*', $match[2]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (preg_match('#^(\w*)\.(\w+)\s*$#u', $cssExpr, $match))
|
|
|
|
{
|
|
|
|
return sprintf("%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]", $prefix, $match[1] ? $match[1] : '*', $match[2]);
|
|
|
|
}
|
|
|
|
|
|
|
|
$parser = new self();
|
|
|
|
$cssExpr = $parser->parse($cssExpr);
|
|
|
|
}
|
|
|
|
|
|
|
|
$expr = $cssExpr->toXpath();
|
|
|
|
|
|
|
|
// @codeCoverageIgnoreStart
|
|
|
|
if (!$expr)
|
|
|
|
{
|
2010-04-24 00:22:16 +01:00
|
|
|
throw new SyntaxError(sprintf('Got None for xpath expression from %s.', $cssExpr));
|
2010-03-31 07:42:18 +01:00
|
|
|
}
|
|
|
|
// @codeCoverageIgnoreEnd
|
|
|
|
|
|
|
|
if ($prefix)
|
|
|
|
{
|
|
|
|
$expr->addPrefix($prefix);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (string) $expr;
|
|
|
|
}
|
|
|
|
|
2010-04-04 18:11:32 +01:00
|
|
|
/**
|
|
|
|
* @throws \Exception When tokenizer throws it while parsing
|
|
|
|
*/
|
2010-03-31 07:42:18 +01:00
|
|
|
public function parse($string)
|
|
|
|
{
|
|
|
|
$tokenizer = new Tokenizer();
|
|
|
|
|
|
|
|
$stream = new TokenStream($tokenizer->tokenize($string), $string);
|
|
|
|
|
|
|
|
try
|
|
|
|
{
|
|
|
|
return $this->parseSelectorGroup($stream);
|
|
|
|
}
|
|
|
|
catch (\Exception $e)
|
|
|
|
{
|
|
|
|
$class = get_class($e);
|
|
|
|
|
2010-04-24 00:22:16 +01:00
|
|
|
throw new $class(sprintf('%s at %s -> %s', $e->getMessage(), implode($stream->getUsed(), ''), $stream->peek()));
|
2010-03-31 07:42:18 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
protected function parseSelectorGroup($stream)
|
|
|
|
{
|
|
|
|
$result = array();
|
|
|
|
while (1)
|
|
|
|
{
|
|
|
|
$result[] = $this->parseSelector($stream);
|
|
|
|
if ($stream->peek() == ',')
|
|
|
|
{
|
|
|
|
$stream->next();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (count($result) == 1)
|
|
|
|
{
|
|
|
|
return $result[0];
|
|
|
|
}
|
2010-04-03 00:54:42 +01:00
|
|
|
|
|
|
|
return new Node\OrNode($result);
|
2010-03-31 07:42:18 +01:00
|
|
|
}
|
|
|
|
|
2010-04-04 18:11:32 +01:00
|
|
|
/**
|
|
|
|
* @throws SyntaxError When expected selector but got something else
|
|
|
|
*/
|
2010-03-31 07:42:18 +01:00
|
|
|
protected function parseSelector($stream)
|
|
|
|
{
|
|
|
|
$result = $this->parseSimpleSelector($stream);
|
|
|
|
|
|
|
|
while (1)
|
|
|
|
{
|
|
|
|
$peek = $stream->peek();
|
|
|
|
if ($peek == ',' || $peek === null)
|
|
|
|
{
|
|
|
|
return $result;
|
|
|
|
}
|
|
|
|
elseif (in_array($peek, array('+', '>', '~')))
|
|
|
|
{
|
2010-04-03 00:48:10 +01:00
|
|
|
// A combinator
|
2010-03-31 07:42:18 +01:00
|
|
|
$combinator = (string) $stream->next();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
$combinator = ' ';
|
|
|
|
}
|
|
|
|
$consumed = count($stream->getUsed());
|
|
|
|
$next_selector = $this->parseSimpleSelector($stream);
|
|
|
|
if ($consumed == count($stream->getUsed()))
|
|
|
|
{
|
|
|
|
throw new SyntaxError(sprintf("Expected selector, got '%s'", $stream->peek()));
|
|
|
|
}
|
|
|
|
|
|
|
|
$result = new Node\CombinedSelectorNode($result, $combinator, $next_selector);
|
|
|
|
}
|
|
|
|
|
|
|
|
return $result;
|
|
|
|
}
|
|
|
|
|
2010-04-04 18:11:32 +01:00
|
|
|
/**
|
|
|
|
* @throws SyntaxError When expected symbol but got something else
|
|
|
|
*/
|
2010-03-31 07:42:18 +01:00
|
|
|
protected function parseSimpleSelector($stream)
|
|
|
|
{
|
|
|
|
$peek = $stream->peek();
|
|
|
|
if ($peek != '*' && !$peek->isType('Symbol'))
|
|
|
|
{
|
|
|
|
$element = $namespace = '*';
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
$next = $stream->next();
|
|
|
|
if ($next != '*' && !$next->isType('Symbol'))
|
|
|
|
{
|
|
|
|
throw new SyntaxError(sprintf("Expected symbol, got '%s'", $next));
|
|
|
|
}
|
|
|
|
|
|
|
|
if ($stream->peek() == '|')
|
|
|
|
{
|
|
|
|
$namespace = $next;
|
|
|
|
$stream->next();
|
|
|
|
$element = $stream->next();
|
|
|
|
if ($element != '*' && !$next->isType('Symbol'))
|
|
|
|
{
|
|
|
|
throw new SyntaxError(sprintf("Expected symbol, got '%s'", $next));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
$namespace = '*';
|
|
|
|
$element = $next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
$result = new Node\ElementNode($namespace, $element);
|
|
|
|
$has_hash = false;
|
|
|
|
while (1)
|
|
|
|
{
|
|
|
|
$peek = $stream->peek();
|
|
|
|
if ($peek == '#')
|
|
|
|
{
|
|
|
|
if ($has_hash)
|
|
|
|
{
|
2010-04-03 00:48:10 +01:00
|
|
|
/* You can't have two hashes
|
|
|
|
(FIXME: is there some more general rule I'm missing?) */
|
2010-03-31 07:42:18 +01:00
|
|
|
// @codeCoverageIgnoreStart
|
|
|
|
break;
|
|
|
|
// @codeCoverageIgnoreEnd
|
|
|
|
}
|
|
|
|
$stream->next();
|
|
|
|
$result = new Node\HashNode($result, $stream->next());
|
|
|
|
$has_hash = true;
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
elseif ($peek == '.')
|
|
|
|
{
|
|
|
|
$stream->next();
|
|
|
|
$result = new Node\ClassNode($result, $stream->next());
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
elseif ($peek == '[')
|
|
|
|
{
|
|
|
|
$stream->next();
|
|
|
|
$result = $this->parseAttrib($result, $stream);
|
|
|
|
$next = $stream->next();
|
|
|
|
if ($next != ']')
|
|
|
|
{
|
|
|
|
throw new SyntaxError(sprintf("] expected, got '%s'", $next));
|
|
|
|
}
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
elseif ($peek == ':' || $peek == '::')
|
|
|
|
{
|
|
|
|
$type = $stream->next();
|
|
|
|
$ident = $stream->next();
|
|
|
|
if (!$ident || !$ident->isType('Symbol'))
|
|
|
|
{
|
|
|
|
throw new SyntaxError(sprintf("Expected symbol, got '%s'", $ident));
|
|
|
|
}
|
|
|
|
|
|
|
|
if ($stream->peek() == '(')
|
|
|
|
{
|
|
|
|
$stream->next();
|
|
|
|
$peek = $stream->peek();
|
|
|
|
if ($peek->isType('String'))
|
|
|
|
{
|
|
|
|
$selector = $stream->next();
|
|
|
|
}
|
|
|
|
elseif ($peek->isType('Symbol') && is_int($peek))
|
|
|
|
{
|
|
|
|
$selector = intval($stream->next());
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2010-04-03 00:48:10 +01:00
|
|
|
// FIXME: parseSimpleSelector, or selector, or...?
|
2010-03-31 07:42:18 +01:00
|
|
|
$selector = $this->parseSimpleSelector($stream);
|
|
|
|
}
|
|
|
|
$next = $stream->next();
|
|
|
|
if ($next != ')')
|
|
|
|
{
|
|
|
|
throw new SyntaxError(sprintf("Expected ')', got '%s' and '%s'", $next, $selector));
|
|
|
|
}
|
|
|
|
|
|
|
|
$result = new Node\FunctionNode($result, $type, $ident, $selector);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
$result = new Node\PseudoNode($result, $type, $ident);
|
|
|
|
}
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if ($peek == ' ')
|
|
|
|
{
|
|
|
|
$stream->next();
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
2010-04-03 00:48:10 +01:00
|
|
|
// FIXME: not sure what "negation" is
|
2010-03-31 07:42:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return $result;
|
|
|
|
}
|
|
|
|
|
2010-04-04 18:11:32 +01:00
|
|
|
/**
|
2010-04-07 02:07:59 +01:00
|
|
|
* @throws SyntaxError When encountered unexpected selector
|
2010-04-04 18:11:32 +01:00
|
|
|
*/
|
2010-03-31 07:42:18 +01:00
|
|
|
protected function parseAttrib($selector, $stream)
|
|
|
|
{
|
|
|
|
$attrib = $stream->next();
|
|
|
|
if ($stream->peek() == '|')
|
|
|
|
{
|
|
|
|
$namespace = $attrib;
|
|
|
|
$stream->next();
|
|
|
|
$attrib = $stream->next();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
$namespace = '*';
|
|
|
|
}
|
|
|
|
|
|
|
|
if ($stream->peek() == ']')
|
|
|
|
{
|
|
|
|
return new Node\AttribNode($selector, $namespace, $attrib, 'exists', null);
|
|
|
|
}
|
|
|
|
|
|
|
|
$op = $stream->next();
|
|
|
|
if (!in_array($op, array('^=', '$=', '*=', '=', '~=', '|=', '!=')))
|
|
|
|
{
|
|
|
|
throw new SyntaxError(sprintf("Operator expected, got '%s'", $op));
|
|
|
|
}
|
|
|
|
|
|
|
|
$value = $stream->next();
|
|
|
|
if (!$value->isType('Symbol') && !$value->isType('String'))
|
|
|
|
{
|
|
|
|
throw new SyntaxError(sprintf("Expected string or symbol, got '%s'", $value));
|
|
|
|
}
|
|
|
|
|
|
|
|
return new Node\AttribNode($selector, $namespace, $attrib, $op, $value);
|
|
|
|
}
|
|
|
|
}
|