Fix/Rewrite .gitignore regex builder

This commit is contained in:
Michael Voříšek 2021-04-13 17:30:56 +02:00 committed by Fabien Potencier
parent ed576b2554
commit 83f9fd3adf
2 changed files with 287 additions and 142 deletions

View File

@ -14,6 +14,7 @@ namespace Symfony\Component\Finder;
/** /**
* Gitignore matches against text. * Gitignore matches against text.
* *
* @author Michael Voříšek <vorismi3@fel.cvut.cz>
* @author Ahmed Abdou <mail@ahmd.io> * @author Ahmed Abdou <mail@ahmd.io>
*/ */
class Gitignore class Gitignore
@ -21,113 +22,66 @@ class Gitignore
/** /**
* Returns a regexp which is the equivalent of the gitignore pattern. * Returns a regexp which is the equivalent of the gitignore pattern.
* *
* @return string The regexp * Format specification: https://git-scm.com/docs/gitignore#_pattern_format
*/ */
public static function toRegex(string $gitignoreFileContent): string public static function toRegex(string $gitignoreFileContent): string
{ {
$gitignoreFileContent = preg_replace('/^[^\\\r\n]*#.*/m', '', $gitignoreFileContent); $gitignoreFileContent = preg_replace('~(?<!\\\\)#[^\n\r]*~', '', $gitignoreFileContent);
$gitignoreLines = preg_split('/\r\n|\r|\n/', $gitignoreFileContent); $gitignoreLines = preg_split('~\r\n?|\n~', $gitignoreFileContent);
$positives = []; $res = self::lineToRegex('');
$negatives = [];
foreach ($gitignoreLines as $i => $line) { foreach ($gitignoreLines as $i => $line) {
$line = trim($line); $line = preg_replace('~(?<!\\\\)[ \t]+$~', '', $line);
if ('' === $line) {
continue; if ('!' === substr($line, 0, 1)) {
$line = substr($line, 1);
$isNegative = true;
} else {
$isNegative = false;
} }
if (1 === preg_match('/^!/', $line)) { if ('' !== $line) {
$positives[$i] = null; if ($isNegative) {
$negatives[$i] = self::getRegexFromGitignore(preg_replace('/^!(.*)/', '${1}', $line), true); $res = '(?!'.self::lineToRegex($line).'$)'.$res;
} else {
continue; $res = '(?:'.$res.'|'.self::lineToRegex($line).')';
}
} }
$negatives[$i] = null;
$positives[$i] = self::getRegexFromGitignore($line);
} }
$index = 0; return '~^(?:'.$res.')~s';
$patterns = [];
foreach ($positives as $pattern) {
if (null === $pattern) {
continue;
}
$negativesAfter = array_filter(\array_slice($negatives, ++$index));
if ([] !== $negativesAfter) {
$pattern .= sprintf('(?<!%s)', implode('|', $negativesAfter));
}
$patterns[] = $pattern;
}
return sprintf('/^((%s))$/', implode(')|(', $patterns));
} }
private static function getRegexFromGitignore(string $gitignorePattern, bool $negative = false): string private static function lineToRegex(string $gitignoreLine): string
{ {
$regex = ''; if ('' === $gitignoreLine) {
$isRelativePath = false; return '$f'; // always false
// If there is a separator at the beginning or middle (or both) of the pattern, then the pattern is relative to the directory level of the particular .gitignore file itself
$slashPosition = strpos($gitignorePattern, '/');
if (false !== $slashPosition && \strlen($gitignorePattern) - 1 !== $slashPosition) {
if (0 === $slashPosition) {
$gitignorePattern = substr($gitignorePattern, 1);
}
$isRelativePath = true;
$regex .= '^';
} }
if ('/' === $gitignorePattern[\strlen($gitignorePattern) - 1]) { $slashPos = strpos($gitignoreLine, '/');
$gitignorePattern = substr($gitignorePattern, 0, -1); if (false !== $slashPos && \strlen($gitignoreLine) - 1 !== $slashPos) {
if (0 === $slashPos) {
$gitignoreLine = substr($gitignoreLine, 1);
}
$isAbsolute = true;
} else {
$isAbsolute = false;
} }
$iMax = \strlen($gitignorePattern); $parts = array_map(function (string $v): string {
for ($i = 0; $i < $iMax; ++$i) { $v = preg_quote(str_replace('\\', '', $v), '~');
$tripleChars = substr($gitignorePattern, $i, 3); $v = preg_replace_callback('~\\\\\[([^\[\]]*)\\\\\]~', function (array $matches): string {
if ('**/' === $tripleChars || '/**' === $tripleChars) { return '['.str_replace('\\-', '-', $matches[1]).']';
$regex .= '.*'; }, $v);
$i += 2; $v = preg_replace('~\\\\\*\\\\\*~', '[^/]+(?:/[^/]+)*', $v);
continue; $v = preg_replace('~\\\\\*~', '[^/]*', $v);
} $v = preg_replace('~\\\\\?~', '[^/]', $v);
$doubleChars = substr($gitignorePattern, $i, 2); return $v;
if ('**' === $doubleChars) { }, explode('/', $gitignoreLine));
$regex .= '.*';
++$i;
continue;
}
if ('*/' === $doubleChars) {
$regex .= '[^\/]*\/?[^\/]*';
++$i;
continue;
}
$c = $gitignorePattern[$i]; return ($isAbsolute ? '' : '(?:[^/]+/)*')
switch ($c) { .implode('/', $parts)
case '*': .('' !== end($parts) ? '(?:$|/)' : '');
$regex .= $isRelativePath ? '[^\/]*' : '[^\/]*\/?[^\/]*';
break;
case '/':
case '.':
case ':':
case '(':
case ')':
case '{':
case '}':
$regex .= '\\'.$c;
break;
default:
$regex .= $c;
}
}
if ($negative) {
// a lookbehind assertion has to be a fixed width (it can not have nested '|' statements)
return sprintf('%s$|%s\/$', $regex, $regex);
}
return '(?>'.$regex.'($|\/.*))';
} }
} }

View File

@ -13,136 +13,327 @@ namespace Symfony\Component\Finder\Tests;
use PHPUnit\Framework\TestCase; use PHPUnit\Framework\TestCase;
use Symfony\Component\Finder\Gitignore; use Symfony\Component\Finder\Gitignore;
/**
* @author Michael Voříšek <vorismi3@fel.cvut.cz>
*/
class GitignoreTest extends TestCase class GitignoreTest extends TestCase
{ {
/** /**
* @dataProvider provider * @dataProvider provider
* @dataProvider providerExtended
*/ */
public function testCases(string $patterns, array $matchingCases, array $nonMatchingCases) public function testToRegex(array $gitignoreLines, array $matchingCases, array $nonMatchingCases)
{ {
$patterns = implode("\n", $gitignoreLines);
$regex = Gitignore::toRegex($patterns); $regex = Gitignore::toRegex($patterns);
$this->assertSame($regex, Gitignore::toRegex(implode("\r\n", $gitignoreLines)));
$this->assertSame($regex, Gitignore::toRegex(implode("\r", $gitignoreLines)));
foreach ($matchingCases as $matchingCase) { foreach ($matchingCases as $matchingCase) {
$this->assertMatchesRegularExpression($regex, $matchingCase, sprintf('Failed asserting path [%s] matches gitignore patterns [%s] using regex [%s]', $matchingCase, $patterns, $regex)); $this->assertMatchesRegularExpression(
$regex,
$matchingCase,
sprintf(
"Failed asserting path:\n%s\nmatches gitignore patterns:\n%s",
preg_replace('~^~m', ' ', $matchingCase),
preg_replace('~^~m', ' ', $patterns)
)
);
} }
foreach ($nonMatchingCases as $nonMatchingCase) { foreach ($nonMatchingCases as $nonMatchingCase) {
$this->assertDoesNotMatchRegularExpression($regex, $nonMatchingCase, sprintf('Failed asserting path [%s] not matching gitignore patterns [%s] using regex [%s]', $nonMatchingCase, $patterns, $regex)); $this->assertDoesNotMatchRegularExpression(
$regex,
$nonMatchingCase,
sprintf("Failed asserting path:\n%s\nNOT matching gitignore patterns:\n%s",
preg_replace('~^~m', ' ', $nonMatchingCase),
preg_replace('~^~m', ' ', $patterns)
)
);
} }
} }
/**
* @return array return is array of
* [
* [
* '', // Git-ignore Pattern
* [], // array of file paths matching
* [], // array of file paths not matching
* ],
* ]
*/
public function provider(): array public function provider(): array
{ {
return [ $cases = [
[ [
' [''],
* [],
!/bin ['a', 'a/b', 'a/b/c', 'aa', 'm.txt', '.txt'],
!/bin/bash ],
', [
['a', 'X'],
['a', 'a/b', 'a/b/c', 'X', 'b/a', 'b/c/a', 'a/X', 'a/X/y', 'b/a/X/y'],
['A', 'x', 'aa', 'm.txt', '.txt', 'aa/b', 'b/aa'],
],
[
['/a', 'x', 'd/'],
['a', 'a/b', 'a/b/c', 'x', 'a/x', 'a/x/y', 'b/a/x/y', 'd/', 'd/u', 'e/d/', 'e/d/u'],
['b/a', 'b/c/a', 'aa', 'm.txt', '.txt', 'aa/b', 'b/aa', 'e/d'],
],
[
['a/', 'x'],
['a/b', 'a/b/c', 'x', 'a/x', 'a/x/y', 'b/a/x/y'],
['a', 'b/a', 'b/c/a', 'aa', 'm.txt', '.txt', 'aa/b', 'b/aa'],
],
[
['*'],
['a', 'a/b', 'a/b/c', 'aa', 'm.txt', '.txt'],
[],
],
[
['/*'],
['a', 'a/b', 'a/b/c', 'aa', 'm.txt', '.txt'],
[],
],
[
['/a', 'm/*'],
['a', 'a/b', 'a/b/c', 'm/'],
['aa', 'm', 'b/m', 'b/m/'],
],
[
['a', '!x'],
['a', 'a/b', 'a/b/c', 'b/a', 'b/c/a'],
['x', 'aa', 'm.txt', '.txt', 'aa/b', 'b/aa'],
],
[
['a', '!a/', 'b', '!b/b'],
['a', 'a/x', 'x/a', 'x/a/x', 'b', 'b'],
['a/', 'x/a/', 'bb', 'b/b', 'bb'],
],
[
['[a-c]', 'x[C-E][][o]', 'g-h'],
['a', 'b', 'c', 'xDo', 'g-h'],
['A', 'xdo', 'u', 'g', 'h'],
],
[
['a?', '*/??b?'],
['ax', 'x/xxbx'],
['a', 'axy', 'xxax', 'x/xxax', 'x/y/xxax'],
],
[
[' ', ' \ ', ' \ ', '/a ', '/b/c \ '],
[' ', ' ', 'x/ ', 'x/ ', 'a', 'a/x', 'b/c '],
[' ', ' ', 'x/ ', 'x/ ', 'a ', 'b/c '],
],
[
['#', ' #', '/ #', ' #', '/ #', ' \ #', ' \ #', 'a #', 'a #', 'a \ #', 'a \ #'],
[' ', ' ', 'a', 'a ', 'a '],
[' ', ' ', 'a ', 'a '],
],
[
["\t", "\t\\\t", " \t\\\t ", "\t#", "a\t#", "a\t\t#", "a \t#", "a\t\t\\\t#", "a \t\t\\\t\t#"],
["\t\t", " \t\t", 'a', "a\t\t\t", "a \t\t\t"],
["\t", "\t\t ", " \t\t ", "a\t", 'a ', "a \t", "a\t\t"],
],
[
[' a', 'b ', '\ ', 'c\ '],
[' a', 'b', ' ', 'c '],
['a', 'b ', 'c'],
],
[
['#a', '\#b', '\#/'],
['#b', '#/'],
['#a', 'a', 'b'],
],
[
['*', '!!', '!!*x', '\!!b'],
['a', '!!', '!!b'],
['!', '!x', '!xx'],
],
[
[
'*',
'!/bin',
'!/bin/bash',
],
['bin/cat', 'abc/bin/cat'], ['bin/cat', 'abc/bin/cat'],
['bin/bash'], ['bin/bash'],
], ],
[ [
'fi#le.txt', ['fi#le.txt'],
[], [],
['#file.txt'], ['#file.txt'],
], ],
[ [
' [
/bin/ '/bin/',
/usr/local/ '/usr/local/',
!/bin/bash '!/bin/bash',
!/usr/local/bin/bash '!/usr/local/bin/bash',
', ],
['bin/cat'], ['bin/cat'],
['bin/bash'], ['bin/bash'],
], ],
[ [
'*.py[co]', ['*.py[co]'],
['file.pyc', 'file.pyc'], ['file.pyc', 'file.pyc'],
['filexpyc', 'file.pycx', 'file.py'], ['filexpyc', 'file.pycx', 'file.py'],
], ],
[ [
'dir1/**/dir2/', ['dir1/**/dir2/'],
['dir1/dirA/dir2/', 'dir1/dirA/dirB/dir2/'], ['dir1/dirA/dir2/', 'dir1/dirA/dirB/dir2/'],
[], [],
], ],
[ [
'dir1/*/dir2/', ['dir1/*/dir2/'],
['dir1/dirA/dir2/'], ['dir1/dirA/dir2/'],
['dir1/dirA/dirB/dir2/'], ['dir1/dirA/dirB/dir2/'],
], ],
[ [
'/*.php', ['/*.php'],
['file.php'], ['file.php'],
['app/file.php'], ['app/file.php'],
], ],
[ [
'\#file.txt', ['\#file.txt'],
['#file.txt'], ['#file.txt'],
[], [],
], ],
[ [
'*.php', ['*.php'],
['app/file.php', 'file.php'], ['app/file.php', 'file.php'],
['file.phps', 'file.phps', 'filephps'], ['file.phps', 'file.phps', 'filephps'],
], ],
[ [
'app/cache/', ['app/cache/'],
['app/cache/file.txt', 'app/cache/dir1/dir2/file.txt'], ['app/cache/file.txt', 'app/cache/dir1/dir2/file.txt'],
['a/app/cache/file.txt'], ['a/app/cache/file.txt'],
], ],
[ [
' [
#IamComment '#IamComment',
/app/cache/', '/app/cache/',
],
['app/cache/file.txt', 'app/cache/subdir/ile.txt'], ['app/cache/file.txt', 'app/cache/subdir/ile.txt'],
['a/app/cache/file.txt', '#IamComment', 'IamComment'], ['a/app/cache/file.txt', '#IamComment', 'IamComment'],
], ],
[ [
' [
/app/cache/ '/app/cache/',
#LastLineIsComment', '#LastLineIsComment',
],
['app/cache/file.txt', 'app/cache/subdir/ile.txt'], ['app/cache/file.txt', 'app/cache/subdir/ile.txt'],
['a/app/cache/file.txt', '#LastLineIsComment', 'LastLineIsComment'], ['a/app/cache/file.txt', '#LastLineIsComment', 'LastLineIsComment'],
], ],
[ [
' [
/app/cache/ '/app/cache/',
\#file.txt '\#file.txt',
#LastLineIsComment', '#LastLineIsComment',
],
['app/cache/file.txt', 'app/cache/subdir/ile.txt', '#file.txt'], ['app/cache/file.txt', 'app/cache/subdir/ile.txt', '#file.txt'],
['a/app/cache/file.txt', '#LastLineIsComment', 'LastLineIsComment'], ['a/app/cache/file.txt', '#LastLineIsComment', 'LastLineIsComment'],
], ],
[ [
' [
/app/cache/ '/app/cache/',
\#file.txt '\#file.txt',
#IamComment '#IamComment',
another_file.txt', 'another_file.txt',
],
['app/cache/file.txt', 'app/cache/subdir/ile.txt', '#file.txt', 'another_file.txt'], ['app/cache/file.txt', 'app/cache/subdir/ile.txt', '#file.txt', 'another_file.txt'],
['a/app/cache/file.txt', 'IamComment', '#IamComment'], ['a/app/cache/file.txt', 'IamComment', '#IamComment'],
], ],
[ [
' [
/app/** '/app/**',
!/app/bin '!/app/bin',
!/app/bin/test '!/app/bin/test',
', ],
['app/test/file', 'app/bin/file'], ['app/test/file', 'app/bin/file'],
['app/bin/test'], ['app/bin/test'],
], ],
[
[
'/app/*/img',
'!/app/*/img/src',
],
['app/a/img', 'app/a/img/x', 'app/a/img/src/x'],
['app/a/img/src', 'app/a/img/src/'],
],
[
[
'app/**/img',
'!/app/**/img/src',
],
['app/a/img', 'app/a/img/x', 'app/a/img/src/x', 'app/a/b/img', 'app/a/b/img/x', 'app/a/b/img/src/x', 'app/a/b/c/img'],
['app/a/img/src', 'app/a/b/img/src', 'app/a/c/b/img/src'],
],
[
[
'/*',
'!/foo',
'/foo/*',
'!/foo/bar',
],
['bar', 'foo/ba', 'foo/barx', 'x/foo/bar'],
['foo', 'foo/bar'],
],
[
[
'/example/**',
'!/example/example.txt',
'!/example/packages',
],
['example/test', 'example/example.txt2', 'example/packages/foo.yaml'],
['example/example.txt', 'example/packages', 'example/packages/'],
],
]; ];
return $cases;
}
public function providerExtended(): array
{
$basicCases = $this->provider();
$cases = [];
foreach ($basicCases as $case) {
$cases[] = [
array_merge(['never'], $case[0], ['!never']),
$case[1],
$case[2],
];
$cases[] = [
array_merge(['!*'], $case[0]),
$case[1],
$case[2],
];
$cases[] = [
array_merge(['*', '!*'], $case[0]),
$case[1],
$case[2],
];
$cases[] = [
array_merge(['never', '**/never2', 'never3/**'], $case[0]),
$case[1],
$case[2],
];
$cases[] = [
array_merge(['!never', '!**/never2', '!never3/**'], $case[0]),
$case[1],
$case[2],
];
$lines = [];
for ($i = 0; $i < 30; ++$i) {
foreach ($case[0] as $line) {
$lines[] = $line;
}
}
$cases[] = [
array_merge(['!never', '!**/never2', '!never3/**'], $lines),
$case[1],
$case[2],
];
}
return $cases;
} }
} }