bug #37560 [Finder] Fix GitIgnore parser when dealing with (sub)directories and take order of lines into account (Jeroeny)

This PR was squashed before being merged into the 4.4 branch.

Discussion
----------

[Finder] Fix GitIgnore parser when dealing with (sub)directories and take order of lines into account

| Q             | A
| ------------- | ---
| Branch?       |  4.4
| Bug fix?      | yes
| New feature?  | no
| Deprecations? | no
| Tickets       | Fix https://github.com/symfony/symfony/issues/37424
| License       | MIT

The new regex is built per positive `.gitignore` line: each positive line becomes a match group followed by a negative lookbehind containing all the negations that appear on the lines after it. This also fixes some other behaviors that didn't match the `.gitignore` spec, as well as two incorrect tests. I think it's likely that there are more edge cases this PR does not cover, but I haven't found any yet.

See the issue for more info.

Commits
-------

609dcf6b08 [Finder] Fix GitIgnore parser when dealing with (sub)directories and take order of lines into account
This commit is contained in:
Fabien Potencier 2020-07-31 08:51:39 +02:00
commit bea431935f
2 changed files with 70 additions and 32 deletions

View File

@ -27,42 +27,56 @@ class Gitignore
{ {
$gitignoreFileContent = preg_replace('/^[^\\\r\n]*#.*/m', '', $gitignoreFileContent); $gitignoreFileContent = preg_replace('/^[^\\\r\n]*#.*/m', '', $gitignoreFileContent);
$gitignoreLines = preg_split('/\r\n|\r|\n/', $gitignoreFileContent); $gitignoreLines = preg_split('/\r\n|\r|\n/', $gitignoreFileContent);
$gitignoreLines = array_map('trim', $gitignoreLines);
$gitignoreLines = array_filter($gitignoreLines);
$ignoreLinesPositive = array_filter($gitignoreLines, function (string $line) { $positives = [];
return !preg_match('/^!/', $line); $negatives = [];
}); foreach ($gitignoreLines as $i => $line) {
$line = trim($line);
if ('' === $line) {
continue;
}
$ignoreLinesNegative = array_filter($gitignoreLines, function (string $line) { if (1 === preg_match('/^!/', $line)) {
return preg_match('/^!/', $line); $positives[$i] = null;
}); $negatives[$i] = self::getRegexFromGitignore(preg_replace('/^!(.*)/', '${1}', $line), true);
$ignoreLinesNegative = array_map(function (string $line) { continue;
return preg_replace('/^!(.*)/', '${1}', $line); }
}, $ignoreLinesNegative); $negatives[$i] = null;
$ignoreLinesNegative = array_map([__CLASS__, 'getRegexFromGitignore'], $ignoreLinesNegative); $positives[$i] = self::getRegexFromGitignore($line);
$ignoreLinesPositive = array_map([__CLASS__, 'getRegexFromGitignore'], $ignoreLinesPositive);
if (empty($ignoreLinesPositive)) {
return '/^$/';
} }
if (empty($ignoreLinesNegative)) { $index = 0;
return sprintf('/%s/', implode('|', $ignoreLinesPositive)); $patterns = [];
foreach ($positives as $pattern) {
if (null === $pattern) {
continue;
}
$negativesAfter = array_filter(\array_slice($negatives, ++$index));
if ($negativesAfter !== []) {
$pattern .= sprintf('(?<!%s)', implode('|', $negativesAfter));
}
$patterns[] = $pattern;
} }
return sprintf('/(?=^(?:(?!(%s)).)*$)(%s)/', implode('|', $ignoreLinesNegative), implode('|', $ignoreLinesPositive)); return sprintf('/^((%s))$/', implode(')|(', $patterns));
} }
private static function getRegexFromGitignore(string $gitignorePattern): string private static function getRegexFromGitignore(string $gitignorePattern, bool $negative = false): string
{ {
$regex = '('; $regex = '';
if (0 === strpos($gitignorePattern, '/')) { $isRelativePath = false;
$gitignorePattern = substr($gitignorePattern, 1); // If there is a separator at the beginning or middle (or both) of the pattern, then the pattern is relative to the directory level of the particular .gitignore file itself
$slashPosition = strpos($gitignorePattern, '/');
if (false !== $slashPosition && \strlen($gitignorePattern) - 1 !== $slashPosition) {
if (0 === $slashPosition) {
$gitignorePattern = substr($gitignorePattern, 1);
}
$isRelativePath = true;
$regex .= '^'; $regex .= '^';
} else {
$regex .= '(^|\/)';
} }
if ('/' === $gitignorePattern[\strlen($gitignorePattern) - 1]) { if ('/' === $gitignorePattern[\strlen($gitignorePattern) - 1]) {
@ -71,9 +85,21 @@ class Gitignore
$iMax = \strlen($gitignorePattern); $iMax = \strlen($gitignorePattern);
for ($i = 0; $i < $iMax; ++$i) { for ($i = 0; $i < $iMax; ++$i) {
$tripleChars = substr($gitignorePattern, $i, 3);
if ('**/' === $tripleChars || '/**' === $tripleChars) {
$regex .= '.*';
$i += 2;
continue;
}
$doubleChars = substr($gitignorePattern, $i, 2); $doubleChars = substr($gitignorePattern, $i, 2);
if ('**' === $doubleChars) { if ('**' === $doubleChars) {
$regex .= '.+'; $regex .= '.*';
++$i;
continue;
}
if ('*/' === $doubleChars) {
$regex .= '[^\/]*\/?[^\/]*';
++$i; ++$i;
continue; continue;
} }
@ -81,7 +107,7 @@ class Gitignore
$c = $gitignorePattern[$i]; $c = $gitignorePattern[$i];
switch ($c) { switch ($c) {
case '*': case '*':
$regex .= '[^\/]+'; $regex .= $isRelativePath ? '[^\/]*' : '[^\/]*\/?[^\/]*';
break; break;
case '/': case '/':
case '.': case '.':
@ -97,9 +123,11 @@ class Gitignore
} }
} }
$regex .= '($|\/)'; if ($negative) {
$regex .= ')'; // a lookbehind assertion has to be a fixed width (it can not have nested '|' statements)
return sprintf('%s$|%s\/$', $regex, $regex);
}
return $regex; return '(?>'.$regex.'($|\/.*))';
} }
} }

View File

@ -47,6 +47,7 @@ class GitignoreTest extends TestCase
[ [
' '
* *
!/bin
!/bin/bash !/bin/bash
', ',
['bin/cat', 'abc/bin/cat'], ['bin/cat', 'abc/bin/cat'],
@ -99,8 +100,8 @@ class GitignoreTest extends TestCase
], ],
[ [
'app/cache/', 'app/cache/',
['app/cache/file.txt', 'app/cache/dir1/dir2/file.txt', 'a/app/cache/file.txt'], ['app/cache/file.txt', 'app/cache/dir1/dir2/file.txt'],
[], ['a/app/cache/file.txt'],
], ],
[ [
' '
@ -133,6 +134,15 @@ class GitignoreTest extends TestCase
['app/cache/file.txt', 'app/cache/subdir/ile.txt', '#file.txt', 'another_file.txt'], ['app/cache/file.txt', 'app/cache/subdir/ile.txt', '#file.txt', 'another_file.txt'],
['a/app/cache/file.txt', 'IamComment', '#IamComment'], ['a/app/cache/file.txt', 'IamComment', '#IamComment'],
], ],
[
'
/app/**
!/app/bin
!/app/bin/test
',
['app/test/file', 'app/bin/file'],
['app/bin/test'],
],
]; ];
} }
} }