From 0057d9874c3d9ebdbf625a16098862bd460c7372 Mon Sep 17 00:00:00 2001 From: Timo Ewalds Date: Thu, 10 Apr 2025 00:15:15 +0100 Subject: [PATCH 1/7] Add an implicit ".git/" line which git does internally. This is useful if you want to list only the files that git would track. It clearly doesn't track its own files. --- gitignore_parser.py | 5 ++++- tests.py | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/gitignore_parser.py b/gitignore_parser.py index 98c4afe..66ef477 100644 --- a/gitignore_parser.py +++ b/gitignore_parser.py @@ -15,7 +15,10 @@ def handle_negation(file_path, rules: Reversible["IgnoreRule"]): def parse_gitignore(full_path, base_dir=None): if base_dir is None: base_dir = dirname(full_path) - rules = [] + rules = [ + rule_from_pattern(".git/", base_path=_normalize_path(base_dir), + source=(full_path, -1)), + ] with open(full_path) as ignore_file: counter = 0 for line in ignore_file: diff --git a/tests.py b/tests.py index 5ccd3af..4170171 100644 --- a/tests.py +++ b/tests.py @@ -19,6 +19,13 @@ def test_simple(self): self.assertTrue(matches('/home/michael/dir/main.pyc')) self.assertTrue(matches('/home/michael/__pycache__')) + def test_ignores_git_directory(self): + matches = _parse_gitignore_string('*.py', fake_base_dir='/home/michael') + self.assertFalse(matches('/home/michael/.gitignore')) + self.assertTrue(matches('/home/michael/.git')) + self.assertTrue(matches('/home/michael/.git/config')) + self.assertTrue(matches('/home/michael/.git/logs/refs/remotes/origin/HEAD')) + def test_incomplete_filename(self): matches = _parse_gitignore_string('o.py', fake_base_dir='/home/michael') self.assertTrue(matches('/home/michael/o.py')) From 2fdc88b1c9f8d12060b8589bf30e49d95d0991d9 Mon Sep 17 00:00:00 2001 From: Timo Ewalds Date: Thu, 10 Apr 2025 17:42:45 +0100 Subject: [PATCH 2/7] Add parse_gitignore_string, which is helpful for users, but removes the need for a lot of mocks. 
--- gitignore_parser.py | 24 ++++++------- tests.py | 84 ++++++++++++++++++++++----------------------- 2 files changed, 52 insertions(+), 56 deletions(-) diff --git a/gitignore_parser.py b/gitignore_parser.py index 66ef477..89a38aa 100644 --- a/gitignore_parser.py +++ b/gitignore_parser.py @@ -13,21 +13,19 @@ def handle_negation(file_path, rules: Reversible["IgnoreRule"]): return False def parse_gitignore(full_path, base_dir=None): + with open(full_path) as ignore_file: + return parse_gitignore_string(ignore_file.read(), full_path, base_dir) + +def parse_gitignore_string(gitignore_str, full_path, base_dir=None): if base_dir is None: base_dir = dirname(full_path) - rules = [ - rule_from_pattern(".git/", base_path=_normalize_path(base_dir), - source=(full_path, -1)), - ] - with open(full_path) as ignore_file: - counter = 0 - for line in ignore_file: - counter += 1 - line = line.rstrip('\n') - rule = rule_from_pattern(line, base_path=_normalize_path(base_dir), - source=(full_path, counter)) - if rule: - rules.append(rule) + base_dir = _normalize_path(base_dir) + rules = [] + for line_no, line in enumerate(gitignore_str.splitlines(), start=1): + rule = rule_from_pattern( + line.rstrip('\n'), base_path=base_dir, source=(full_path, line_no)) + if rule: + rules.append(rule) if not any(r.negation for r in rules): return lambda file_path: any(r.match(file_path) for r in rules) else: diff --git a/tests.py b/tests.py index 4170171..d5d79fa 100644 --- a/tests.py +++ b/tests.py @@ -2,32 +2,35 @@ from pathlib import Path from tempfile import TemporaryDirectory -from gitignore_parser import parse_gitignore +from gitignore_parser import parse_gitignore, parse_gitignore_string from unittest import TestCase, main class Test(TestCase): def test_simple(self): - matches = _parse_gitignore_string( + matches = parse_gitignore_string( '__pycache__/\n' '*.py[cod]', - fake_base_dir='/home/michael' + full_path='/home/michael/.gitignore' ) self.assertFalse(matches('/home/michael/main.py')) 
self.assertTrue(matches('/home/michael/main.pyc')) self.assertTrue(matches('/home/michael/dir/main.pyc')) self.assertTrue(matches('/home/michael/__pycache__')) - def test_ignores_git_directory(self): - matches = _parse_gitignore_string('*.py', fake_base_dir='/home/michael') - self.assertFalse(matches('/home/michael/.gitignore')) - self.assertTrue(matches('/home/michael/.git')) - self.assertTrue(matches('/home/michael/.git/config')) - self.assertTrue(matches('/home/michael/.git/logs/refs/remotes/origin/HEAD')) + def test_simple_parse_file(self): + with patch('builtins.open', mock_open(read_data= + '__pycache__/\n' + '*.py[cod]')): + matches = parse_gitignore(full_path='/home/michael/.gitignore') + self.assertFalse(matches('/home/michael/main.py')) + self.assertTrue(matches('/home/michael/main.pyc')) + self.assertTrue(matches('/home/michael/dir/main.pyc')) + self.assertTrue(matches('/home/michael/__pycache__')) def test_incomplete_filename(self): - matches = _parse_gitignore_string('o.py', fake_base_dir='/home/michael') + matches = parse_gitignore_string('o.py', full_path='/home/michael/.gitignore') self.assertTrue(matches('/home/michael/o.py')) self.assertFalse(matches('/home/michael/foo.py')) self.assertFalse(matches('/home/michael/o.pyc')) @@ -36,9 +39,9 @@ def test_incomplete_filename(self): self.assertFalse(matches('/home/michael/dir/o.pyc')) def test_wildcard(self): - matches = _parse_gitignore_string( + matches = parse_gitignore_string( 'hello.*', - fake_base_dir='/home/michael' + full_path='/home/michael/.gitignore' ) self.assertTrue(matches('/home/michael/hello.txt')) self.assertTrue(matches('/home/michael/hello.foobar/')) @@ -48,22 +51,22 @@ def test_wildcard(self): self.assertFalse(matches('/home/michael/helloX')) def test_anchored_wildcard(self): - matches = _parse_gitignore_string( + matches = parse_gitignore_string( '/hello.*', - fake_base_dir='/home/michael' + full_path='/home/michael/.gitignore' ) self.assertTrue(matches('/home/michael/hello.txt')) 
self.assertTrue(matches('/home/michael/hello.c')) self.assertFalse(matches('/home/michael/a/hello.java')) def test_trailingspaces(self): - matches = _parse_gitignore_string( + matches = parse_gitignore_string( 'ignoretrailingspace \n' 'notignoredspace\\ \n' 'partiallyignoredspace\\ \n' 'partiallyignoredspace2 \\ \n' 'notignoredmultiplespace\\ \\ \\ ', - fake_base_dir='/home/michael' + full_path='/home/michael/.gitignore' ) self.assertTrue(matches('/home/michael/ignoretrailingspace')) self.assertFalse(matches('/home/michael/ignoretrailingspace ')) @@ -80,12 +83,12 @@ def test_trailingspaces(self): self.assertFalse(matches('/home/michael/notignoredmultiplespace')) def test_comment(self): - matches = _parse_gitignore_string( + matches = parse_gitignore_string( 'somematch\n' '#realcomment\n' 'othermatch\n' '\\#imnocomment', - fake_base_dir='/home/michael' + full_path='/home/michael/.gitignore' ) self.assertTrue(matches('/home/michael/somematch')) self.assertFalse(matches('/home/michael/#realcomment')) @@ -94,7 +97,7 @@ def test_comment(self): def test_ignore_directory(self): matches = \ - _parse_gitignore_string('.venv/', fake_base_dir='/home/michael') + parse_gitignore_string('.venv/', full_path='/home/michael/.gitignore') self.assertTrue(matches('/home/michael/.venv')) self.assertTrue(matches('/home/michael/.venv/folder')) self.assertTrue(matches('/home/michael/.venv/file.txt')) @@ -103,34 +106,34 @@ def test_ignore_directory(self): def test_ignore_directory_asterisk(self): matches = \ - _parse_gitignore_string('.venv/*', fake_base_dir='/home/michael') + parse_gitignore_string('.venv/*', full_path='/home/michael/.gitignore') self.assertFalse(matches('/home/michael/.venv')) self.assertTrue(matches('/home/michael/.venv/folder')) self.assertTrue(matches('/home/michael/.venv/file.txt')) def test_negation(self): - matches = _parse_gitignore_string( + matches = parse_gitignore_string( ''' *.ignore !keep.ignore ''', - fake_base_dir='/home/michael' + 
full_path='/home/michael/.gitignore' ) self.assertTrue(matches('/home/michael/trash.ignore')) self.assertFalse(matches('/home/michael/keep.ignore')) self.assertTrue(matches('/home/michael/waste.ignore')) def test_literal_exclamation_mark(self): - matches = _parse_gitignore_string( - '\\!ignore_me!', fake_base_dir='/home/michael' + matches = parse_gitignore_string( + '\\!ignore_me!', full_path='/home/michael/.gitignore' ) self.assertTrue(matches('/home/michael/!ignore_me!')) self.assertFalse(matches('/home/michael/ignore_me!')) self.assertFalse(matches('/home/michael/ignore_me')) def test_double_asterisks(self): - matches = _parse_gitignore_string( - 'foo/**/Bar', fake_base_dir='/home/michael' + matches = parse_gitignore_string( + 'foo/**/Bar', full_path='/home/michael/.gitignore' ) self.assertTrue(matches('/home/michael/foo/hello/Bar')) self.assertTrue(matches('/home/michael/foo/world/Bar')) @@ -139,7 +142,7 @@ def test_double_asterisks(self): def test_double_asterisk_without_slashes_handled_like_single_asterisk(self): matches = \ - _parse_gitignore_string('a/b**c/d', fake_base_dir='/home/michael') + parse_gitignore_string('a/b**c/d', full_path='/home/michael/.gitignore') self.assertTrue(matches('/home/michael/a/bc/d')) self.assertTrue(matches('/home/michael/a/bXc/d')) self.assertTrue(matches('/home/michael/a/bbc/d')) @@ -151,22 +154,22 @@ def test_double_asterisk_without_slashes_handled_like_single_asterisk(self): def test_more_asterisks_handled_like_single_asterisk(self): matches = \ - _parse_gitignore_string('***a/b', fake_base_dir='/home/michael') + parse_gitignore_string('***a/b', full_path='/home/michael/.gitignore') self.assertTrue(matches('/home/michael/XYZa/b')) self.assertFalse(matches('/home/michael/foo/a/b')) matches = \ - _parse_gitignore_string('a/b***', fake_base_dir='/home/michael') + parse_gitignore_string('a/b***', full_path='/home/michael/.gitignore') self.assertTrue(matches('/home/michael/a/bXYZ')) 
self.assertFalse(matches('/home/michael/a/b/foo')) def test_directory_only_negation(self): - matches = _parse_gitignore_string(''' + matches = parse_gitignore_string(''' data/** !data/**/ !.gitkeep !data/01_raw/* ''', - fake_base_dir='/home/michael' + full_path='/home/michael/.gitignore' ) self.assertFalse(matches('/home/michael/data/01_raw/')) self.assertFalse(matches('/home/michael/data/01_raw/.gitkeep')) @@ -178,21 +181,21 @@ def test_directory_only_negation(self): ) def test_single_asterisk(self): - matches = _parse_gitignore_string('*', fake_base_dir='/home/michael') + matches = parse_gitignore_string('*', full_path='/home/michael/.gitignore') self.assertTrue(matches('/home/michael/file.txt')) self.assertTrue(matches('/home/michael/directory')) self.assertTrue(matches('/home/michael/directory-trailing/')) def test_supports_path_type_argument(self): - matches = _parse_gitignore_string( - 'file1\n!file2', fake_base_dir='/home/michael' + matches = parse_gitignore_string( + 'file1\n!file2', full_path='/home/michael/.gitignore' ) self.assertTrue(matches(Path('/home/michael/file1'))) self.assertFalse(matches(Path('/home/michael/file2'))) def test_slash_in_range_does_not_match_dirs(self): - matches = _parse_gitignore_string( - 'abc[X-Z/]def', fake_base_dir='/home/michael' + matches = parse_gitignore_string( + 'abc[X-Z/]def', full_path='/home/michael/.gitignore' ) self.assertFalse(matches('/home/michael/abcdef')) self.assertTrue(matches('/home/michael/abcXdef')) @@ -205,7 +208,7 @@ def test_symlink_to_another_directory(self): with TemporaryDirectory() as project_dir: with TemporaryDirectory() as another_dir: matches = \ - _parse_gitignore_string('link', fake_base_dir=project_dir) + parse_gitignore_string('link', full_path=f"{project_dir}/.gitignore") # Create a symlink to another directory. 
link = Path(project_dir, 'link') @@ -225,14 +228,9 @@ def test_symlink_to_symlink_directory(self): link.symlink_to(project_dir) file = Path(link, 'file.txt') matches = \ - _parse_gitignore_string('file.txt', fake_base_dir=str(link)) + parse_gitignore_string('file.txt', full_path=f"{link_dir}/.gitignore") self.assertTrue(matches(file)) -def _parse_gitignore_string(data: str, fake_base_dir: str = None): - with patch('builtins.open', mock_open(read_data=data)): - success = parse_gitignore(f'{fake_base_dir}/.gitignore', fake_base_dir) - return success - if __name__ == '__main__': main() From b15d86d3544739b757e01fe202ef991dc9311591 Mon Sep 17 00:00:00 2001 From: Timo Ewalds Date: Thu, 10 Apr 2025 18:52:47 +0100 Subject: [PATCH 3/7] parse_gitignore_string -> parse_gitignore_str --- gitignore_parser.py | 4 ++-- tests.py | 42 +++++++++++++++++++++--------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/gitignore_parser.py b/gitignore_parser.py index 89a38aa..24e62ab 100644 --- a/gitignore_parser.py +++ b/gitignore_parser.py @@ -14,9 +14,9 @@ def handle_negation(file_path, rules: Reversible["IgnoreRule"]): def parse_gitignore(full_path, base_dir=None): with open(full_path) as ignore_file: - return parse_gitignore_string(ignore_file.read(), full_path, base_dir) + return parse_gitignore_str(ignore_file.read(), full_path, base_dir) -def parse_gitignore_string(gitignore_str, full_path, base_dir=None): +def parse_gitignore_str(gitignore_str, full_path, base_dir=None): if base_dir is None: base_dir = dirname(full_path) base_dir = _normalize_path(base_dir) diff --git a/tests.py b/tests.py index d5d79fa..3fc7eb3 100644 --- a/tests.py +++ b/tests.py @@ -2,14 +2,14 @@ from pathlib import Path from tempfile import TemporaryDirectory -from gitignore_parser import parse_gitignore, parse_gitignore_string +from gitignore_parser import parse_gitignore, parse_gitignore_str from unittest import TestCase, main class Test(TestCase): def test_simple(self): - 
matches = parse_gitignore_string( + matches = parse_gitignore_str( '__pycache__/\n' '*.py[cod]', full_path='/home/michael/.gitignore' @@ -30,7 +30,7 @@ def test_simple_parse_file(self): self.assertTrue(matches('/home/michael/__pycache__')) def test_incomplete_filename(self): - matches = parse_gitignore_string('o.py', full_path='/home/michael/.gitignore') + matches = parse_gitignore_str('o.py', full_path='/home/michael/.gitignore') self.assertTrue(matches('/home/michael/o.py')) self.assertFalse(matches('/home/michael/foo.py')) self.assertFalse(matches('/home/michael/o.pyc')) @@ -39,7 +39,7 @@ def test_incomplete_filename(self): self.assertFalse(matches('/home/michael/dir/o.pyc')) def test_wildcard(self): - matches = parse_gitignore_string( + matches = parse_gitignore_str( 'hello.*', full_path='/home/michael/.gitignore' ) @@ -51,7 +51,7 @@ def test_wildcard(self): self.assertFalse(matches('/home/michael/helloX')) def test_anchored_wildcard(self): - matches = parse_gitignore_string( + matches = parse_gitignore_str( '/hello.*', full_path='/home/michael/.gitignore' ) @@ -60,7 +60,7 @@ def test_anchored_wildcard(self): self.assertFalse(matches('/home/michael/a/hello.java')) def test_trailingspaces(self): - matches = parse_gitignore_string( + matches = parse_gitignore_str( 'ignoretrailingspace \n' 'notignoredspace\\ \n' 'partiallyignoredspace\\ \n' @@ -83,7 +83,7 @@ def test_trailingspaces(self): self.assertFalse(matches('/home/michael/notignoredmultiplespace')) def test_comment(self): - matches = parse_gitignore_string( + matches = parse_gitignore_str( 'somematch\n' '#realcomment\n' 'othermatch\n' @@ -97,7 +97,7 @@ def test_comment(self): def test_ignore_directory(self): matches = \ - parse_gitignore_string('.venv/', full_path='/home/michael/.gitignore') + parse_gitignore_str('.venv/', full_path='/home/michael/.gitignore') self.assertTrue(matches('/home/michael/.venv')) self.assertTrue(matches('/home/michael/.venv/folder')) 
self.assertTrue(matches('/home/michael/.venv/file.txt')) @@ -106,13 +106,13 @@ def test_ignore_directory(self): def test_ignore_directory_asterisk(self): matches = \ - parse_gitignore_string('.venv/*', full_path='/home/michael/.gitignore') + parse_gitignore_str('.venv/*', full_path='/home/michael/.gitignore') self.assertFalse(matches('/home/michael/.venv')) self.assertTrue(matches('/home/michael/.venv/folder')) self.assertTrue(matches('/home/michael/.venv/file.txt')) def test_negation(self): - matches = parse_gitignore_string( + matches = parse_gitignore_str( ''' *.ignore !keep.ignore @@ -124,7 +124,7 @@ def test_negation(self): self.assertTrue(matches('/home/michael/waste.ignore')) def test_literal_exclamation_mark(self): - matches = parse_gitignore_string( + matches = parse_gitignore_str( '\\!ignore_me!', full_path='/home/michael/.gitignore' ) self.assertTrue(matches('/home/michael/!ignore_me!')) @@ -132,7 +132,7 @@ def test_literal_exclamation_mark(self): self.assertFalse(matches('/home/michael/ignore_me')) def test_double_asterisks(self): - matches = parse_gitignore_string( + matches = parse_gitignore_str( 'foo/**/Bar', full_path='/home/michael/.gitignore' ) self.assertTrue(matches('/home/michael/foo/hello/Bar')) @@ -142,7 +142,7 @@ def test_double_asterisks(self): def test_double_asterisk_without_slashes_handled_like_single_asterisk(self): matches = \ - parse_gitignore_string('a/b**c/d', full_path='/home/michael/.gitignore') + parse_gitignore_str('a/b**c/d', full_path='/home/michael/.gitignore') self.assertTrue(matches('/home/michael/a/bc/d')) self.assertTrue(matches('/home/michael/a/bXc/d')) self.assertTrue(matches('/home/michael/a/bbc/d')) @@ -154,16 +154,16 @@ def test_double_asterisk_without_slashes_handled_like_single_asterisk(self): def test_more_asterisks_handled_like_single_asterisk(self): matches = \ - parse_gitignore_string('***a/b', full_path='/home/michael/.gitignore') + parse_gitignore_str('***a/b', full_path='/home/michael/.gitignore') 
self.assertTrue(matches('/home/michael/XYZa/b')) self.assertFalse(matches('/home/michael/foo/a/b')) matches = \ - parse_gitignore_string('a/b***', full_path='/home/michael/.gitignore') + parse_gitignore_str('a/b***', full_path='/home/michael/.gitignore') self.assertTrue(matches('/home/michael/a/bXYZ')) self.assertFalse(matches('/home/michael/a/b/foo')) def test_directory_only_negation(self): - matches = parse_gitignore_string(''' + matches = parse_gitignore_str(''' data/** !data/**/ !.gitkeep @@ -181,20 +181,20 @@ def test_directory_only_negation(self): ) def test_single_asterisk(self): - matches = parse_gitignore_string('*', full_path='/home/michael/.gitignore') + matches = parse_gitignore_str('*', full_path='/home/michael/.gitignore') self.assertTrue(matches('/home/michael/file.txt')) self.assertTrue(matches('/home/michael/directory')) self.assertTrue(matches('/home/michael/directory-trailing/')) def test_supports_path_type_argument(self): - matches = parse_gitignore_string( + matches = parse_gitignore_str( 'file1\n!file2', full_path='/home/michael/.gitignore' ) self.assertTrue(matches(Path('/home/michael/file1'))) self.assertFalse(matches(Path('/home/michael/file2'))) def test_slash_in_range_does_not_match_dirs(self): - matches = parse_gitignore_string( + matches = parse_gitignore_str( 'abc[X-Z/]def', full_path='/home/michael/.gitignore' ) self.assertFalse(matches('/home/michael/abcdef')) @@ -208,7 +208,7 @@ def test_symlink_to_another_directory(self): with TemporaryDirectory() as project_dir: with TemporaryDirectory() as another_dir: matches = \ - parse_gitignore_string('link', full_path=f"{project_dir}/.gitignore") + parse_gitignore_str('link', full_path=f"{project_dir}/.gitignore") # Create a symlink to another directory. 
link = Path(project_dir, 'link') @@ -228,7 +228,7 @@ def test_symlink_to_symlink_directory(self): link.symlink_to(project_dir) file = Path(link, 'file.txt') matches = \ - parse_gitignore_string('file.txt', full_path=f"{link_dir}/.gitignore") + parse_gitignore_str('file.txt', full_path=f"{link_dir}/.gitignore") self.assertTrue(matches(file)) From 8032fb5cd6f64240404fde1d20484a1c1476b019 Mon Sep 17 00:00:00 2001 From: Timo Ewalds Date: Fri, 11 Apr 2025 09:33:11 +0100 Subject: [PATCH 4/7] Simplify parse_gitignore and parse_gitignore_str to both delegate to _parse_gitignore_lines, simplify the interface of parse_gitignore_str. --- gitignore_parser.py | 16 ++++++++++------ tests.py | 44 +++++++++++++++++++++----------------------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/gitignore_parser.py b/gitignore_parser.py index 24e62ab..95e5f25 100644 --- a/gitignore_parser.py +++ b/gitignore_parser.py @@ -13,15 +13,19 @@ def handle_negation(file_path, rules: Reversible["IgnoreRule"]): return False def parse_gitignore(full_path, base_dir=None): - with open(full_path) as ignore_file: - return parse_gitignore_str(ignore_file.read(), full_path, base_dir) - -def parse_gitignore_str(gitignore_str, full_path, base_dir=None): if base_dir is None: base_dir = dirname(full_path) - base_dir = _normalize_path(base_dir) + with open(full_path) as ignore_file: + return _parse_gitignore_lines(ignore_file, full_path, base_dir) + +def parse_gitignore_str(gitignore_str, base_dir): + full_path = os.path.join(base_dir, '.gitignore') + lines = gitignore_str.splitlines() + return _parse_gitignore_lines(lines, full_path, base_dir) + +def _parse_gitignore_lines(lines, full_path, base_dir): rules = [] - for line_no, line in enumerate(gitignore_str.splitlines(), start=1): + for line_no, line in enumerate(lines, start=1): rule = rule_from_pattern( line.rstrip('\n'), base_path=base_dir, source=(full_path, line_no)) if rule: diff --git a/tests.py b/tests.py index 3fc7eb3..b1b17d4 100644 
--- a/tests.py +++ b/tests.py @@ -12,7 +12,7 @@ def test_simple(self): matches = parse_gitignore_str( '__pycache__/\n' '*.py[cod]', - full_path='/home/michael/.gitignore' + base_dir='/home/michael/' ) self.assertFalse(matches('/home/michael/main.py')) self.assertTrue(matches('/home/michael/main.pyc')) @@ -23,14 +23,14 @@ def test_simple_parse_file(self): with patch('builtins.open', mock_open(read_data= '__pycache__/\n' '*.py[cod]')): - matches = parse_gitignore(full_path='/home/michael/.gitignore') + matches = parse_gitignore('/home/michael/.gitignore') self.assertFalse(matches('/home/michael/main.py')) self.assertTrue(matches('/home/michael/main.pyc')) self.assertTrue(matches('/home/michael/dir/main.pyc')) self.assertTrue(matches('/home/michael/__pycache__')) def test_incomplete_filename(self): - matches = parse_gitignore_str('o.py', full_path='/home/michael/.gitignore') + matches = parse_gitignore_str('o.py', base_dir='/home/michael/') self.assertTrue(matches('/home/michael/o.py')) self.assertFalse(matches('/home/michael/foo.py')) self.assertFalse(matches('/home/michael/o.pyc')) @@ -41,7 +41,7 @@ def test_incomplete_filename(self): def test_wildcard(self): matches = parse_gitignore_str( 'hello.*', - full_path='/home/michael/.gitignore' + base_dir='/home/michael/' ) self.assertTrue(matches('/home/michael/hello.txt')) self.assertTrue(matches('/home/michael/hello.foobar/')) @@ -53,7 +53,7 @@ def test_wildcard(self): def test_anchored_wildcard(self): matches = parse_gitignore_str( '/hello.*', - full_path='/home/michael/.gitignore' + base_dir='/home/michael/' ) self.assertTrue(matches('/home/michael/hello.txt')) self.assertTrue(matches('/home/michael/hello.c')) @@ -66,7 +66,7 @@ def test_trailingspaces(self): 'partiallyignoredspace\\ \n' 'partiallyignoredspace2 \\ \n' 'notignoredmultiplespace\\ \\ \\ ', - full_path='/home/michael/.gitignore' + base_dir='/home/michael/' ) self.assertTrue(matches('/home/michael/ignoretrailingspace')) 
self.assertFalse(matches('/home/michael/ignoretrailingspace ')) @@ -88,7 +88,7 @@ def test_comment(self): '#realcomment\n' 'othermatch\n' '\\#imnocomment', - full_path='/home/michael/.gitignore' + base_dir='/home/michael/' ) self.assertTrue(matches('/home/michael/somematch')) self.assertFalse(matches('/home/michael/#realcomment')) @@ -97,7 +97,7 @@ def test_comment(self): def test_ignore_directory(self): matches = \ - parse_gitignore_str('.venv/', full_path='/home/michael/.gitignore') + parse_gitignore_str('.venv/', base_dir='/home/michael/') self.assertTrue(matches('/home/michael/.venv')) self.assertTrue(matches('/home/michael/.venv/folder')) self.assertTrue(matches('/home/michael/.venv/file.txt')) @@ -106,7 +106,7 @@ def test_ignore_directory(self): def test_ignore_directory_asterisk(self): matches = \ - parse_gitignore_str('.venv/*', full_path='/home/michael/.gitignore') + parse_gitignore_str('.venv/*', base_dir='/home/michael/') self.assertFalse(matches('/home/michael/.venv')) self.assertTrue(matches('/home/michael/.venv/folder')) self.assertTrue(matches('/home/michael/.venv/file.txt')) @@ -117,7 +117,7 @@ def test_negation(self): *.ignore !keep.ignore ''', - full_path='/home/michael/.gitignore' + base_dir='/home/michael/' ) self.assertTrue(matches('/home/michael/trash.ignore')) self.assertFalse(matches('/home/michael/keep.ignore')) @@ -125,7 +125,7 @@ def test_negation(self): def test_literal_exclamation_mark(self): matches = parse_gitignore_str( - '\\!ignore_me!', full_path='/home/michael/.gitignore' + '\\!ignore_me!', base_dir='/home/michael/' ) self.assertTrue(matches('/home/michael/!ignore_me!')) self.assertFalse(matches('/home/michael/ignore_me!')) @@ -133,7 +133,7 @@ def test_literal_exclamation_mark(self): def test_double_asterisks(self): matches = parse_gitignore_str( - 'foo/**/Bar', full_path='/home/michael/.gitignore' + 'foo/**/Bar', base_dir='/home/michael/' ) self.assertTrue(matches('/home/michael/foo/hello/Bar')) 
self.assertTrue(matches('/home/michael/foo/world/Bar')) @@ -142,7 +142,7 @@ def test_double_asterisks(self): def test_double_asterisk_without_slashes_handled_like_single_asterisk(self): matches = \ - parse_gitignore_str('a/b**c/d', full_path='/home/michael/.gitignore') + parse_gitignore_str('a/b**c/d', base_dir='/home/michael/') self.assertTrue(matches('/home/michael/a/bc/d')) self.assertTrue(matches('/home/michael/a/bXc/d')) self.assertTrue(matches('/home/michael/a/bbc/d')) @@ -154,11 +154,11 @@ def test_double_asterisk_without_slashes_handled_like_single_asterisk(self): def test_more_asterisks_handled_like_single_asterisk(self): matches = \ - parse_gitignore_str('***a/b', full_path='/home/michael/.gitignore') + parse_gitignore_str('***a/b', base_dir='/home/michael/') self.assertTrue(matches('/home/michael/XYZa/b')) self.assertFalse(matches('/home/michael/foo/a/b')) matches = \ - parse_gitignore_str('a/b***', full_path='/home/michael/.gitignore') + parse_gitignore_str('a/b***', base_dir='/home/michael/') self.assertTrue(matches('/home/michael/a/bXYZ')) self.assertFalse(matches('/home/michael/a/b/foo')) @@ -169,7 +169,7 @@ def test_directory_only_negation(self): !.gitkeep !data/01_raw/* ''', - full_path='/home/michael/.gitignore' + base_dir='/home/michael/' ) self.assertFalse(matches('/home/michael/data/01_raw/')) self.assertFalse(matches('/home/michael/data/01_raw/.gitkeep')) @@ -181,21 +181,21 @@ def test_directory_only_negation(self): ) def test_single_asterisk(self): - matches = parse_gitignore_str('*', full_path='/home/michael/.gitignore') + matches = parse_gitignore_str('*', base_dir='/home/michael/') self.assertTrue(matches('/home/michael/file.txt')) self.assertTrue(matches('/home/michael/directory')) self.assertTrue(matches('/home/michael/directory-trailing/')) def test_supports_path_type_argument(self): matches = parse_gitignore_str( - 'file1\n!file2', full_path='/home/michael/.gitignore' + 'file1\n!file2', base_dir='/home/michael/' ) 
self.assertTrue(matches(Path('/home/michael/file1'))) self.assertFalse(matches(Path('/home/michael/file2'))) def test_slash_in_range_does_not_match_dirs(self): matches = parse_gitignore_str( - 'abc[X-Z/]def', full_path='/home/michael/.gitignore' + 'abc[X-Z/]def', base_dir='/home/michael/' ) self.assertFalse(matches('/home/michael/abcdef')) self.assertTrue(matches('/home/michael/abcXdef')) @@ -207,8 +207,7 @@ def test_slash_in_range_does_not_match_dirs(self): def test_symlink_to_another_directory(self): with TemporaryDirectory() as project_dir: with TemporaryDirectory() as another_dir: - matches = \ - parse_gitignore_str('link', full_path=f"{project_dir}/.gitignore") + matches = parse_gitignore_str('link', base_dir=project_dir) # Create a symlink to another directory. link = Path(project_dir, 'link') @@ -227,8 +226,7 @@ def test_symlink_to_symlink_directory(self): link = Path(link_dir, 'link') link.symlink_to(project_dir) file = Path(link, 'file.txt') - matches = \ - parse_gitignore_str('file.txt', full_path=f"{link_dir}/.gitignore") + matches = parse_gitignore_str('file.txt', base_dir=str(link_dir)) self.assertTrue(matches(file)) From bde047715ace1d1cff5801a18d701889113b42c1 Mon Sep 17 00:00:00 2001 From: Timo Ewalds Date: Fri, 11 Apr 2025 11:12:40 +0100 Subject: [PATCH 5/7] import join, remove trailing slash. 
--- gitignore_parser.py | 4 ++-- tests.py | 36 ++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/gitignore_parser.py b/gitignore_parser.py index 95e5f25..a566bec 100644 --- a/gitignore_parser.py +++ b/gitignore_parser.py @@ -2,7 +2,7 @@ import os import re -from os.path import abspath, dirname +from os.path import abspath, dirname, join from pathlib import Path from typing import Reversible, Union @@ -19,7 +19,7 @@ def parse_gitignore(full_path, base_dir=None): return _parse_gitignore_lines(ignore_file, full_path, base_dir) def parse_gitignore_str(gitignore_str, base_dir): - full_path = os.path.join(base_dir, '.gitignore') + full_path = join(base_dir, '.gitignore') lines = gitignore_str.splitlines() return _parse_gitignore_lines(lines, full_path, base_dir) diff --git a/tests.py b/tests.py index b1b17d4..bd63855 100644 --- a/tests.py +++ b/tests.py @@ -12,7 +12,7 @@ def test_simple(self): matches = parse_gitignore_str( '__pycache__/\n' '*.py[cod]', - base_dir='/home/michael/' + base_dir='/home/michael' ) self.assertFalse(matches('/home/michael/main.py')) self.assertTrue(matches('/home/michael/main.pyc')) @@ -30,7 +30,7 @@ def test_simple_parse_file(self): self.assertTrue(matches('/home/michael/__pycache__')) def test_incomplete_filename(self): - matches = parse_gitignore_str('o.py', base_dir='/home/michael/') + matches = parse_gitignore_str('o.py', base_dir='/home/michael') self.assertTrue(matches('/home/michael/o.py')) self.assertFalse(matches('/home/michael/foo.py')) self.assertFalse(matches('/home/michael/o.pyc')) @@ -41,7 +41,7 @@ def test_incomplete_filename(self): def test_wildcard(self): matches = parse_gitignore_str( 'hello.*', - base_dir='/home/michael/' + base_dir='/home/michael' ) self.assertTrue(matches('/home/michael/hello.txt')) self.assertTrue(matches('/home/michael/hello.foobar/')) @@ -53,7 +53,7 @@ def test_wildcard(self): def test_anchored_wildcard(self): matches = parse_gitignore_str( '/hello.*', 
- base_dir='/home/michael/' + base_dir='/home/michael' ) self.assertTrue(matches('/home/michael/hello.txt')) self.assertTrue(matches('/home/michael/hello.c')) @@ -66,7 +66,7 @@ def test_trailingspaces(self): 'partiallyignoredspace\\ \n' 'partiallyignoredspace2 \\ \n' 'notignoredmultiplespace\\ \\ \\ ', - base_dir='/home/michael/' + base_dir='/home/michael' ) self.assertTrue(matches('/home/michael/ignoretrailingspace')) self.assertFalse(matches('/home/michael/ignoretrailingspace ')) @@ -88,7 +88,7 @@ def test_comment(self): '#realcomment\n' 'othermatch\n' '\\#imnocomment', - base_dir='/home/michael/' + base_dir='/home/michael' ) self.assertTrue(matches('/home/michael/somematch')) self.assertFalse(matches('/home/michael/#realcomment')) @@ -97,7 +97,7 @@ def test_comment(self): def test_ignore_directory(self): matches = \ - parse_gitignore_str('.venv/', base_dir='/home/michael/') + parse_gitignore_str('.venv/', base_dir='/home/michael') self.assertTrue(matches('/home/michael/.venv')) self.assertTrue(matches('/home/michael/.venv/folder')) self.assertTrue(matches('/home/michael/.venv/file.txt')) @@ -106,7 +106,7 @@ def test_ignore_directory(self): def test_ignore_directory_asterisk(self): matches = \ - parse_gitignore_str('.venv/*', base_dir='/home/michael/') + parse_gitignore_str('.venv/*', base_dir='/home/michael') self.assertFalse(matches('/home/michael/.venv')) self.assertTrue(matches('/home/michael/.venv/folder')) self.assertTrue(matches('/home/michael/.venv/file.txt')) @@ -117,7 +117,7 @@ def test_negation(self): *.ignore !keep.ignore ''', - base_dir='/home/michael/' + base_dir='/home/michael' ) self.assertTrue(matches('/home/michael/trash.ignore')) self.assertFalse(matches('/home/michael/keep.ignore')) @@ -125,7 +125,7 @@ def test_negation(self): def test_literal_exclamation_mark(self): matches = parse_gitignore_str( - '\\!ignore_me!', base_dir='/home/michael/' + '\\!ignore_me!', base_dir='/home/michael' ) self.assertTrue(matches('/home/michael/!ignore_me!')) 
self.assertFalse(matches('/home/michael/ignore_me!')) @@ -133,7 +133,7 @@ def test_literal_exclamation_mark(self): def test_double_asterisks(self): matches = parse_gitignore_str( - 'foo/**/Bar', base_dir='/home/michael/' + 'foo/**/Bar', base_dir='/home/michael' ) self.assertTrue(matches('/home/michael/foo/hello/Bar')) self.assertTrue(matches('/home/michael/foo/world/Bar')) @@ -142,7 +142,7 @@ def test_double_asterisks(self): def test_double_asterisk_without_slashes_handled_like_single_asterisk(self): matches = \ - parse_gitignore_str('a/b**c/d', base_dir='/home/michael/') + parse_gitignore_str('a/b**c/d', base_dir='/home/michael') self.assertTrue(matches('/home/michael/a/bc/d')) self.assertTrue(matches('/home/michael/a/bXc/d')) self.assertTrue(matches('/home/michael/a/bbc/d')) @@ -154,11 +154,11 @@ def test_double_asterisk_without_slashes_handled_like_single_asterisk(self): def test_more_asterisks_handled_like_single_asterisk(self): matches = \ - parse_gitignore_str('***a/b', base_dir='/home/michael/') + parse_gitignore_str('***a/b', base_dir='/home/michael') self.assertTrue(matches('/home/michael/XYZa/b')) self.assertFalse(matches('/home/michael/foo/a/b')) matches = \ - parse_gitignore_str('a/b***', base_dir='/home/michael/') + parse_gitignore_str('a/b***', base_dir='/home/michael') self.assertTrue(matches('/home/michael/a/bXYZ')) self.assertFalse(matches('/home/michael/a/b/foo')) @@ -169,7 +169,7 @@ def test_directory_only_negation(self): !.gitkeep !data/01_raw/* ''', - base_dir='/home/michael/' + base_dir='/home/michael' ) self.assertFalse(matches('/home/michael/data/01_raw/')) self.assertFalse(matches('/home/michael/data/01_raw/.gitkeep')) @@ -181,21 +181,21 @@ def test_directory_only_negation(self): ) def test_single_asterisk(self): - matches = parse_gitignore_str('*', base_dir='/home/michael/') + matches = parse_gitignore_str('*', base_dir='/home/michael') self.assertTrue(matches('/home/michael/file.txt')) self.assertTrue(matches('/home/michael/directory')) 
 self.assertTrue(matches('/home/michael/directory-trailing/')) def test_supports_path_type_argument(self): matches = parse_gitignore_str( - 'file1\n!file2', base_dir='/home/michael/' + 'file1\n!file2', base_dir='/home/michael' ) self.assertTrue(matches(Path('/home/michael/file1'))) self.assertFalse(matches(Path('/home/michael/file2'))) def test_slash_in_range_does_not_match_dirs(self): matches = parse_gitignore_str( - 'abc[X-Z/]def', base_dir='/home/michael/' + 'abc[X-Z/]def', base_dir='/home/michael' ) self.assertFalse(matches('/home/michael/abcdef')) self.assertTrue(matches('/home/michael/abcXdef')) From 55946a7c3bd585f91c45b9ed7e5d118156e7c0ed Mon Sep 17 00:00:00 2001 From: Timo Ewalds Date: Fri, 11 Apr 2025 19:04:07 +0100 Subject: [PATCH 6/7] Add an example usage. --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 6e406d8..979dd36 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,16 @@ Then: >>> matches('/home/michael/project/__pycache__') True +Alternatively, you can use the `parse_gitignore_str` function: + + >>> from gitignore_parser import parse_gitignore_str + >>> matches = parse_gitignore_str( + ... '__pycache__/\n*.py[cod]', base_dir='/home/michael/project') + >>> matches('/home/michael/project/main.py') + False + >>> matches('/home/michael/project/main.pyc') + True + ## Motivation I couldn't find a good library for doing the above on PyPI. There are From fec2529cf0b0843ea55a3c0e53c3b98fb3821f90 Mon Sep 17 00:00:00 2001 From: Timo Ewalds Date: Fri, 11 Apr 2025 19:04:20 +0100 Subject: [PATCH 7/7] Move normalize_path up, which is slightly faster. 
--- gitignore_parser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gitignore_parser.py b/gitignore_parser.py index a566bec..ef91b2b 100644 --- a/gitignore_parser.py +++ b/gitignore_parser.py @@ -24,6 +24,7 @@ def parse_gitignore_str(gitignore_str, base_dir): return _parse_gitignore_lines(lines, full_path, base_dir) def _parse_gitignore_lines(lines, full_path, base_dir): + base_dir = _normalize_path(base_dir) rules = [] for line_no, line in enumerate(lines, start=1): rule = rule_from_pattern( @@ -105,7 +106,7 @@ def rule_from_pattern(pattern, base_path=None, source=None): negation=negation, directory_only=directory_only, anchored=anchored, - base_path=_normalize_path(base_path) if base_path else None, + base_path=base_path if base_path else None, source=source )