-
Notifications
You must be signed in to change notification settings - Fork 44
/
Copy pathfolder_searcher.py
103 lines (87 loc) · 3.74 KB
/
folder_searcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import functools
import os
import pathlib
import re
from fnmatch import translate
from typing import Generator, List, Optional, Pattern
def _is_included(
    filename_include_regex: Pattern,
    multipart_include_regex: Optional[Pattern],
    path: pathlib.Path,
):
    """
    Tells whether a path passes the include filters
    Parameters:
    filename_include_regex (regex): matched against the last path component only (path.name)
    multipart_include_regex (regex or None): matched against the resolved full path; None disables this filter
    path (pathlib.Path): the candidate path
    Returns:
    A truthy value (True or a match object) when the path is included, None otherwise
    """
    name_match = filename_include_regex.match(path.name)
    if not name_match:
        # The filename filter is mandatory: no match means not included
        return name_match
    if multipart_include_regex is None:
        return True
    # The full-path filter is applied to the resolved form of the path
    resolved = str(path.resolve())
    return multipart_include_regex.match(resolved)
def _is_excluded(
    filename_exclude_regex: Optional[Pattern],
    multipart_exclude_regex: Optional[Pattern],
    path: pathlib.Path,
):
    """
    Tells whether a path is rejected by one of the exclude filters
    Parameters:
    filename_exclude_regex (regex or None): matched against the last path component only (path.name)
    multipart_exclude_regex (regex or None): matched against str(path), the path exactly as given
    path (pathlib.Path): the candidate path
    Returns:
    A match object when one of the filters matches, a falsy value (False or None) otherwise
    """
    if filename_exclude_regex is not None:
        name_hit = filename_exclude_regex.match(path.name)
        if name_hit:
            return name_hit
    # Filename filter absent or not matching: the full-path filter decides
    if multipart_exclude_regex is None:
        return False
    return multipart_exclude_regex.match(str(path))
def search_files(
    folder_to_search: pathlib.Path,
    folders_to_ignore: List[str],
    *,
    filename_include_regex: Pattern,
    filename_exclude_regex: Optional[Pattern] = None,
    multipart_include_regex: Optional[Pattern] = None,
    multipart_exclude_regex: Optional[Pattern] = None,
    search_for_directories: bool = False,
) -> Generator[pathlib.Path, None, None]:
    """
    Searches for files or directories in a given folder

    Parameters:
        folder_to_search (pathlib.Path): in which folder you want the search to be
        folders_to_ignore (list of str): names of folders inside folder_to_search to ignore and not search inside
        filename_include_regex (regex): Regex for filenames only, this does not include the full path of the file
        filename_exclude_regex (regex): Regex for filenames only, this does not include the full path of the file
        multipart_include_regex (regex): Regex for full path of the files you want to include
        multipart_exclude_regex (regex): Regex for full path of the files you want to exclude;
            directories whose path matches it are pruned and never descended into
        search_for_directories (bool): if True, yield matching directories instead of matching files

    Yields:
        pathlib.Path: every path below folder_to_search that passes the include filters
        and is not rejected by the exclude filters
    """
    this_is_included = functools.partial(
        _is_included, filename_include_regex, multipart_include_regex
    )
    this_is_excluded = functools.partial(
        _is_excluded, filename_exclude_regex, multipart_exclude_regex
    )

    for dirpath, dirnames, filenames in os.walk(folder_to_search):
        current_dir = pathlib.Path(dirpath)

        dirs_to_remove = {d for d in dirnames if d in folders_to_ignore}
        if multipart_exclude_regex is not None:
            # set.update mutates in place; set.union would build a new set and
            # throw it away, leaving the excluded directories in the walk.
            dirs_to_remove.update(
                directory
                for directory in dirnames
                if multipart_exclude_regex.match(str(current_dir / directory))
            )

        if dirs_to_remove:
            # Pruning must happen on the very list object os.walk handed out,
            # this is the documented way to stop it from descending further.
            dirnames[:] = [d for d in dirnames if d not in dirs_to_remove]

        candidates = dirnames if search_for_directories else filenames
        for name in candidates:
            candidate_path = current_dir / name
            if not this_is_excluded(candidate_path) and this_is_included(
                candidate_path
            ):
                yield candidate_path
def globs_to_regex(patterns: List[str]) -> Optional[Pattern]:
    """
    Builds one compiled regex that matches whatever any of the given globs matches

    Parameters:
        patterns (List[str]): a list of globs, possibly empty

    Returns:
        (Pattern): the globs translated to regexes and ORed together,
        or None if patterns is an empty list
    """
    if patterns:
        # Each translated glob gets its own group so the alternation cannot
        # bleed from one pattern into the next.
        alternation = "|".join(f"({translate(glob)})" for glob in patterns)
        return re.compile(alternation)
    # No globs at all: re.compile("") would match everything, so signal
    # "no pattern" with None instead.
    return None