» Core Development > Code coverage > Lib/distutils/filelist.py

Python code coverage for Lib/distutils/filelist.py

# | count | content
1n/a"""distutils.filelist
2n/a
3n/aProvides the FileList class, used for poking about the filesystem
4n/aand building lists of files.
5n/a"""
6n/a
7n/aimport os, re
8n/aimport fnmatch
9n/aimport functools
10n/afrom distutils.util import convert_path
11n/afrom distutils.errors import DistutilsTemplateError, DistutilsInternalError
12n/afrom distutils import log
13n/a
class FileList:
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until supplied via 'set_allfiles()' or
        computed by 'findall()' (which 'include_pattern()' calls lazily)
    """

    def __init__(self, warn=None, debug_print=None):
        # ignore argument to FileList, but keep them for backwards
        # compatibility
        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate files."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Populate 'self.allfiles' from the module-level 'findall(dir)'."""
        self.allfiles = findall(dir)

    def debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.debug import DEBUG
        if DEBUG:
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Add a single filename to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every filename in 'items' to 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' by (directory, basename) pairs.

        Not a strict lexical sort: each name is split with os.path.split(),
        the resulting tuples are sorted, and the names are re-joined.
        'self.files' is rebound to a new list.
        """
        self.files = [os.path.join(*parts)
                      for parts in sorted(map(os.path.split, self.files))]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Drop adjacent duplicate entries from 'self.files' in place.

        Assumes list has been sorted!
        """
        # Walk backwards so deleting an entry never shifts an index that
        # is still to be visited.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split a template line into (action, patterns, dir, dir_pattern).

        'action' is the first word of the line.  Depending on the action,
        either 'patterns', ('dir' and 'patterns'), or 'dir_pattern' is
        set; the others are None.  Path-like words are passed through
        convert_path().

        Raises DistutilsTemplateError if the line is blank, names an
        unknown action, or has the wrong number of words for its action.
        """
        words = line.split()
        if not words:
            # A blank line has no action word; report it like any other
            # malformed line instead of failing with IndexError below.
            raise DistutilsTemplateError("empty template line")
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                      "'%s' expects <pattern1> <pattern2> ..." % action)
            patterns = [convert_path(w) for w in words[1:]]
        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                      "'%s' expects <dir> <pattern1> <pattern2> ..." % action)
            dir = convert_path(words[1])
            patterns = [convert_path(w) for w in words[2:]]
        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                      "'%s' expects a single <dir_pattern>" % action)
            dir_pattern = convert_path(words[1])
        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    def process_template_line(self, line):
        """Parse one template line and apply its action to this list.

        Include-type actions add matching names from 'self.allfiles' to
        'self.files'; exclude-type actions remove matching names from
        'self.files'.  A warning is logged for every pattern that matches
        nothing.  Errors from '_parse_template_line()' propagate.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    log.warn("warning: no files found matching '%s'",
                             pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    log.warn(("warning: no previously-included files "
                              "found matching '%s'"), pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    log.warn(("warning: no files found matching '%s' "
                              "anywhere in distribution"), pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found anywhere in distribution"),
                             pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    log.warn(("warning: no files found matching '%s' "
                              "under directory '%s'"),
                             pattern, dir)

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found under directory '%s'"),
                             pattern, dir)

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            if not self.include_pattern(None, prefix=dir_pattern):
                log.warn("warning: no directories found matching '%s'",
                         dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                log.warn(("no previously-included directories found "
                          "matching '%s'"), dir_pattern)
        else:
            raise DistutilsInternalError(
                  "this cannot happen: invalid action '%s'" % action)

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
        are not quite the same as implemented by the 'fnmatch' module: '*'
        and '?'  match non-special characters, where "special" is platform-
        dependent: slash on Unix; colon, slash, and backslash on
        DOS/Windows; and colon on Mac OS.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.  If 'self.allfiles'
        is still None it is first populated by calling 'self.findall()'.

        Return True if files are found, False otherwise.
        """
        # XXX docstring lying about what the special chars are?
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = True
        return files_found

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Iterate from the end so in-place deletion is index-safe.
        for i in range(len(self.files)-1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = True
        return files_found
241n/a
242n/a
243n/a# ----------------------------------------------------------------------
244n/a# Utility functions
245n/a
246n/adef _find_all_simple(path):
247n/a """
248n/a Find all files under 'path'
249n/a """
250n/a results = (
251n/a os.path.join(base, file)
252n/a for base, dirs, files in os.walk(path, followlinks=True)
253n/a for file in files
254n/a )
255n/a return filter(os.path.isfile, results)
256n/a
257n/a
def findall(dir=os.curdir):
    """
    Return a list of all files found under 'dir'.

    When 'dir' is the current directory (os.curdir) the names are made
    relative to it; for any other 'dir' they are returned exactly as
    produced by _find_all_simple(), i.e. with 'dir' prepended.
    """
    found = _find_all_simple(dir)
    if dir != os.curdir:
        return list(found)
    # Strip the leading './' (or platform equivalent) from each name.
    return [os.path.relpath(name, start=dir) for name in found]
268n/a
269n/a
def glob_to_re(pattern):
    """Convert the shell-style glob 'pattern' into a regular expression
    and return that regex as a string.

    The result starts from 'fnmatch.translate()' but is adjusted so that
    the wildcards '*' and '?' never match the path separator (os.sep).
    """
    translated = fnmatch.translate(pattern)

    # fnmatch renders '?' as '.' and '*' as '.*', which would happily match
    # a path separator.  Rewrite every unescaped dot in the regex as a
    # character class excluding the separator.  On Windows the separator is
    # a backslash, and because a regex is being used to edit a regex it has
    # to be escaped twice.
    sep_in_class = r'\\\\' if os.sep == '\\' else os.sep
    replacement = r'\1[^%s]' % sep_in_class
    unescaped_dot = re.compile(r'((?<!\\)(\\\\)*)\.')
    return unescaped_dot.sub(replacement, translated)
291n/a
292n/a
def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Build and return a compiled regular expression for 'pattern'.

    With 'is_regex' true, a string 'pattern' is compiled unchanged and
    anything else is assumed to already be a regex object and is returned
    as-is; 'anchor' and 'prefix' are not consulted.

    Otherwise 'pattern' is a shell-like wildcard translated through
    glob_to_re().  If 'prefix' (itself a wildcard) is given, the regex
    matches names that begin with the prefix, followed by the path
    separator, anything, and finally 'pattern'.  Without a prefix, a true
    'anchor' pins the match to the start of the name.
    """
    if is_regex:
        return re.compile(pattern) if isinstance(pattern, str) else pattern

    # glob_to_re() wraps whatever it translates in a fixed head and tail;
    # translating a lone '_' reveals what those two pieces are.
    head, _, tail = glob_to_re('_').partition('_')

    body_re = ''
    if pattern:
        body_re = glob_to_re(pattern)
        assert body_re.startswith(head) and body_re.endswith(tail)

    if prefix is None:
        # no prefix -- respect anchor flag
        if anchor:
            body_re = r'%s\A%s' % (head, body_re[len(head):])
        return re.compile(body_re)

    lead_re = glob_to_re(prefix)
    assert lead_re.startswith(head) and lead_re.endswith(tail)
    # Strip the wrappers from both pieces so they can be spliced together
    # inside a single head/tail pair.
    lead_re = lead_re[len(head): len(lead_re) - len(tail)]
    sep = r'\\' if os.sep == '\\' else os.sep
    body_re = body_re[len(head): len(body_re) - len(tail)]
    return re.compile(
        r'%s\A%s%s.*%s%s' % (head, lead_re, sep, body_re, tail))