» Core Development > Code coverage > Lib/packaging/manifest.py

Python code coverage for Lib/packaging/manifest.py

#countcontent
1n/a"""Class representing the list of files in a distribution.
2n/a
3n/aThe Manifest class can be used to:
4n/a
5n/a - read or write a MANIFEST file
6n/a - read a template file and find out the file list
7n/a"""
8n/a# XXX todo: document + add tests
9n/aimport re
10n/aimport os
11n/aimport fnmatch
12n/a
13n/afrom packaging import logger
14n/afrom packaging.util import write_file, convert_path
15n/afrom packaging.errors import (PackagingTemplateError,
16n/a PackagingInternalError)
17n/a
18n/a__all__ = ['Manifest']
19n/a
# A line continuation: a backslash followed by optional spaces/tabs and the
# end of the line.  Raw string so that the backslash escapes reach the regex
# engine unchanged.
_COLLAPSE_PATTERN = re.compile(r'\\[ \t]*\n', re.M)
# Either a comment running up to (not including) the end of its line, or a
# newline that is directly followed by the end of a line (i.e. the newline
# that precedes an empty line or closes the text).
_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
23n/a
24n/a
class Manifest(object):
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Attributes:
     - allfiles: every file found by findall(), or None as long as no scan
       has been done (the include actions trigger the scan lazily)
     - files: the files selected so far
    """

    # First line of every manifest produced by write(); an existing file
    # that does not start with it is treated as manually maintained.
    _GENERATED_HEADER = '# file GENERATED by packaging, do NOT edit'

    def __init__(self):
        self.allfiles = None
        self.files = []

    #
    # Public API
    #

    def findall(self, dir=os.curdir):
        """Scan 'dir' and store every file found in 'self.allfiles'."""
        self.allfiles = _findall(dir)

    def append(self, item):
        """Add a single file name to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Add several file names to 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' by (directory, base name)."""
        # Not a strict lexical sort!
        split_paths = sorted(os.path.split(path) for path in self.files)
        self.files = [os.path.join(*parts) for parts in split_paths]

    def clear(self):
        """Clear all collected files."""
        self.files = []
        # keep None as the "never scanned" marker
        if self.allfiles is not None:
            self.allfiles = []

    def remove_duplicates(self):
        """Drop adjacent duplicates from 'self.files', in place."""
        # Assumes list has been sorted!  Walk backwards so that deleting
        # an item does not shift the indexes still to be visited.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    def read_template(self, path_or_file):
        """Read and parse a manifest template file.

        'path_or_file' can be a path or a file-like object; in both cases
        the file is closed once its content has been read.

        Each non-empty line is applied to the list in order.  A line that
        raises PackagingTemplateError is reported with a warning and
        skipped.
        """
        if isinstance(path_or_file, str):
            f = open(path_or_file)
        else:
            f = path_or_file

        try:
            content = f.read()
        finally:
            f.close()

        # first, let's unwrap collapsed lines
        content = _COLLAPSE_PATTERN.sub('', content)
        # next, let's remove commented lines and empty lines
        content = _COMMENTED_LINE.sub('', content)

        for line in content.split('\n'):
            line = line.strip()
            if not line:
                continue
            try:
                self._process_template_line(line)
            except PackagingTemplateError as msg:
                logger.warning("%s, %s", path_or_file, msg)

    def write(self, path):
        """Write the sorted, duplicate-free content of 'self.files' to the
        manifest file 'path', preceded by a "generated" header line.

        If 'path' already exists and does not start with that header, it is
        considered manually maintained and left untouched.
        """
        if os.path.isfile(path):
            with open(path) as fp:
                first_line = fp.readline()

            if first_line != self._GENERATED_HEADER + '\n':
                logger.info("not writing to manually maintained "
                            "manifest file %r", path)
                return

        self.sort()
        self.remove_duplicates()
        content = [self._GENERATED_HEADER] + self.files
        logger.info("writing manifest file %r", path)
        write_file(path, content)

    def read(self, path):
        """Read the manifest file 'path' and append the file names it lists
        to 'self.files'.

        Surrounding whitespace (including the end of line) is stripped;
        blank lines and comment lines, such as the header added by write(),
        are ignored.
        """
        logger.info("reading manifest file %r", path)
        with open(path) as manifest:
            for name in self._manifest_entries(manifest):
                self.append(name)

    def exclude_pattern(self, pattern, anchor=True, prefix=None,
                        is_regex=False):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.

        The parameters have the same meaning as for '_include_pattern()'.
        The list 'self.files' is modified in place.  Return True if files
        are found.
        """
        pattern_re = _translate_pattern(pattern, anchor, prefix, is_regex)
        kept = [name for name in self.files if not pattern_re.search(name)]
        files_found = len(kept) != len(self.files)
        # slice assignment keeps the same list object, as 'del' did
        self.files[:] = kept
        return files_found

    #
    # Private API
    #

    @staticmethod
    def _manifest_entries(lines):
        """Yield the file names contained in the lines of a manifest file,
        skipping blank lines and comments.
        """
        for line in lines:
            line = line.strip()
            if line and not line.startswith('#'):
                yield line

    def _parse_template_line(self, line):
        """Split a template line and check its number of words.

        Return the tuple (action, patterns, dir, dir_pattern); the items
        that do not apply to the action are None.  Raise
        PackagingTemplateError for an unknown action or a wrong number of
        arguments.
        """
        words = line.split()
        if len(words) == 1 and words[0] not in (
              'include', 'exclude', 'global-include', 'global-exclude',
              'recursive-include', 'recursive-exclude', 'graft', 'prune'):
            # no action given, let's use the default 'include'
            words.insert(0, 'include')

        action = words[0]
        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise PackagingTemplateError(
                      "%r expects <pattern1> <pattern2> ..." % action)

            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise PackagingTemplateError(
                      "%r expects <dir> <pattern1> <pattern2> ..." % action)

            dir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise PackagingTemplateError(
                     "%r expects a single <dir_pattern>" % action)

            dir_pattern = convert_path(words[1])

        else:
            raise PackagingTemplateError("unknown action %r" % action)

        return action, patterns, dir, dir_pattern

    def _process_template_line(self, line):
        """Apply one template line to 'self.files', logging a warning for
        every pattern that matches nothing.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        action, patterns, dir, dir_pattern = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=True):
                    logger.warning("no files found matching %r", pattern)

        elif action == 'exclude':
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=True):
                    logger.warning("no previously-included files "
                                   "found matching %r", pattern)

        elif action == 'global-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=False):
                    logger.warning("no files found matching %r "
                                   "anywhere in distribution", pattern)

        elif action == 'global-exclude':
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=False):
                    logger.warning("no previously-included files "
                                   "matching %r found anywhere in "
                                   "distribution", pattern)

        elif action == 'recursive-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, prefix=dir):
                    logger.warning("no files found matching %r "
                                   "under directory %r", pattern, dir)

        elif action == 'recursive-exclude':
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    logger.warning("no previously-included files "
                                   "matching %r found under directory %r",
                                   pattern, dir)

        elif action == 'graft':
            if not self._include_pattern(None, prefix=dir_pattern):
                logger.warning("no directories found matching %r",
                               dir_pattern)

        elif action == 'prune':
            if not self.exclude_pattern(None, prefix=dir_pattern):
                logger.warning("no previously-included directories found "
                               "matching %r", dir_pattern)
        else:
            raise PackagingInternalError(
                "this cannot happen: invalid action %r" % action)

    def _include_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.

        Patterns are not quite the same as implemented by the 'fnmatch'
        module: the translation is done by '_translate_pattern()', whose
        wildcards are meant not to match the path separator.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.

        Return True if files are found.
        """
        pattern_re = _translate_pattern(pattern, anchor, prefix, is_regex)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        matches = [name for name in self.allfiles if pattern_re.search(name)]
        self.files.extend(matches)
        return bool(matches)
289n/a
290n/a
291n/a#
292n/a# Utility functions
293n/a#
def _findall(dir=os.curdir):
    """Find all regular files under 'dir' and return the list of their
    names.

    Each name is prefixed with the directory it was found in, except for
    entries found directly in os.curdir, which are returned bare.
    Directories are descended into (os.stat follows symbolic links, so
    linked files and directories are treated like their targets).  Entries
    that cannot be stat'ed, such as dangling symbolic links, are skipped.
    """
    from stat import S_ISREG, S_ISDIR

    found = []
    pending = [dir]

    while pending:
        current = pending.pop()

        for name in os.listdir(current):
            if current != os.curdir:        # avoid the dreaded "./" syndrome
                fullname = os.path.join(current, name)
            else:
                fullname = name

            # Avoid excess stat calls -- just one will do, thank you!
            try:
                mode = os.stat(fullname).st_mode
            except OSError:
                # dangling symlink, or entry that vanished since listdir()
                continue

            if S_ISREG(mode):
                found.append(fullname)
            elif S_ISDIR(mode):
                pending.append(fullname)

    return found
324n/a
325n/a
def _glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression.

    Return a string containing the regex.  Differs from
    'fnmatch.translate()' in that '*' does not match "special characters"
    (which are platform-specific).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the special characters (currently: just os.sep).
    sep = os.sep
    if os.sep == '\\':
        # we're using a regex to manipulate a regex, so we need
        # to escape the backslash twice
        sep = r'\\\\'
    escaped = r'\1[^%s]' % sep
    return re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)


def _translate_pattern(pattern, anchor=True, prefix=None, is_regex=False):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.

    Return the compiled regex.  If 'is_regex' true,
    then 'pattern' is directly compiled to a regex (if it's a string)
    or just returned as-is (assumes it's a regex object).

    Otherwise 'pattern' is a glob (an empty or None pattern matches
    anything).  If 'prefix' is given, the regex matches names made of
    'prefix' (itself a glob), a path separator, anything, then 'pattern',
    and 'anchor' is ignored.  Without 'prefix', 'anchor' decides whether
    the match must start at the beginning of the name.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        return pattern

    # fnmatch.translate() wraps the regex in start/end text whose form
    # depends on the Python version (e.g. '(?s:' ... ')\Z').  Translate a
    # sentinel to discover both parts instead of assuming a fixed suffix.
    start, _, end = _glob_to_re('_').partition('_')

    def strip_wrapper(regex):
        return regex[len(start):len(regex) - len(end)]

    if pattern:
        body = strip_wrapper(_glob_to_re(pattern))
    else:
        body = None

    if prefix is not None:
        prefix_body = strip_wrapper(_glob_to_re(prefix))
        sep = os.sep
        if os.sep == '\\':
            sep = r'\\'
        pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_body, sep,
                                          body or '', end)
    elif body is None:
        # no pattern and no prefix: matches everything
        pattern_re = '^' if anchor else ''
    elif anchor:
        pattern_re = r'%s\A%s%s' % (start, body, end)
    else:
        pattern_re = start + body + end

    return re.compile(pattern_re)