»Core Development>Code coverage>Lib/modulefinder.py

Python code coverage for Lib/modulefinder.py

#countcontent
1n/a"""Find modules used by a script, using introspection."""
2n/a
3n/aimport dis
4n/aimport importlib._bootstrap_external
5n/aimport importlib.machinery
6n/aimport marshal
7n/aimport os
8n/aimport sys
9n/aimport types
10n/aimport warnings
11n/awith warnings.catch_warnings():
12n/a warnings.simplefilter('ignore', DeprecationWarning)
13n/a import imp
14n/a
# Numeric opcodes the bytecode scanner looks for, resolved once at import time.
LOAD_CONST, IMPORT_NAME, STORE_NAME, STORE_GLOBAL = (
    dis.opmap[opname]
    for opname in ('LOAD_CONST', 'IMPORT_NAME', 'STORE_NAME', 'STORE_GLOBAL')
)
# Opcodes that bind a name at module scope.
STORE_OPS = (STORE_NAME, STORE_GLOBAL)
EXTENDED_ARG = dis.EXTENDED_ARG
21n/a
22n/a# Modulefinder does a good job of simulating Python's import mechanism, but
23n/a# it cannot handle __path__ modifications packages make at runtime. Therefore
24n/a# there is a mechanism whereby you can register extra paths in this map for a
25n/a# package, and they will be honored.
26n/a
27n/a# Note: this is a mapping from package names to lists of paths.
packagePathMap = {}

# A Public interface
def AddPackagePath(packagename, path):
    """Register *path* as an additional search directory for *packagename*.

    Paths accumulate in registration order in ``packagePathMap``.
    """
    registered = packagePathMap.get(packagename)
    if registered is None:
        registered = packagePathMap[packagename] = []
    registered.append(path)
33n/a
replacePackageMap = {}

# This ReplacePackage mechanism allows modulefinder to work around
# situations in which a package injects itself under the name
# of another package into sys.modules at runtime by calling
# ReplacePackage("real_package_name", "faked_package_name")
# before running ModuleFinder.

def ReplacePackage(oldname, newname):
    """Record that package *oldname* should be registered as *newname*.

    A later call with the same *oldname* overwrites the earlier entry.
    """
    replacePackageMap.update({oldname: newname})
44n/a
45n/a
class Module:
    """Bookkeeping record for a single module known to the finder."""

    def __init__(self, name, file=None, path=None):
        """Store the module's name and, optionally, its file and search path."""
        self.__name__, self.__file__, self.__path__ = name, file, path
        self.__code__ = None
        # Global names assigned to in the module (used as a set; keys only).
        # Names pulled in through star-imports of Python modules are
        # included as well.
        self.globalnames = {}
        # Star-imports this module performed that could not be resolved,
        # i.e. a star-import from a non-Python module (also used as a set).
        self.starimports = {}

    def __repr__(self):
        """Return ``Module(name[, file][, path])``, omitting parts that are None."""
        shown = [self.__name__]
        shown.extend(
            value for value in (self.__file__, self.__path__)
            if value is not None
        )
        return "Module(" + ", ".join("%r" % (value,) for value in shown) + ")"
69n/a
class ModuleFinder:
    """Find the modules a script imports by scanning its compiled bytecode.

    Modules that were located are recorded in ``self.modules`` (fully
    qualified name -> module record); imports that could not be satisfied
    are recorded in ``self.badmodules`` (name -> dict whose keys are the
    names of the importing modules, or "-" when there was no caller).
    """

    def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
        """Create a finder.

        path -- list of directories searched for top-level modules;
            defaults to sys.path.
        debug -- verbosity threshold; msg() prints a message only when its
            level is <= debug.
        excludes -- fully qualified module names find_module() refuses to
            locate.  Defaults to a fresh empty list.
        replace_paths -- sequence of (old_prefix, new_prefix) pairs applied
            to code-object filenames by replace_paths_in_code().  Defaults
            to a fresh empty list.
        """
        if path is None:
            path = sys.path
        self.path = path
        self.modules = {}
        self.badmodules = {}
        self.debug = debug
        self.indent = 0
        # A None default (instead of a literal []) keeps instances from
        # sharing one list object; a list passed by the caller is still
        # stored by reference, as before.
        self.excludes = excludes if excludes is not None else []
        self.replace_paths = replace_paths if replace_paths is not None else []
        self.processed_paths = []   # Used in debugging only

    def msg(self, level, str, *args):
        """Print *str* followed by repr() of each arg if level <= self.debug."""
        if level <= self.debug:
            for i in range(self.indent):
                print(" ", end=' ')
            print(str, end=' ')
            for arg in args:
                print(repr(arg), end=' ')
            print()

    def msgin(self, *args):
        """Like msg(), but first increase the indentation by one step."""
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent + 1
            self.msg(*args)

    def msgout(self, *args):
        """Like msg(), but first decrease the indentation by one step."""
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent - 1
            self.msg(*args)

    def run_script(self, pathname):
        """Load the source file *pathname* as the module '__main__'."""
        self.msg(2, "run_script", pathname)
        with open(pathname) as fp:
            stuff = ("", "r", imp.PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)

    def load_file(self, pathname):
        """Load the source file *pathname* under its extension-less base name."""
        _, name = os.path.split(pathname)
        name, ext = os.path.splitext(name)
        with open(pathname) as fp:
            stuff = (ext, "r", imp.PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)

    def import_hook(self, name, caller=None, fromlist=None, level=-1):
        """Simulate ``import name`` / ``from name import fromlist``.

        Returns the head package when *fromlist* is empty, otherwise None.
        Raises ImportError when a component cannot be found.
        """
        self.msg(3, "import_hook", name, caller, fromlist, level)
        parent = self.determine_parent(caller, level=level)
        q, tail = self.find_head_package(parent, name)
        m = self.load_tail(q, tail)
        if not fromlist:
            return q
        if m.__path__:
            self.ensure_fromlist(m, fromlist)
        return None

    def determine_parent(self, caller, level=-1):
        """Return the package an import made by *caller* is relative to.

        Returns None when there is no caller, when level == 0 (absolute
        import), or when the caller is a top-level non-package module.
        Raises ImportError when a relative import reaches above the
        top-level package.
        """
        self.msgin(4, "determine_parent", caller, level)
        if not caller or level == 0:
            self.msgout(4, "determine_parent -> None")
            return None
        pname = caller.__name__
        if level >= 1: # relative import
            if caller.__path__:
                # The caller is itself a package, which accounts for one level.
                level -= 1
            if level == 0:
                parent = self.modules[pname]
                assert parent is caller
                self.msgout(4, "determine_parent ->", parent)
                return parent
            if pname.count(".") < level:
                raise ImportError("relative importpath too deep")
            pname = ".".join(pname.split(".")[:-level])
            parent = self.modules[pname]
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if caller.__path__:
            parent = self.modules[pname]
            assert caller is parent
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if '.' in pname:
            i = pname.rfind('.')
            pname = pname[:i]
            parent = self.modules[pname]
            assert parent.__name__ == pname
            self.msgout(4, "determine_parent ->", parent)
            return parent
        self.msgout(4, "determine_parent -> None")
        return None

    def find_head_package(self, parent, name):
        """Import the first dotted component of *name*.

        The component is first tried relative to *parent* (if any) and then
        as a top-level module.  Returns ``(module, remaining_dotted_tail)``;
        raises ImportError when neither attempt succeeds.
        """
        self.msgin(4, "find_head_package", parent, name)
        head, _, tail = name.partition('.')
        if parent:
            qname = "%s.%s" % (parent.__name__, head)
        else:
            qname = head
        q = self.import_module(head, qname, parent)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
        if parent:
            # Relative lookup failed; retry as a top-level module.
            qname = head
            parent = None
            q = self.import_module(head, qname, parent)
            if q:
                self.msgout(4, "find_head_package ->", (q, tail))
                return q, tail
        self.msgout(4, "raise ImportError: No module named", qname)
        raise ImportError("No module named " + qname)

    def load_tail(self, q, tail):
        """Import each remaining dotted component of *tail* beneath *q*.

        Returns the innermost module; raises ImportError on the first
        component that cannot be imported.
        """
        self.msgin(4, "load_tail", q, tail)
        m = q
        while tail:
            head, _, tail = tail.partition('.')
            mname = "%s.%s" % (m.__name__, head)
            m = self.import_module(head, mname, m)
            if not m:
                self.msgout(4, "raise ImportError: No module named", mname)
                raise ImportError("No module named " + mname)
        self.msgout(4, "load_tail ->", m)
        return m

    def ensure_fromlist(self, m, fromlist, recursive=0):
        """Import every name in *fromlist* that is not yet an attribute of *m*.

        A "*" entry expands (once, not recursively) to all submodules found
        by find_all_submodules().  Raises ImportError for a name that cannot
        be imported as a submodule.
        """
        self.msg(4, "ensure_fromlist", m, fromlist, recursive)
        for sub in fromlist:
            if sub == "*":
                if not recursive:
                    submodules = self.find_all_submodules(m)
                    if submodules:
                        self.ensure_fromlist(m, submodules, 1)
            elif not hasattr(m, sub):
                subname = "%s.%s" % (m.__name__, sub)
                submod = self.import_module(sub, subname, m)
                if not submod:
                    raise ImportError("No module named " + subname)

    def find_all_submodules(self, m):
        """Return the names of candidate submodules found in m.__path__.

        Returns None when *m* has no __path__; otherwise a keys view of the
        module names, in the order they were first seen.
        """
        if not m.__path__:
            return
        modules = {}
        # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
        # But we must also collect Python extension modules - although
        # we cannot separate normal dlls from Python extensions.
        suffixes = []
        suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
        suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
        suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
        for dirname in m.__path__:
            try:
                names = os.listdir(dirname)
            except OSError:
                self.msg(2, "can't list directory", dirname)
                continue
            for name in names:
                mod = None
                for suff in suffixes:
                    n = len(suff)
                    if name[-n:] == suff:
                        mod = name[:-n]
                        break
                if mod and mod != "__init__":
                    modules[mod] = mod
        return modules.keys()

    def import_module(self, partname, fqname, parent):
        """Return the module *fqname*, loading it if necessary.

        Returns None (rather than raising) when the module is already known
        to be bad, when *parent* is not a package, or when find_module()
        raises ImportError.  On success the module is also set as attribute
        *partname* of *parent*.
        """
        self.msgin(3, "import_module", partname, fqname, parent)
        try:
            m = self.modules[fqname]
        except KeyError:
            pass
        else:
            self.msgout(3, "import_module ->", m)
            return m
        if fqname in self.badmodules:
            self.msgout(3, "import_module -> None")
            return None
        if parent and parent.__path__ is None:
            self.msgout(3, "import_module -> None")
            return None
        try:
            fp, pathname, stuff = self.find_module(partname,
                                                   parent and parent.__path__, parent)
        except ImportError:
            self.msgout(3, "import_module ->", None)
            return None
        try:
            m = self.load_module(fqname, fp, pathname, stuff)
        finally:
            if fp:
                fp.close()
        if parent:
            setattr(parent, partname, m)
        self.msgout(3, "import_module ->", m)
        return m

    def load_module(self, fqname, fp, pathname, file_info):
        """Register module *fqname* and scan its code for further imports.

        *file_info* is a ``(suffix, mode, type)`` triple.  Package
        directories are delegated to load_package(); source is compiled;
        compiled files are validated and unmarshalled; any other type is
        registered without code.  Returns the module record.
        """
        _suffix, _mode, kind = file_info
        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
        if kind == imp.PKG_DIRECTORY:
            m = self.load_package(fqname, pathname)
            self.msgout(2, "load_module ->", m)
            return m
        if kind == imp.PY_SOURCE:
            co = compile(fp.read()+'\n', pathname, 'exec')
        elif kind == imp.PY_COMPILED:
            try:
                marshal_data = importlib._bootstrap_external._validate_bytecode_header(fp.read())
            except ImportError as exc:
                self.msgout(2, "raise ImportError: " + str(exc), pathname)
                raise
            co = marshal.loads(marshal_data)
        else:
            co = None
        m = self.add_module(fqname)
        m.__file__ = pathname
        if co:
            if self.replace_paths:
                co = self.replace_paths_in_code(co)
            m.__code__ = co
            self.scan_code(co, m)
        self.msgout(2, "load_module ->", m)
        return m

    def _add_badmodule(self, name, caller):
        """Record that *caller* (or "-" if there is none) failed to import *name*."""
        if name not in self.badmodules:
            self.badmodules[name] = {}
        if caller:
            self.badmodules[name][caller.__name__] = 1
        else:
            self.badmodules[name]["-"] = 1

    def _safe_import_hook(self, name, caller, fromlist, level=-1):
        """Wrapper for import_hook() that records failures instead of raising.

        A failed import of *name* is recorded under *name*; a failed
        from-list entry is recorded under ``name + "." + entry``.
        """
        # wrapper for self.import_hook() that won't raise ImportError
        if name in self.badmodules:
            self._add_badmodule(name, caller)
            return
        try:
            self.import_hook(name, caller, level=level)
        except ImportError as msg:
            self.msg(2, "ImportError:", str(msg))
            self._add_badmodule(name, caller)
        else:
            if fromlist:
                for sub in fromlist:
                    # Failures are stored under the qualified name, so the
                    # lookup must use it too; testing the bare name would
                    # confuse "pkg.sub" with an unrelated missing top-level
                    # module called "sub".
                    fullname = name + "." + sub
                    if fullname in self.badmodules:
                        self._add_badmodule(fullname, caller)
                        continue
                    try:
                        self.import_hook(name, caller, [sub], level=level)
                    except ImportError as msg:
                        self.msg(2, "ImportError:", str(msg))
                        self._add_badmodule(fullname, caller)

    def scan_opcodes(self, co):
        """Yield ``(kind, args)`` for the store and import opcodes in *co*.

        kind is "store" with ``(name,)``, "absolute_import" with
        ``(fromlist, name)``, or "relative_import" with
        ``(level, fromlist, name)``.
        """
        # Scan the code, and yield 'interesting' opcode combinations
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
                  if op != EXTENDED_ARG]
        for i, (op, oparg) in enumerate(opargs):
            if op in STORE_OPS:
                yield "store", (names[oparg],)
                continue
            # An import is recognised as IMPORT_NAME directly preceded by
            # two LOAD_CONSTs: first the level, then the from-list.
            if (op == IMPORT_NAME and i >= 2
                    and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
                level = consts[opargs[i-2][1]]
                fromlist = consts[opargs[i-1][1]]
                if level == 0: # absolute import
                    yield "absolute_import", (fromlist, names[oparg])
                else: # relative import
                    yield "relative_import", (level, fromlist, names[oparg])
                continue

    def scan_code(self, co, m):
        """Process the imports and global stores found in *co* for module *m*.

        Nested code objects in co.co_consts are scanned recursively.
        """
        for what, args in self.scan_opcodes(co):
            if what == "store":
                name, = args
                m.globalnames[name] = 1
            elif what == "absolute_import":
                fromlist, name = args
                have_star = 0
                if fromlist is not None:
                    if "*" in fromlist:
                        have_star = 1
                    fromlist = [f for f in fromlist if f != "*"]
                self._safe_import_hook(name, m, fromlist, level=0)
                if have_star:
                    # We've encountered an "import *". If it is a Python module,
                    # the code has already been parsed and we can suck out the
                    # global names.
                    mm = None
                    if m.__path__:
                        # At this point we don't know whether 'name' is a
                        # submodule of 'm' or a global module. Let's just try
                        # the full name first.
                        mm = self.modules.get(m.__name__ + "." + name)
                    if mm is None:
                        mm = self.modules.get(name)
                    if mm is not None:
                        m.globalnames.update(mm.globalnames)
                        m.starimports.update(mm.starimports)
                        if mm.__code__ is None:
                            m.starimports[name] = 1
                    else:
                        m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    parent = self.determine_parent(m, level=level)
                    self._safe_import_hook(parent.__name__, None, fromlist, level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)

        for c in co.co_consts:
            if isinstance(c, type(co)):
                self.scan_code(c, m)

    def load_package(self, fqname, pathname):
        """Register the package directory *pathname* and load its __init__.

        The name is first mapped through replacePackageMap, and any extra
        directories registered in packagePathMap are appended to __path__.
        """
        self.msgin(2, "load_package", fqname, pathname)
        newname = replacePackageMap.get(fqname)
        if newname:
            fqname = newname
        m = self.add_module(fqname)
        m.__file__ = pathname
        m.__path__ = [pathname]

        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
            self.msgout(2, "load_package ->", m)
            return m
        finally:
            if fp:
                fp.close()

    def add_module(self, fqname):
        """Return the record for *fqname*, creating and registering it if new."""
        if fqname in self.modules:
            return self.modules[fqname]
        self.modules[fqname] = m = Module(fqname)
        return m

    def find_module(self, name, path, parent=None):
        """Locate module *name*, returning what imp.find_module() returns.

        Raises ImportError when the fully qualified name is in
        self.excludes.  With path None, built-in modules are reported as
        ``(None, None, ("", "", imp.C_BUILTIN))`` and everything else is
        searched for on self.path.
        """
        if parent is not None:
            # assert path is not None
            fullname = parent.__name__+'.'+name
        else:
            fullname = name
        if fullname in self.excludes:
            self.msgout(3, "find_module -> Excluded", fullname)
            raise ImportError(name)

        if path is None:
            if name in sys.builtin_module_names:
                return (None, None, ("", "", imp.C_BUILTIN))

            path = self.path
        return imp.find_module(name, path)

    def report(self):
        """Print a report to stdout, listing the found modules with their
        paths, as well as modules that are missing, or seem to be missing.
        """
        print()
        print(" %-25s %s" % ("Name", "File"))
        print(" %-25s %s" % ("----", "----"))
        # Print modules found
        keys = sorted(self.modules.keys())
        for key in keys:
            m = self.modules[key]
            if m.__path__:
                print("P", end=' ')
            else:
                print("m", end=' ')
            print("%-25s" % key, m.__file__ or "")

        # Print missing modules
        missing, maybe = self.any_missing_maybe()
        if missing:
            print()
            print("Missing modules:")
            for name in missing:
                mods = sorted(self.badmodules[name].keys())
                print("?", name, "imported from", ', '.join(mods))
        # Print modules that may be missing, but then again, maybe not...
        if maybe:
            print()
            print("Submodules that appear to be missing, but could also be", end=' ')
            print("global names in the parent package:")
            for name in maybe:
                mods = sorted(self.badmodules[name].keys())
                print("?", name, "imported from", ', '.join(mods))

    def any_missing(self):
        """Return a list of modules that appear to be missing. Use
        any_missing_maybe() if you want to know which modules are
        certain to be missing, and which *may* be missing.
        """
        missing, maybe = self.any_missing_maybe()
        return missing + maybe

    def any_missing_maybe(self):
        """Return two lists, one with modules that are certainly missing
        and one with modules that *may* be missing. The latter names could
        either be submodules *or* just global names in the package.

        The reason it can't always be determined is that it's impossible to
        tell which names are imported when "from module import *" is done
        with an extension module, short of actually importing it.
        """
        missing = []
        maybe = []
        for name in self.badmodules:
            if name in self.excludes:
                continue
            i = name.rfind(".")
            if i < 0:
                missing.append(name)
                continue
            subname = name[i+1:]
            pkgname = name[:i]
            pkg = self.modules.get(pkgname)
            if pkg is not None:
                if pkgname in self.badmodules[name]:
                    # The package tried to import this module itself and
                    # failed. It's definitely missing.
                    missing.append(name)
                elif subname in pkg.globalnames:
                    # It's a global in the package: definitely not missing.
                    pass
                elif pkg.starimports:
                    # It could be missing, but the package did an "import *"
                    # from a non-Python module, so we simply can't be sure.
                    maybe.append(name)
                else:
                    # It's not a global in the package, the package didn't
                    # do funny star imports, it's very likely to be missing.
                    # The symbol could be inserted into the package from the
                    # outside, but since that's not good style we simply list
                    # it missing.
                    missing.append(name)
            else:
                missing.append(name)
        missing.sort()
        maybe.sort()
        return missing, maybe

    def replace_paths_in_code(self, co):
        """Return a copy of *co* with its filename prefix rewritten.

        The first (old, new) pair in self.replace_paths whose old prefix
        matches the normalised co_filename is applied.  Nested code objects
        in co_consts are rewritten recursively.
        """
        new_filename = original_filename = os.path.normpath(co.co_filename)
        for f, r in self.replace_paths:
            if original_filename.startswith(f):
                new_filename = r + original_filename[len(f):]
                break

        if self.debug and original_filename not in self.processed_paths:
            if new_filename != original_filename:
                self.msgout(2, "co_filename %r changed to %r" \
                                    % (original_filename,new_filename,))
            else:
                self.msgout(2, "co_filename %r remains unchanged" \
                                    % (original_filename,))
            self.processed_paths.append(original_filename)

        consts = tuple(
            self.replace_paths_in_code(const)
            if isinstance(const, type(co)) else const
            for const in co.co_consts
        )

        # The positional CodeType constructor signature differs between
        # Python versions; code.replace() (where it exists) copies every
        # other field unchanged and is therefore preferred.
        if hasattr(co, "replace"):
            return co.replace(co_consts=consts, co_filename=new_filename)
        return types.CodeType(co.co_argcount, co.co_kwonlyargcount,
                              co.co_nlocals, co.co_stacksize, co.co_flags,
                              co.co_code, consts, co.co_names,
                              co.co_varnames, new_filename, co.co_name,
                              co.co_firstlineno, co.co_lnotab, co.co_freevars,
                              co.co_cellvars)
568n/a
def test():
    """Command-line driver: analyse a script and print the module report.

    Options: -d raises the debug level, -q silences it, -m switches the
    extra arguments to module-name mode, -p adds os.pathsep-separated
    directories in front of the search path, -x excludes a module name.
    Returns the ModuleFinder instance, or None if option parsing failed.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error as msg:
        print(msg)
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for flag, value in opts:
        if flag == '-d':
            debug += 1
        elif flag == '-m':
            domods = 1
        elif flag == '-p':
            addpath = addpath + value.split(os.pathsep)
        elif flag == '-q':
            debug = 0
        elif flag == '-x':
            exclude.append(value)

    # Provide default arguments
    script = args[0] if args else "hello.py"

    # Set the path based on sys.path and the script directory
    search_path = list(sys.path)
    search_path[0] = os.path.dirname(script)
    search_path = addpath + search_path
    if debug > 1:
        print("path:")
        for entry in search_path:
            print(" ", repr(entry))

    # Create the module finder and turn its crank
    mf = ModuleFinder(search_path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
        elif not domods:
            mf.load_file(arg)
        elif arg.endswith('.*'):
            mf.import_hook(arg[:-2], None, ["*"])
        else:
            mf.import_hook(arg)
    mf.run_script(script)
    mf.report()
    return mf # for -i debugging


if __name__ == '__main__':
    try:
        mf = test()
    except KeyboardInterrupt:
        print("\n[interrupted]")