| 1 | n/a | #!/usr/bin/env python3 |
|---|
| 2 | n/a | |
|---|
| 3 | n/a | """\ |
|---|
| 4 | n/a | List python source files. |
|---|
| 5 | n/a | |
|---|
| 6 | n/a | There are three functions to check whether a file is a Python source, listed |
|---|
| 7 | n/a | here with increasing complexity: |
|---|
| 8 | n/a | |
|---|
| 9 | n/a | - has_python_ext() checks whether a file name ends in '.py[w]'. |
|---|
| 10 | n/a | - looks_like_python() checks whether the file is not binary and either has |
|---|
| 11 | n/a | the '.py[w]' extension or the first line contains the word 'python'. |
|---|
| 12 | n/a | - can_be_compiled() checks whether the file can be compiled by compile(). |
|---|
| 13 | n/a | |
|---|
| 14 | n/a | The file also must be of appropriate size - not bigger than a megabyte. |
|---|
| 15 | n/a | |
|---|
| 16 | n/a | walk_python_files() recursively lists all Python files under the given directories. |
|---|
| 17 | n/a | """ |
|---|
| 18 | n/a | __author__ = "Oleg Broytmann, Georg Brandl" |
|---|
| 19 | n/a | |
|---|
| 20 | n/a | __all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"] |
|---|
| 21 | n/a | |
|---|
| 22 | n/a | |
|---|
| 23 | n/a | import os, re |
|---|
| 24 | n/a | |
|---|
# Bytes pattern matching ASCII control characters (0x00-0x08, 0x0E-0x1F, 0x7F);
# looks_like_python() treats a first line containing any of them as binary.
binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]')

# When true, print_debug() prints its diagnostic messages; otherwise it is silent.
debug = False
|---|
| 28 | n/a | |
|---|
def print_debug(msg):
    """Print *msg* when the module-level ``debug`` flag is true; otherwise do nothing."""
    if not debug:
        return
    print(msg)
|---|
| 31 | n/a | |
|---|
| 32 | n/a | |
|---|
def _open(fullpath):
    """Open *fullpath* for binary reading, provided it is not too large.

    Returns the open binary file object, or None when the file cannot be
    stat()ed, is bigger than 1024*1024 bytes, or cannot be opened.  Every
    rejection is reported through print_debug().  Closing the returned file
    is left to the caller.
    """
    size_limit = 1024*1024

    try:
        nbytes = os.stat(fullpath).st_size
    except OSError as exc:
        # stat() failed (reported as "permission denied") - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, exc))
        return None

    if nbytes > size_limit:
        # too big to be worth examining
        print_debug("%s: the file is too big: %d bytes" % (fullpath, nbytes))
        return None

    try:
        handle = open(fullpath, "rb")
    except IOError as exc:
        # Access denied, or a special file - ignore it
        print_debug("%s: access denied: %s" % (fullpath, exc))
        return None
    return handle
|---|
| 49 | n/a | |
|---|
def has_python_ext(fullpath):
    """Return True if the name *fullpath* ends in '.py' or '.pyw'."""
    # str.endswith() accepts a tuple and succeeds if any suffix matches.
    return fullpath.endswith((".py", ".pyw"))
|---|
| 52 | n/a | |
|---|
def looks_like_python(fullpath):
    """Heuristically decide whether *fullpath* is a Python source file.

    The file is opened through _open(), so files that cannot be opened or
    that are too big are rejected.  Only the first line is read: if it
    contains a byte matched by ``binary_re`` the file is considered binary
    and rejected.  Otherwise the file is accepted when its name has a Python
    extension (see has_python_ext()) or when the first line contains the
    word 'python' (a disguised script, e.g. CGI).

    Returns True or False.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    with infile:
        line = infile.readline()

    if binary_re.search(line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    # Use the shared extension helper rather than repeating the suffix test;
    # failing that, look for a disguised Python script (e.g. CGI).
    return has_python_ext(fullpath) or b"python" in line
|---|
| 73 | n/a | |
|---|
def can_be_compiled(fullpath):
    """Return True if the contents of *fullpath* are accepted by compile().

    The file is opened through _open(); when that yields None the result is
    False.  The whole file is read as bytes and compiled in "exec" mode.  Any
    Exception raised by compile() is reported via print_debug() and turns
    the result into False.
    """
    source_file = _open(fullpath)
    if source_file is None:
        return False

    with source_file:
        source = source_file.read()

    try:
        compile(source, fullpath, "exec")
    except Exception as exc:
        print_debug("%s: cannot compile: %s" % (fullpath, exc))
        return False
    else:
        return True
|---|
| 89 | n/a | |
|---|
| 90 | n/a | |
|---|
def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Generate the Python source files found at or below the given paths.

    paths: an iterable of file and/or directory names to examine; files are
           tested directly, directories are walked with os.walk()
    is_python: a callable taking a file name and returning whether that file
               is a Python source file
    exclude_dirs: a list of directory base names that are not descended into
                  during the walk (default: nothing is excluded)
    """
    if exclude_dirs is None:
        exclude_dirs = []

    for path in paths:
        print_debug("testing: %s" % path)

        if os.path.isfile(path):
            if is_python(path):
                yield path
            continue

        if not os.path.isdir(path):
            print_debug(" unknown type")
            continue

        print_debug(" it is a directory")
        for root, subdirs, files in os.walk(path):
            # Removing entries from the list os.walk() handed us keeps it
            # from descending into the excluded directories.
            for unwanted in exclude_dirs:
                if unwanted in subdirs:
                    subdirs.remove(unwanted)

            for basename in files:
                candidate = os.path.join(root, basename)
                print_debug("testing: %s" % candidate)
                if is_python(candidate):
                    yield candidate
|---|
| 122 | n/a | |
|---|
| 123 | n/a | |
|---|
if __name__ == "__main__":
    # Two simple examples/tests: list the current directory first with the
    # default looks_like_python() check, then with can_be_compiled(), with a
    # separator line printed between the two listings.
    for position, checker in enumerate((looks_like_python, can_be_compiled)):
        if position:
            print("----------")
        for found in walk_python_files(['.'], is_python=checker):
            print(found)
|---|