
Python code coverage for Lib/tabnanny.py

#! /usr/bin/env python3

"""The Tab Nanny despises ambiguous indentation. She knows no mercy.

tabnanny -- Detection of ambiguous indentation

For the time being this module is intended to be called as a script.
However it is possible to import it into an IDE and use the function
check() described below.

Warning: The API provided by this module is likely to change in future
releases; such changes may not be backward compatible.
"""

# Released to the public domain, by Tim Peters, 15 April 1998.

# XXX Note: this is now a standard library module.
# XXX The API needs to undergo changes however; the current code is too
# XXX script-like. This will be addressed later.

__version__ = "6"

import os
import sys
import getopt
import tokenize
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

__all__ = ["check", "NannyNag", "process_tokens"]

verbose = 0
filename_only = 0

def errprint(*args):
    sep = ""
    for arg in args:
        sys.stderr.write(sep + str(arg))
        sep = " "
    sys.stderr.write("\n")

def main():
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    for o, a in opts:
        if o == '-q':
            filename_only = filename_only + 1
        if o == '-v':
            verbose = verbose + 1
    if not args:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)

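When the module is run as a script (python -m tabnanny also works, thanks to the __main__ guard at the bottom of the file), main() accepts -v for increasingly verbose reporting and -q to print only the names of offending files. A couple of illustrative invocations, with hypothetical paths, shown here as comments:

#   python -m tabnanny -v some_package/     # hypothetical path; report progress per file checked
#   python -m tabnanny -q one.py two.py     # hypothetical files; print only offending filenames
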
class NannyNag(Exception):
    """
    Raised by process_tokens() if detecting an ambiguous indent.
    Captured and handled in check().
    """
    def __init__(self, lineno, msg, line):
        self.lineno, self.msg, self.line = lineno, msg, line
    def get_lineno(self):
        return self.lineno
    def get_msg(self):
        return self.msg
    def get_line(self):
        return self.line

def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using the print function.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if (os.path.isdir(fullname) and
                not os.path.islink(fullname) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = tokenize.open(file)
    except OSError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    if verbose > 1:
        print("checking %r ..." % file)

    try:
        process_tokens(tokenize.generate_tokens(f.readline))

    except tokenize.TokenError as msg:
        errprint("%r: Token Error: %s" % (file, msg))
        return

    except IndentationError as msg:
        errprint("%r: Indentation Error: %s" % (file, msg))
        return

    except NannyNag as nag:
        badline = nag.get_lineno()
        line = nag.get_line()
        if verbose:
            print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
            print("offending line: %r" % (line,))
            print(nag.get_msg())
        else:
            if ' ' in file: file = '"' + file + '"'
            if filename_only: print(file)
            else: print(file, badline, repr(line))
        return

    finally:
        f.close()

    if verbose:
        print("%r: Clean bill of health." % (file,))

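As the module docstring says, check() can also be imported and called directly. A minimal sketch, with an illustrative path; setting the module-level verbose flag mirrors the -v option:

import tabnanny

tabnanny.verbose = 1                  # same effect as one -v on the command line
tabnanny.check("some_project/")       # hypothetical directory; .py files are checked recursively
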
class Whitespace:
    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n = n + 1
                b = b + 1
            elif ch == T:
                n = n + 1
                nt = nt + 1
                if b >= len(count):
                    count = count + [0] * (b - len(count) + 1)
                count[b] = count[b] + 1
                b = 0
            else:
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

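    # A worked example of the normal form, with an illustrative input traced
    # through __init__ above: for Whitespace(" \t ") -- space, tab, space --
    #     .n    == 3             three leading whitespace characters
    #     .nt   == 1             one of them is a tab
    #     .norm == ((0, 1), 1)   one run of "one space followed by a tab",
    #                            plus one trailing space
    #     .is_simple == False    raw[:n] is not of the form (T*)(S*)
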
    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        count, trailing = self.norm
        return max(len(count)-1, trailing)

    def indent_level(self, tabsize):
        # count, il = self.norm
        # for i in range(len(count)):
        #    if count[i]:
        #        il = il + (i//tabsize + 1)*tabsize * count[i]
        # return il

        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        #      trailing + ts * sum (i//ts + 1)*count[i] =
        #      trailing + ts * sum i//ts*count[i] + count[i] =
        #      trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        #      trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts

        count, trailing = self.norm
        il = 0
        for i in range(tabsize, len(count)):
            il = il + i//tabsize * count[i]
        return trailing + tabsize * (il + self.nt)

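    # Worked arithmetic for indent_level(), with illustrative inputs:
    #     Whitespace("\t ").indent_level(8) == 9    (trailing=1, nt=1: 1 + 8*1)
    #     Whitespace("\t ").indent_level(4) == 5    (1 + 4*1)
    #     Whitespace(" " * 8).indent_level(8) == 8  (eight spaces span eight
    #                                                columns at any tab size)
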
    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            if self.indent_level(ts) != other.indent_level(ts):
                a.append( (ts,
                           self.indent_level(ts),
                           other.indent_level(ts)) )
        return a

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way. I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n+1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            if self.indent_level(ts) >= other.indent_level(ts):
                a.append( (ts,
                           self.indent_level(ts),
                           other.indent_level(ts)) )
        return a

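    # An example of the ambiguity these predicates expose, with illustrative
    # inputs: a lone tab and eight spaces line up only at tab size 8, so
    #     Whitespace("\t").equal(Whitespace(" " * 8))  is False, and
    #     Whitespace("\t").not_equal_witness(Whitespace(" " * 8))  returns a
    #     witness (ts, i1, i2) for every tab size from 1 through 9 except 8,
    #     e.g. (1, 1, 8).
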
def format_witnesses(w):
    firsts = (str(tup[0]) for tup in w)
    prefix = "at tab size"
    if len(w) > 1:
        prefix = prefix + "s"
    return prefix + " " + ', '.join(firsts)

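# For example, with a hypothetical witness list, this yields the phrase used
# in the diagnostic messages built in process_tokens() below:
#     format_witnesses([(1, 1, 8), (2, 2, 8)]) == "at tab sizes 1, 2"
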
def process_tokens(tokens):
    INDENT = tokenize.INDENT
    DEDENT = tokenize.DEDENT
    NEWLINE = tokenize.NEWLINE
    JUNK = tokenize.COMMENT, tokenize.NL
    indents = [Whitespace("")]
    check_equal = 0

    for (type, token, start, end, line) in tokens:
        if type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = 1

        elif type == INDENT:
            check_equal = 0
            thisguy = Whitespace(token)
            if not indents[-1].less(thisguy):
                witness = indents[-1].not_less_witness(thisguy)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(thisguy)

        elif type == DEDENT:
            # there's nothing we need to check here! what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # Ouch! This assert triggers if the last line of the source
            # is indented *and* lacks a newline -- then DEDENTs pop out
            # of thin air.
            # assert check_equal # else no earlier NEWLINE, or an earlier INDENT
            check_equal = 1

            del indents[-1]

        elif check_equal and type not in JUNK:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # "indents" stack was seeded
            check_equal = 0
            thisguy = Whitespace(line)
            if not indents[-1].equal(thisguy):
                witness = indents[-1].not_equal_witness(thisguy)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)


if __name__ == '__main__':
    main()
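
To see process_tokens() catch an ambiguous mix without touching the file system, the token stream can be built from an in-memory string. A minimal sketch; the source string is illustrative, and on newer Python versions the tokenizer itself may reject the tab/space mix before process_tokens() runs, which is why both exception paths are handled:

import io
import tokenize
import tabnanny

# Line 2 is indented with a tab, line 3 with eight spaces -- ambiguous indentation.
src = "if True:\n\tx = 1\n        y = 2\n"
try:
    tabnanny.process_tokens(tokenize.generate_tokens(io.StringIO(src).readline))
except tabnanny.NannyNag as nag:
    # Something like: 3 indent not equal e.g. at tab sizes 1, 2, 3, 4, 5, 6, 7, 9
    print(nag.get_lineno(), nag.get_msg())
except (tokenize.TokenError, IndentationError) as err:
    # Newer tokenizers can reject the inconsistent indentation themselves.
    print(err)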