» Core Development > Code coverage > Lib/idlelib/PyParse.py

Python code coverage for Lib/idlelib/PyParse.py

# | count | content
import re
import sys

# Reason the last stmt is continued (or C_NONE if it's not).
#   C_BACKSLASH          - the final line ends with a backslash-newline
#   C_STRING_FIRST_LINE  - inside an unclosed string, still on its first line
#   C_STRING_NEXT_LINES  - inside an unclosed string, past its first line
#   C_BRACKET            - inside an unclosed (, [ or {
(C_NONE, C_BACKSLASH, C_STRING_FIRST_LINE,
 C_STRING_NEXT_LINES, C_BRACKET) = range(5)

if 0:   # for throwaway debugging output; flip to 1 to enable dump()
    def dump(*stuff):
        """Write the space-joined str() of each argument, plus a newline.

        Goes to sys.__stdout__ (the interpreter's original stdout) rather
        than sys.stdout.
        """
        sys.__stdout__.write(" ".join(map(str, stuff)) + "\n")

# Find what looks like the start of a popular stmt: optional leading
# blanks/tabs at the start of a line, then one of the keywords below as a
# whole word.  Bound to .search, so it scans forward for the first hit.

_synchre = re.compile(r"""
    ^
    [ \t]*
    (?: while
    |   else
    |   def
    |   return
    |   assert
    |   break
    |   class
    |   continue
    |   elif
    |   try
    |   except
    |   raise
    |   import
    |   yield
    )
    \b
""", re.VERBOSE | re.MULTILINE).search

# Match blank line or non-indenting comment line (a "#" immediately
# followed by a non-whitespace character).  Bound to .match, so it only
# tests at the position it is given.

_junkre = re.compile(r"""
    [ \t]*
    (?: \# \S .* )?
    \n
""", re.VERBOSE).match

# Match any flavor of string; the terminating quote is optional
# so that we're robust in the face of incomplete program text.
# The four alternatives are, in order: """...""", "...", '''...''', '...'.
# The single-quoted forms stop at an unescaped newline.

_match_stringre = re.compile(r"""
    \""" [^"\\]* (?:
                     (?: \\. | "(?!"") )
                     [^"\\]*
                 )*
    (?: \""" )?

|   " [^"\\\n]* (?: \\. [^"\\\n]* )* "?

|   ''' [^'\\]* (?:
                   (?: \\. | '(?!'') )
                   [^'\\]*
                )*
    (?: ''' )?

|   ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
""", re.VERBOSE | re.DOTALL).match

# Match a line that starts with something interesting;
# used to find the first item of a bracket structure.
# "Interesting" means anything other than whitespace, "#" or a backslash.

_itemre = re.compile(r"""
    [ \t]*
    [^\s#\\]    # if we match, m.end()-1 is the interesting char
""", re.VERBOSE).match

# Match start of stmts that should be followed by a dedent: optional
# whitespace, then one of the keywords below as a whole word.

_closere = re.compile(r"""
    \s*
    (?: return
    |   break
    |   continue
    |   raise
    |   pass
    )
    \b
""", re.VERBOSE).match

# Chew up non-special chars as quickly as possible.  If match is
# successful, m.end() less 1 is the index of the last boring char
# matched.  If match is unsuccessful, the string starts with an
# interesting char (a bracket, a quote, "#" or a backslash).

_chew_ordinaryre = re.compile(r"""
    [^[\](){}#'"\\]+
""", re.VERBOSE).match

# Build translation table (code point -> replacement, for str.translate)
# to map uninteresting chars to "x", open brackets to "(", and close
# brackets to ")".  Quotes, backslash, newline and "#" map to themselves.
# Only code points 0..255 are covered.

_tran = dict.fromkeys(range(256), 'x')
_tran.update((ord(c), '(') for c in "({[")
_tran.update((ord(c), ')') for c in ")}]")
_tran.update((ord(c), c) for c in "\"'\\\n#")

class Parser:
    """Analyse the tail end of a chunk of Python source text.

    Typical use: set_str() the text, optionally trim it with
    find_good_parse_start() + set_lo(), then query the continuation type,
    indentation and bracketing of the last statement.  The analysis is
    done lazily in two stages (_study1, _study2) and cached via
    self.study_level until the next set_str().
    """

    # Index of the last open bracket ("(", "[" or "{") in the last
    # interesting stmt, or None if there is none.  None until _study2
    # has run.
    lastopenbracketpos = None

    # The structure of the bracketing of the last interesting statement,
    # in the format described in _study2.  None until _study2 has run.
    stmt_bracketing = None

    def __init__(self, indentwidth, tabwidth):
        """Remember the indent width and tab width used for indent sizes."""
        self.indentwidth = indentwidth
        self.tabwidth = tabwidth

    def set_str(self, s):
        """Install s as the text to analyse and discard any cached study.

        s must be empty or end with a newline.
        """
        assert len(s) == 0 or s[-1] == '\n'
        if isinstance(s, str):
            # _tran only covers code points below 256 and str.translate
            # leaves unmapped characters untouched, so flatten everything
            # that is not plain ASCII to "x" up front.  This is "safe" so
            # long as the only characters germane to parsing the structure
            # of Python are 7-bit ASCII.
            s = "".join(c if ord(c) < 127 else "x" for c in s)
        self.str = s
        self.study_level = 0

    def find_good_parse_start(self, is_char_in_string=None,
                              _synchre=_synchre):
        """Return index of a good place to begin parsing, or None.

        The result is as close to the end of the string as possible and is
        the start of some popular stmt like "while" or "def" (see
        _synchre).  Return None if none found: the caller should pass more
        prior context then, if possible, or if not (the entire program
        text up until the point of interest has already been tried) pass 0
        to set_lo.

        This will be reliable iff given a reliable is_char_in_string
        function, meaning that when it says "no", it's absolutely
        guaranteed that the char is not in a string.  Without such a
        function None is returned immediately.
        """
        code, pos = self.str, None

        if not is_char_in_string:
            # no clue -- make the caller pass everything
            return None

        # Peek back from the end for a good place to start,
        # but don't try too often; pos will be left None, or
        # bumped to a legitimate synch point.
        limit = len(code)
        for _ in range(5):
            i = code.rfind(":\n", 0, limit)
            if i < 0:
                break
            i = code.rfind('\n', 0, i) + 1  # start of colon line
            m = _synchre(code, i, limit)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
                break
            limit = i
        if pos is None:
            # Nothing looks like a block-opener, or stuff does
            # but is_char_in_string keeps returning true; most likely
            # we're in or near a giant string, the colorizer hasn't
            # caught up enough to be helpful, or there simply *aren't*
            # any interesting stmts.  In any of these cases we're
            # going to have to parse the whole thing to be sure, so
            # give it one last try from the start, but stop wasting
            # time here regardless of the outcome.
            m = _synchre(code)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
            return pos

        # Peeking back worked; look forward until _synchre no longer
        # matches.
        i = pos + 1
        while True:
            m = _synchre(code, i)
            if not m:
                break
            s, i = m.span()
            if not is_char_in_string(s):
                pos = s
        return pos

    def set_lo(self, lo):
        """Throw away the start of the string (everything before lo).

        Intended to be called with find_good_parse_start's result.
        lo must be 0 or the index just after a newline.
        """
        assert lo == 0 or self.str[lo-1] == '\n'
        if lo > 0:
            self.str = self.str[lo:]

    def _study1(self):
        """Find the line numbers (0-based) of the non-continuation lines.

        Creates self.{goodlines, continuation}.  Does nothing if this
        stage has already run for the current text.
        """
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        code = self.str
        code = code.translate(_tran)
        code = code.replace('xxxxxxxx', 'x')
        code = code.replace('xxxx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('\nx', '\n')
        # note that replacing x\n with \n would be incorrect, because
        # x may be preceded by a backslash

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.goodlines = goodlines = [0]
        push_good = goodlines.append
        i, n = 0, len(code)
        while i < n:
            ch = code[i]
            i = i+1

            # cases are checked in decreasing order of frequency
            if ch == 'x':
                continue

            if ch == '\n':
                lno = lno + 1
                if level == 0:
                    push_good(lno)
                # else we're in an unclosed bracket structure
                continue

            if ch == '(':
                level = level + 1
                continue

            if ch == ')':
                if level:
                    level = level - 1
                # else the program is invalid, but we can't complain
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                if code[i-1:i+2] == quote * 3:
                    quote = quote * 3
                firstlno = lno
                w = len(quote) - 1
                i = i+w
                while i < n:
                    ch = code[i]
                    i = i+1

                    if ch == 'x':
                        continue

                    if code[i-1:i+w] == quote:
                        i = i+w
                        break

                    if ch == '\n':
                        lno = lno + 1
                        if w == 0:
                            # unterminated single-quoted string
                            if level == 0:
                                push_good(lno)
                            break
                        continue

                    if ch == '\\':
                        assert i < n
                        if code[i] == '\n':
                            lno = lno + 1
                        i = i+1
                        continue

                    # else comment char or paren inside string

                else:
                    # didn't break out of the loop, so we're still
                    # inside a string
                    if (lno - 1) == firstlno:
                        # before the previous \n in code, we were in the
                        # first line of the string
                        continuation = C_STRING_FIRST_LINE
                    else:
                        continuation = C_STRING_NEXT_LINES
                continue    # with outer loop

            if ch == '#':
                # consume the comment
                i = code.find('\n', i)
                assert i >= 0
                continue

            assert ch == '\\'
            assert i < n
            if code[i] == '\n':
                lno = lno + 1
                if i+1 == n:
                    continuation = C_BACKSLASH
            i = i+1

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if (continuation != C_STRING_FIRST_LINE
                and continuation != C_STRING_NEXT_LINES and level > 0):
            continuation = C_BRACKET
        self.continuation = continuation

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        assert (continuation == C_NONE) == (goodlines[-1] == lno)
        if goodlines[-1] != lno:
            push_good(lno)

    def get_continuation_type(self):
        """Return the C_* constant saying why the last stmt is continued."""
        self._study1()
        return self.continuation

    def _study2(self):
        """Analyse the last interesting statement character by character.

        study1 was sufficient to determine the continuation status,
        but doing more requires looking at every character.  study2
        does this for the last interesting statement in the block.
        Creates:
            self.stmt_start, stmt_end
                slice indices of last interesting stmt
            self.stmt_bracketing
                the bracketing structure of the last interesting stmt, as
                a tuple of (index, nesting depth) pairs; for example, for
                the statement "say(boo) or die", stmt_bracketing will be
                ((0, 0), (3, 1), (8, 0)).  Strings and comments are
                treated as brackets, for the matter.
            self.lastch
                last non-whitespace character before optional trailing
                comment
            self.lastopenbracketpos
                index of the last still-open bracket, or None if no
                bracket is left open
        """
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        # Set p and q to slice indices of last interesting stmt.
        code, goodlines = self.str, self.goodlines
        i = len(goodlines) - 1
        p = len(code)   # index of newest line
        while i:
            assert p
            # p is the index of the stmt at line number goodlines[i].
            # Move p back to the stmt at line number goodlines[i-1].
            q = p
            for _ in range(goodlines[i-1], goodlines[i]):
                # tricky: sets p to 0 if no preceding newline
                p = code.rfind('\n', 0, p-1) + 1
            # The stmt code[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if _junkre(code, p):
                i = i-1
            else:
                break
        if i == 0:
            # nothing but junk!
            assert p == 0
            q = p
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        lastch = ""
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        bracketing = [(p, 0)]
        while p < q:
            # suck up all except ()[]{}'"#\\
            m = _chew_ordinaryre(code, p, q)
            if m:
                # we skipped at least one boring char
                newp = m.end()
                # back up over totally boring whitespace
                i = newp - 1    # index of last boring char
                while i >= p and code[i] in " \t\n":
                    i = i-1
                if i >= p:
                    lastch = code[i]
                p = newp
                if p >= q:
                    break

            ch = code[p]

            if ch in "([{":
                push_stack(p)
                bracketing.append((p, len(stack)))
                lastch = ch
                p = p+1
                continue

            if ch in ")]}":
                if stack:
                    del stack[-1]
                lastch = ch
                p = p+1
                bracketing.append((p, len(stack)))
                continue

            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                bracketing.append((p, len(stack)+1))
                lastch = ch
                p = _match_stringre(code, p, q).end()
                bracketing.append((p, len(stack)))
                continue

            if ch == '#':
                # consume comment and trailing newline
                bracketing.append((p, len(stack)+1))
                p = code.find('\n', p, q) + 1
                assert p > 0
                bracketing.append((p, len(stack)))
                continue

            assert ch == '\\'
            p = p+1     # beyond backslash
            assert p < q
            if code[p] != '\n':
                # the program is invalid, but can't complain
                lastch = ch + code[p]
            p = p+1     # beyond escaped char

        # end while p < q:

        self.lastch = lastch
        # Always assign, so that a Parser reused via set_str() never
        # reports a bracket position left over from earlier text.
        self.lastopenbracketpos = stack[-1] if stack else None
        self.stmt_bracketing = tuple(bracketing)

    def compute_bracket_indent(self):
        """Return the number of spaces the next line should be indented.

        Assumes continuation is C_BRACKET.
        """
        self._study2()
        assert self.continuation == C_BRACKET
        j = self.lastopenbracketpos
        code = self.str
        n = len(code)
        origi = i = code.rfind('\n', 0, j) + 1
        j = j+1     # one beyond open bracket
        # find first list item; set i to start of its line
        while j < n:
            m = _itemre(code, j)
            if m:
                j = m.end() - 1     # index of first interesting char
                extra = 0
                break
            else:
                # this line is junk; advance to next line
                i = j = code.find('\n', j) + 1
        else:
            # nothing interesting follows the bracket;
            # reproduce the bracket line's indentation + a level
            j = i = origi
            while code[j] in " \t":
                j = j+1
            extra = self.indentwidth
        return len(code[i:j].expandtabs(self.tabwidth)) + extra

    def get_num_lines_in_stmt(self):
        """Return number of physical lines in last stmt.

        This counts the last stmt whether or not it's an interesting stmt!
        It is intended to be called when continuation is C_BACKSLASH.
        """
        self._study1()
        goodlines = self.goodlines
        return goodlines[-1] - goodlines[-2]

    def compute_backslash_indent(self):
        """Return the number of spaces the next line should be indented.

        Assumes continuation is C_BACKSLASH, and that the new line is the
        first one following the initial line of the stmt.
        """
        self._study2()
        assert self.continuation == C_BACKSLASH
        code = self.str
        i = self.stmt_start
        while code[i] in " \t":
            i = i+1
        startpos = i

        # See whether the initial line starts an assignment stmt; i.e.,
        # look for an = operator
        endpos = code.find('\n', startpos) + 1
        found = level = 0
        while i < endpos:
            ch = code[i]
            if ch in "([{":
                level = level + 1
                i = i+1
            elif ch in ")]}":
                if level:
                    level = level - 1
                i = i+1
            elif ch == '"' or ch == "'":
                i = _match_stringre(code, i, endpos).end()
            elif ch == '#':
                break
            elif (level == 0 and ch == '='
                  and (i == 0 or code[i-1] not in "=<>!")
                  and code[i+1] != '='):
                found = 1
                break
            else:
                i = i+1

        if found:
            # found a legit =, but it may be the last interesting
            # thing on the line
            i = i+1     # move beyond the =
            found = re.match(r"\s*\\", code[i:endpos]) is None

        if not found:
            # oh well ... settle for moving beyond the first chunk
            # of non-whitespace chars
            i = startpos
            while code[i] not in " \t\n":
                i = i+1

        return len(code[self.stmt_start:i].expandtabs(self.tabwidth)) + 1

    def get_base_indent_string(self):
        """Return the leading whitespace on the initial line of the last
        interesting stmt.
        """
        self._study2()
        i, n = self.stmt_start, self.stmt_end
        j = i
        code = self.str
        while j < n and code[j] in " \t":
            j = j + 1
        return code[i:j]

    def is_block_opener(self):
        """Did the last interesting stmt open a block (end with ":")?"""
        self._study2()
        return self.lastch == ':'

    def is_block_closer(self):
        """Did the last interesting stmt close a block (see _closere)?"""
        self._study2()
        return _closere(self.str, self.stmt_start) is not None

    def get_last_open_bracket_pos(self):
        """Return index of last open bracket ({[, or None if none."""
        self._study2()
        return self.lastopenbracketpos

    def get_last_stmt_bracketing(self):
        """Return the bracketing structure of the last interesting stmt,
        in the format described in _study2.
        """
        self._study2()
        return self.stmt_bracketing