» Core Development > Code coverage > Lib/idlelib/pyparse.py

Python code coverage for Lib/idlelib/pyparse.py

# | count | content
import re
import sys
from collections.abc import Mapping

# Reason the last statement is continued (or C_NONE if it's not).
# The five codes are consecutive integers starting at 0.
(C_NONE, C_BACKSLASH, C_STRING_FIRST_LINE,
 C_STRING_NEXT_LINES, C_BRACKET) = range(5)

if 0:   # for throwaway debugging output; flip to 1 to enable
    def dump(*stuff):
        """Write the str() of each argument, space-separated, to the real stdout."""
        # sys.__stdout__ is used rather than sys.stdout so the output is
        # written to the interpreter's original stream.
        sys.__stdout__.write(" ".join(map(str, stuff)) + "\n")

13n/a# Find what looks like the start of a popular stmt.
14n/a
15n/a_synchre = re.compile(r"""
16n/a ^
17n/a [ \t]*
18n/a (?: while
19n/a | else
20n/a | def
21n/a | return
22n/a | assert
23n/a | break
24n/a | class
25n/a | continue
26n/a | elif
27n/a | try
28n/a | except
29n/a | raise
30n/a | import
31n/a | yield
32n/a )
33n/a \b
34n/a""", re.VERBOSE | re.MULTILINE).search
35n/a
36n/a# Match blank line or non-indenting comment line.
37n/a
38n/a_junkre = re.compile(r"""
39n/a [ \t]*
40n/a (?: \# \S .* )?
41n/a \n
42n/a""", re.VERBOSE).match
43n/a
44n/a# Match any flavor of string; the terminating quote is optional
45n/a# so that we're robust in the face of incomplete program text.
46n/a
47n/a_match_stringre = re.compile(r"""
48n/a \""" [^"\\]* (?:
49n/a (?: \\. | "(?!"") )
50n/a [^"\\]*
51n/a )*
52n/a (?: \""" )?
53n/a
54n/a| " [^"\\\n]* (?: \\. [^"\\\n]* )* "?
55n/a
56n/a| ''' [^'\\]* (?:
57n/a (?: \\. | '(?!'') )
58n/a [^'\\]*
59n/a )*
60n/a (?: ''' )?
61n/a
62n/a| ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
63n/a""", re.VERBOSE | re.DOTALL).match
64n/a
65n/a# Match a line that starts with something interesting;
66n/a# used to find the first item of a bracket structure.
67n/a
68n/a_itemre = re.compile(r"""
69n/a [ \t]*
70n/a [^\s#\\] # if we match, m.end()-1 is the interesting char
71n/a""", re.VERBOSE).match
72n/a
73n/a# Match start of stmts that should be followed by a dedent.
74n/a
75n/a_closere = re.compile(r"""
76n/a \s*
77n/a (?: return
78n/a | break
79n/a | continue
80n/a | raise
81n/a | pass
82n/a )
83n/a \b
84n/a""", re.VERBOSE).match
85n/a
86n/a# Chew up non-special chars as quickly as possible. If match is
87n/a# successful, m.end() less 1 is the index of the last boring char
88n/a# matched. If match is unsuccessful, the string starts with an
89n/a# interesting char.
90n/a
91n/a_chew_ordinaryre = re.compile(r"""
92n/a [^[\](){}#'"\\]+
93n/a""", re.VERBOSE).match
94n/a
95n/a
class StringTranslatePseudoMapping(Mapping):
    r"""Utility class to be used with str.translate()

    This Mapping class wraps a given dict. When a value for a key is
    requested via __getitem__() or get(), the key is looked up in the
    given dict. If found there, the value from the dict is returned.
    Otherwise, the default value given upon initialization is returned.

    This allows using str.translate() to make some replacements, and to
    replace all characters for which no replacement was specified with
    a given character instead of leaving them as-is.

    For example, to replace everything except whitespace with 'x':

    >>> whitespace_chars = ' \t\n\r'
    >>> preserve_dict = {ord(c): ord(c) for c in whitespace_chars}
    >>> mapping = StringTranslatePseudoMapping(preserve_dict, ord('x'))
    >>> text = "a + b\tc\nd"
    >>> text.translate(mapping)
    'x x x\tx\nx'
    """

    def __init__(self, non_defaults, default_value):
        """Wrap *non_defaults*; any key missing from it maps to *default_value*."""
        self._non_defaults = non_defaults
        self._default_value = default_value

        # The dict's bound .get and the default are captured as default
        # arguments so that each lookup is a single call using only
        # local names.
        def _get(key, _get=non_defaults.get, _default=default_value):
            return _get(key, _default)
        self._get = _get

    def __getitem__(self, item):
        """Return the wrapped dict's value for *item*, or the default value."""
        return self._get(item)

    def __len__(self):
        """Return the number of explicitly mapped (non-default) keys."""
        return len(self._non_defaults)

    def __iter__(self):
        """Iterate over the explicitly mapped (non-default) keys."""
        return iter(self._non_defaults)

    def get(self, key, default=None):
        """Return the same value as __getitem__().

        The *default* parameter is accepted for signature compatibility
        with Mapping.get() but is not used: a missing key always yields
        the default value given at construction.
        """
        return self._get(key)


class Parser:
    """Analyze a chunk of Python source text for auto-indentation purposes.

    Typical use: construct with the indent and tab widths, hand over the
    text with set_str(), optionally trim it with find_good_parse_start()
    and set_lo(), then query continuation/indent information.  The
    analysis is done lazily in two stages (_study1 and _study2) and
    cached via self.study_level.
    """

    def __init__(self, indentwidth, tabwidth):
        """Remember the indent width and tab width used by the indent queries."""
        self.indentwidth = indentwidth
        self.tabwidth = tabwidth

    def set_str(self, s):
        """Set the text to analyze and discard any cached analysis.

        *s* must be empty or end with a newline.
        """
        assert len(s) == 0 or s[-1] == '\n'
        self.str = s
        self.study_level = 0

    def find_good_parse_start(self, is_char_in_string=None,
                              _synchre=_synchre):
        """Return index of a good place to begin parsing, or None.

        The index is as close to the end of the string as possible and
        is the start of some popular stmt (one matched by _synchre).
        Return None if none found: the caller should pass more prior
        context then, if possible, or if not (the entire program text up
        until the point of interest has already been tried) pass 0 to
        set_lo().

        This will be reliable iff given a reliable is_char_in_string
        function, meaning that when it says "no", it's absolutely
        guaranteed that the char is not in a string.  Without such a
        function None is returned immediately.
        """
        code, pos = self.str, None

        if not is_char_in_string:
            # no clue -- make the caller pass everything
            return None

        # Peek back from the end for a good place to start,
        # but don't try too often; pos will be left None, or
        # bumped to a legitimate synch point.
        limit = len(code)
        for _ in range(5):
            i = code.rfind(":\n", 0, limit)
            if i < 0:
                break
            i = code.rfind('\n', 0, i) + 1  # start of colon line
            m = _synchre(code, i, limit)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
                break
            limit = i
        if pos is None:
            # Nothing looks like a block-opener, or stuff does
            # but is_char_in_string keeps returning true; most likely
            # we're in or near a giant string, the colorizer hasn't
            # caught up enough to be helpful, or there simply *aren't*
            # any interesting stmts.  In any of these cases we're
            # going to have to parse the whole thing to be sure, so
            # give it one last try from the start, but stop wasting
            # time here regardless of the outcome.
            m = _synchre(code)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
            return pos

        # Peeking back worked; look forward until _synchre no longer
        # matches.
        i = pos + 1
        while True:
            m = _synchre(code, i)
            if m:
                s, i = m.span()
                if not is_char_in_string(s):
                    pos = s
            else:
                break
        return pos

    def set_lo(self, lo):
        """Throw away the start of the string.

        Intended to be called with find_good_parse_start's result.
        *lo* must be 0 or the index just past a newline.
        """
        assert lo == 0 or self.str[lo-1] == '\n'
        if lo > 0:
            self.str = self.str[lo:]
            # The text changed, so anything cached by an earlier study
            # refers to the old string; force a re-study, as set_str does.
            self.study_level = 0

    # Build a translation table to map uninteresting chars to 'x', open
    # brackets to '(', close brackets to ')' while preserving quotes,
    # backslashes, newlines and hashes.  This is to be passed to
    # str.translate() in _study1().
    _tran = {}
    _tran.update((ord(c), ord('(')) for c in "({[")
    _tran.update((ord(c), ord(')')) for c in ")}]")
    _tran.update((ord(c), ord(c)) for c in "\"'\\\n#")
    _tran = StringTranslatePseudoMapping(_tran, default_value=ord('x'))

    def _study1(self):
        """Find the line numbers of the non-continuation lines.

        As quickly as humanly possible <wink>, find the line numbers
        (0-based) of the non-continuation lines.
        Creates self.{goodlines, continuation}.
        """
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        code = self.str
        code = code.translate(self._tran)
        code = code.replace('xxxxxxxx', 'x')
        code = code.replace('xxxx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('\nx', '\n')
        # note that replacing x\n with \n would be incorrect, because
        # x may be preceded by a backslash

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.goodlines = goodlines = [0]
        push_good = goodlines.append
        i, n = 0, len(code)
        while i < n:
            ch = code[i]
            i = i+1

            # cases are checked in decreasing order of frequency
            if ch == 'x':
                continue

            if ch == '\n':
                lno = lno + 1
                if level == 0:
                    push_good(lno)
                    # else we're in an unclosed bracket structure
                continue

            if ch == '(':
                level = level + 1
                continue

            if ch == ')':
                if level:
                    level = level - 1
                    # else the program is invalid, but we can't complain
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                if code[i-1:i+2] == quote * 3:
                    quote = quote * 3
                firstlno = lno
                w = len(quote) - 1
                i = i+w
                while i < n:
                    ch = code[i]
                    i = i+1

                    if ch == 'x':
                        continue

                    if code[i-1:i+w] == quote:
                        i = i+w
                        break

                    if ch == '\n':
                        lno = lno + 1
                        if w == 0:
                            # unterminated single-quoted string
                            if level == 0:
                                push_good(lno)
                            break
                        continue

                    if ch == '\\':
                        assert i < n
                        if code[i] == '\n':
                            lno = lno + 1
                        i = i+1
                        continue

                    # else comment char or paren inside string

                else:
                    # didn't break out of the loop, so we're still
                    # inside a string
                    if (lno - 1) == firstlno:
                        # before the previous \n in code, we were in the
                        # first line of the string
                        continuation = C_STRING_FIRST_LINE
                    else:
                        continuation = C_STRING_NEXT_LINES
                continue    # with outer loop

            if ch == '#':
                # consume the comment
                i = code.find('\n', i)
                assert i >= 0
                continue

            assert ch == '\\'
            assert i < n
            if code[i] == '\n':
                lno = lno + 1
                if i+1 == n:
                    continuation = C_BACKSLASH
            i = i+1

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if (continuation != C_STRING_FIRST_LINE
                and continuation != C_STRING_NEXT_LINES and level > 0):
            continuation = C_BRACKET
        self.continuation = continuation

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        assert (continuation == C_NONE) == (goodlines[-1] == lno)
        if goodlines[-1] != lno:
            push_good(lno)

    def get_continuation_type(self):
        """Return the C_* code saying whether/why the last stmt is continued."""
        self._study1()
        return self.continuation

    def _study2(self):
        """Analyze the last interesting statement character by character.

        study1 was sufficient to determine the continuation status,
        but doing more requires looking at every character.  study2
        does this for the last interesting statement in the block.
        Creates:
            self.stmt_start, stmt_end
                slice indices of last interesting stmt
            self.stmt_bracketing
                the bracketing structure of the last interesting stmt;
                for example, for the statement "say(boo) or die",
                stmt_bracketing will be [(0, 0), (3, 1), (8, 0)].
                Strings and comments are treated as brackets, for the
                matter.
            self.lastch
                last non-whitespace character before optional trailing
                comment
            self.lastopenbracketpos
                index of the last still-open bracket, or None if no
                bracket is open
        """
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        # Set p and q to slice indices of last interesting stmt.
        code, goodlines = self.str, self.goodlines
        i = len(goodlines) - 1
        p = len(code)   # index of newest line
        while i:
            assert p
            # p is the index of the stmt at line number goodlines[i].
            # Move p back to the stmt at line number goodlines[i-1].
            q = p
            for _ in range(goodlines[i-1], goodlines[i]):
                # tricky: sets p to 0 if no preceding newline
                p = code.rfind('\n', 0, p-1) + 1
            # The stmt code[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if _junkre(code, p):
                i = i-1
            else:
                break
        if i == 0:
            # nothing but junk!
            assert p == 0
            q = p
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        lastch = ""
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        bracketing = [(p, 0)]
        while p < q:
            # suck up all except ()[]{}'"#\\
            m = _chew_ordinaryre(code, p, q)
            if m:
                # we skipped at least one boring char
                newp = m.end()
                # back up over totally boring whitespace
                i = newp - 1    # index of last boring char
                while i >= p and code[i] in " \t\n":
                    i = i-1
                if i >= p:
                    lastch = code[i]
                p = newp
                if p >= q:
                    break

            ch = code[p]

            if ch in "([{":
                push_stack(p)
                bracketing.append((p, len(stack)))
                lastch = ch
                p = p+1
                continue

            if ch in ")]}":
                if stack:
                    del stack[-1]
                lastch = ch
                p = p+1
                bracketing.append((p, len(stack)))
                continue

            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                bracketing.append((p, len(stack)+1))
                lastch = ch
                p = _match_stringre(code, p, q).end()
                bracketing.append((p, len(stack)))
                continue

            if ch == '#':
                # consume comment and trailing newline
                bracketing.append((p, len(stack)+1))
                p = code.find('\n', p, q) + 1
                assert p > 0
                bracketing.append((p, len(stack)))
                continue

            assert ch == '\\'
            p = p+1     # beyond backslash
            assert p < q
            if code[p] != '\n':
                # the program is invalid, but can't complain
                lastch = ch + code[p]
            p = p+1     # beyond escaped char

        # end while p < q:

        self.lastch = lastch
        # Always assign, so that a parser reused via set_str() does not
        # keep reporting a bracket position left over from earlier text.
        self.lastopenbracketpos = stack[-1] if stack else None
        self.stmt_bracketing = tuple(bracketing)

    def compute_bracket_indent(self):
        """Return number of spaces the next line should be indented.

        Assumes continuation is C_BRACKET.  If something interesting
        follows the last open bracket, line up with it; otherwise use
        the bracket line's own indentation plus one indent level.
        """
        self._study2()
        assert self.continuation == C_BRACKET
        j = self.lastopenbracketpos
        code = self.str
        n = len(code)
        origi = i = code.rfind('\n', 0, j) + 1
        j = j+1     # one beyond open bracket
        # find first list item; set i to start of its line
        while j < n:
            m = _itemre(code, j)
            if m:
                j = m.end() - 1     # index of first interesting char
                extra = 0
                break
            else:
                # this line is junk; advance to next line
                i = j = code.find('\n', j) + 1
        else:
            # nothing interesting follows the bracket;
            # reproduce the bracket line's indentation + a level
            j = i = origi
            while code[j] in " \t":
                j = j+1
            extra = self.indentwidth
        return len(code[i:j].expandtabs(self.tabwidth)) + extra

    def get_num_lines_in_stmt(self):
        """Return number of physical lines in last stmt.

        Whether or not it's an interesting stmt!  This is intended to be
        called when continuation is C_BACKSLASH.
        """
        self._study1()
        goodlines = self.goodlines
        return goodlines[-1] - goodlines[-2]

    def compute_backslash_indent(self):
        """Return number of spaces the next line should be indented.

        Assumes continuation is C_BACKSLASH.  Also assumes the new line
        is the first one following the initial line of the stmt.
        """
        self._study2()
        assert self.continuation == C_BACKSLASH
        code = self.str
        i = self.stmt_start
        while code[i] in " \t":
            i = i+1
        startpos = i

        # See whether the initial line starts an assignment stmt; i.e.,
        # look for an = operator
        endpos = code.find('\n', startpos) + 1
        found = level = 0
        while i < endpos:
            ch = code[i]
            if ch in "([{":
                level = level + 1
                i = i+1
            elif ch in ")]}":
                if level:
                    level = level - 1
                i = i+1
            elif ch == '"' or ch == "'":
                i = _match_stringre(code, i, endpos).end()
            elif ch == '#':
                break
            elif (level == 0 and ch == '='
                  and (i == 0 or code[i-1] not in "=<>!")
                  and code[i+1] != '='):
                found = 1
                break
            else:
                i = i+1

        if found:
            # found a legit =, but it may be the last interesting
            # thing on the line
            i = i+1     # move beyond the =
            found = re.match(r"\s*\\", code[i:endpos]) is None

        if not found:
            # oh well ... settle for moving beyond the first chunk
            # of non-whitespace chars
            i = startpos
            while code[i] not in " \t\n":
                i = i+1

        return len(code[self.stmt_start:i].expandtabs(self.tabwidth)) + 1

    def get_base_indent_string(self):
        """Return the leading whitespace on the initial line of the last
        interesting stmt.
        """
        self._study2()
        i, n = self.stmt_start, self.stmt_end
        j = i
        code = self.str
        while j < n and code[j] in " \t":
            j = j + 1
        return code[i:j]

    def is_block_opener(self):
        """Return True if the last interesting stmt opened a block."""
        self._study2()
        return self.lastch == ':'

    def is_block_closer(self):
        """Return True if the last interesting stmt closed a block."""
        self._study2()
        return _closere(self.str, self.stmt_start) is not None

    # index of last open bracket ({[, or None if none
    lastopenbracketpos = None

    def get_last_open_bracket_pos(self):
        """Return index of the last open bracket ({[, or None if none."""
        self._study2()
        return self.lastopenbracketpos

    # the structure of the bracketing of the last interesting statement,
    # in the format defined in _study2, or None if the text didn't contain
    # anything
    stmt_bracketing = None

    def get_last_stmt_bracketing(self):
        """Return the bracketing structure of the last interesting stmt,
        in the format defined in _study2.
        """
        self._study2()
        return self.stmt_bracketing