Python code coverage for Lib/idlelib/pyparse.py

#	count	content
from collections.abc import Mapping
import re
import sys
4	n/a
# Why the last statement is continued onto the following line.
# C_NONE means the last statement is not continued at all.
C_NONE = 0
C_BACKSLASH = 1
C_STRING_FIRST_LINE = 2
C_STRING_NEXT_LINES = 3
C_BRACKET = 4
8	n/a
if False:  # disabled; only for throwaway debugging output

    def dump(*stuff):
        """Write the str() of every argument, space-separated, plus a newline.

        Output goes to sys.__stdout__ rather than sys.stdout.
        """
        line = " ".join(map(str, stuff))
        sys.__stdout__.write(line + "\n")
12	n/a
# Find what looks like the start of a popular stmt.
#
# _synchre is the bound ``search`` method of the compiled pattern, so it
# is called as _synchre(string[, pos[, endpos]]) and returns a match
# object or None.  Because of re.MULTILINE, ``^`` matches at the start of
# every line; a match therefore starts at the beginning of a line whose
# first token (after optional spaces/tabs) is one of the keywords listed
# below.  The trailing \b keeps identifiers such as "classes" from
# matching.

_synchre = re.compile(r"""
    ^
    [ \t]*
    (?: while
    |   else
    |   def
    |   return
    |   assert
    |   break
    |   class
    |   continue
    |   elif
    |   try
    |   except
    |   raise
    |   import
    |   yield
    )
    \b
""", re.VERBOSE | re.MULTILINE).search
35	n/a
# Match blank line or non-indenting comment line.
#
# _junkre is the bound ``match`` method of the compiled pattern, so it
# only matches at the position it is given.  A line matches when it
# consists of optional spaces/tabs followed either directly by the
# newline, or by a comment whose '#' is immediately followed by a
# non-whitespace character (e.g. "#foo"; "# foo" does not match).

_junkre = re.compile(r"""
    [ \t]*
    (?: \# \S .* )?
    \n
""", re.VERBOSE).match
43	n/a
# Match any flavor of string; the terminating quote is optional
# so that we're robust in the face of incomplete program text.
#
# _match_stringre is the bound ``match`` method of the compiled pattern.
# The four alternatives are, in order: triple-double-quoted,
# double-quoted, triple-single-quoted and single-quoted strings.  The
# single-line flavors stop at an unescaped newline; the triple-quoted
# flavors may span lines.  re.DOTALL lets ``\\.`` consume a backslash
# followed by any character, including a newline.

_match_stringre = re.compile(r"""
    \""" [^"\\]* (?:
                     (?: \\. | "(?!"") )
                     [^"\\]*
                 )*
    (?: \""" )?

|   " [^"\\\n]* (?: \\. [^"\\\n]* )* "?

|   ''' [^'\\]* (?:
                   (?: \\. | '(?!'') )
                   [^'\\]*
                )*
    (?: ''' )?

|   ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
""", re.VERBOSE | re.DOTALL).match
64	n/a
# Match a line that starts with something interesting;
# used to find the first item of a bracket structure.
#
# _itemre is the bound ``match`` method of the compiled pattern.  It
# skips optional spaces/tabs and then requires one character that is
# not whitespace, not '#', and not a backslash; on success m.end()-1 is
# the index of that character.

_itemre = re.compile(r"""
    [ \t]*
    [^\s#\\]    # if we match, m.end()-1 is the interesting char
""", re.VERBOSE).match
72	n/a
# Match start of stmts that should be followed by a dedent.
#
# _closere is the bound ``match`` method of the compiled pattern.  It
# skips any leading whitespace and then requires one of the keywords
# below as a whole word (enforced by the trailing \b).

_closere = re.compile(r"""
    \s*
    (?: return
    |   break
    |   continue
    |   raise
    |   pass
    )
    \b
""", re.VERBOSE).match
85	n/a
# Chew up non-special chars as quickly as possible.  If match is
# successful, m.end() less 1 is the index of the last boring char
# matched.  If match is unsuccessful, the string starts with an
# interesting char.
#
# _chew_ordinaryre is the bound ``match`` method of the compiled
# pattern.  "Interesting" characters are the ones excluded by the
# character class: brackets of all three kinds, '#', both quote
# characters and the backslash.

_chew_ordinaryre = re.compile(r"""
    [^[\](){}#'"\\]+
""", re.VERBOSE).match
94	n/a
95	n/a
class StringTranslatePseudoMapping(Mapping):
    r"""Read-only mapping with a fallback value, meant for str.translate().

    The instance wraps a dict of explicit translations.  Looking up a key,
    either by subscription or through get(), yields the wrapped dict's
    value when the key is present and the fallback value supplied at
    construction time otherwise; a lookup therefore never raises KeyError.
    Length and iteration reflect only the wrapped dict.

    With str.translate() this replaces every character that has no
    explicit translation by one fixed character instead of leaving it
    untouched.  For instance, to turn everything except whitespace
    into 'x':

    >>> whitespace_chars = ' \t\n\r'
    >>> preserve_dict = {ord(c): ord(c) for c in whitespace_chars}
    >>> mapping = StringTranslatePseudoMapping(preserve_dict, ord('x'))
    >>> text = "a + b\tc\nd"
    >>> text.translate(mapping)
    'x x x\tx\nx'
    """

    def __init__(self, non_defaults, default_value):
        self._non_defaults = non_defaults
        self._default_value = default_value

        # Capture the dict's bound get() once so each lookup is a single
        # call without attribute access on self.
        table_get = non_defaults.get

        def lookup(key):
            return table_get(key, default_value)

        self._get = lookup

    def __getitem__(self, item):
        return self._get(item)

    def __len__(self):
        return len(self._non_defaults)

    def __iter__(self):
        return iter(self._non_defaults)

    def get(self, key, default=None):
        # The caller's ``default`` is deliberately unused: a missing key
        # resolves to the fallback value given at construction.
        return self._get(key)
136	n/a
137	n/a
class Parser:
    """Analyze the tail of a chunk of Python source for indentation purposes.

    Typical use: create an instance, hand it text with set_str(),
    optionally trim the text with find_good_parse_start() and set_lo(),
    then ask questions through the get_*/compute_*/is_* methods.  The
    analysis is done lazily in two passes (_study1 and _study2) whose
    progress is tracked in self.study_level; set_str() resets it.
    """

    def __init__(self, indentwidth, tabwidth):
        self.indentwidth = indentwidth
        self.tabwidth = tabwidth

    def set_str(self, s):
        """Set the text to analyze and discard any earlier analysis.

        s must be empty or end with a newline (checked with an assert).
        """
        assert len(s) == 0 or s[-1] == '\n'
        self.str = s
        self.study_level = 0

    def find_good_parse_start(self, is_char_in_string=None,
                              _synchre=_synchre):
        """Return index of a good place to begin parsing, or None.

        The result is as close to the end of the string as possible and
        is the start of some popular stmt (one matched by _synchre).
        Return None if none found: the caller should pass more prior
        context then, if possible, or if not (the entire program text up
        until the point of interest has already been tried) pass 0 to
        set_lo.

        This will be reliable iff given a reliable is_char_in_string
        function, meaning that when it says "no", it's absolutely
        guaranteed that the char is not in a string.  Without such a
        function None is returned immediately.
        """
        code, pos = self.str, None

        if not is_char_in_string:
            # no clue -- make the caller pass everything
            return None

        # Peek back from the end for a good place to start,
        # but don't try too often; pos will be left None, or
        # bumped to a legitimate synch point.
        limit = len(code)
        for _ in range(5):
            i = code.rfind(":\n", 0, limit)
            if i < 0:
                break
            i = code.rfind('\n', 0, i) + 1  # start of colon line
            m = _synchre(code, i, limit)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
                break
            limit = i
        if pos is None:
            # Nothing looks like a block-opener, or stuff does
            # but is_char_in_string keeps returning true; most likely
            # we're in or near a giant string, the colorizer hasn't
            # caught up enough to be helpful, or there simply aren't
            # any interesting stmts.  In any of these cases we're
            # going to have to parse the whole thing to be sure, so
            # give it one last try from the start, but stop wasting
            # time here regardless of the outcome.
            m = _synchre(code)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
            return pos

        # Peeking back worked; look forward until _synchre no longer
        # matches.
        i = pos + 1
        while True:
            m = _synchre(code, i)
            if m:
                s, i = m.span()
                if not is_char_in_string(s):
                    pos = s
            else:
                break
        return pos

    def set_lo(self, lo):
        """Throw away the start of the string.

        Intended to be called with find_good_parse_start's result.  lo
        must be 0 or the index just past a newline (checked with an
        assert).  NOTE: study_level is not reset here, so this is only
        meaningful before any analysis of the current text has run.
        """
        assert lo == 0 or self.str[lo-1] == '\n'
        if lo > 0:
            self.str = self.str[lo:]

    # Build a translation table to map uninteresting chars to 'x', open
    # brackets to '(', close brackets to ')' while preserving quotes,
    # backslashes, newlines and hashes.  This is to be passed to
    # str.translate() in _study1().
    _tran = {}
    _tran.update((ord(c), ord('(')) for c in "({[")
    _tran.update((ord(c), ord(')')) for c in ")}]")
    _tran.update((ord(c), ord(c)) for c in "\"'\\\n#")
    _tran = StringTranslatePseudoMapping(_tran, default_value=ord('x'))

    def _study1(self):
        """Find the line numbers (0-based) of the non-continuation lines.

        As quickly as humanly possible <wink>.  Creates
        self.{goodlines, continuation}; does nothing if this pass has
        already run for the current text.
        """
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        code = self.str
        code = code.translate(self._tran)
        code = code.replace('xxxxxxxx', 'x')
        code = code.replace('xxxx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('\nx', '\n')
        # note that replacing x\n with \n would be incorrect, because
        # x may be preceded by a backslash

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.goodlines = goodlines = [0]
        push_good = goodlines.append
        i, n = 0, len(code)
        while i < n:
            ch = code[i]
            i = i+1

            # cases are checked in decreasing order of frequency
            if ch == 'x':
                continue

            if ch == '\n':
                lno = lno + 1
                if level == 0:
                    push_good(lno)
                # else we're in an unclosed bracket structure
                continue

            if ch == '(':
                level = level + 1
                continue

            if ch == ')':
                if level:
                    level = level - 1
                # else the program is invalid, but we can't complain
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                if code[i-1:i+2] == quote * 3:
                    quote = quote * 3
                firstlno = lno
                w = len(quote) - 1
                i = i+w
                while i < n:
                    ch = code[i]
                    i = i+1

                    if ch == 'x':
                        continue

                    if code[i-1:i+w] == quote:
                        i = i+w
                        break

                    if ch == '\n':
                        lno = lno + 1
                        if w == 0:
                            # unterminated single-quoted string
                            if level == 0:
                                push_good(lno)
                            break
                        continue

                    if ch == '\\':
                        assert i < n
                        if code[i] == '\n':
                            lno = lno + 1
                        i = i+1
                        continue

                    # else comment char or paren inside string

                else:
                    # didn't break out of the loop, so we're still
                    # inside a string
                    if (lno - 1) == firstlno:
                        # before the previous \n in code, we were in the
                        # first line of the string
                        continuation = C_STRING_FIRST_LINE
                    else:
                        continuation = C_STRING_NEXT_LINES
                continue    # with outer loop

            if ch == '#':
                # consume the comment
                i = code.find('\n', i)
                assert i >= 0
                continue

            assert ch == '\\'
            assert i < n
            if code[i] == '\n':
                lno = lno + 1
                if i+1 == n:
                    continuation = C_BACKSLASH
            i = i+1

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if (continuation != C_STRING_FIRST_LINE
                and continuation != C_STRING_NEXT_LINES and level > 0):
            continuation = C_BRACKET
        self.continuation = continuation

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        assert (continuation == C_NONE) == (goodlines[-1] == lno)
        if goodlines[-1] != lno:
            push_good(lno)

    def get_continuation_type(self):
        """Return the C_* constant saying why the last stmt is continued."""
        self._study1()
        return self.continuation

    def _study2(self):
        """Analyze the last interesting statement character by character.

        study1 was sufficient to determine the continuation status,
        but doing more requires looking at every character.  study2
        does this for the last interesting statement in the block.
        Creates:
            self.stmt_start, stmt_end
                slice indices of last interesting stmt
            self.stmt_bracketing
                the bracketing structure of the last interesting stmt;
                for example, for the statement "say(boo) or die",
                stmt_bracketing will be [(0, 0), (3, 1), (8, 0)].
                Strings and comments are treated as brackets, for the
                matter.
            self.lastch
                last non-whitespace character before optional trailing
                comment
            self.lastopenbracketpos
                index of the last open bracket, or None if the
                statement has no unclosed bracket
        """
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        # Set p and q to slice indices of last interesting stmt.
        code, goodlines = self.str, self.goodlines
        i = len(goodlines) - 1
        p = len(code)   # index of newest line
        while i:
            assert p
            # p is the index of the stmt at line number goodlines[i].
            # Move p back to the stmt at line number goodlines[i-1].
            q = p
            for _ in range(goodlines[i-1], goodlines[i]):
                # tricky: sets p to 0 if no preceding newline
                p = code.rfind('\n', 0, p-1) + 1
            # The stmt code[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if _junkre(code, p):
                i = i-1
            else:
                break
        if i == 0:
            # nothing but junk!
            assert p == 0
            q = p
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        lastch = ""
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        bracketing = [(p, 0)]
        while p < q:
            # suck up all except ()[]{}'"#\\
            m = _chew_ordinaryre(code, p, q)
            if m:
                # we skipped at least one boring char
                newp = m.end()
                # back up over totally boring whitespace
                i = newp - 1    # index of last boring char
                while i >= p and code[i] in " \t\n":
                    i = i-1
                if i >= p:
                    lastch = code[i]
                p = newp
                if p >= q:
                    break

            ch = code[p]

            if ch in "([{":
                push_stack(p)
                bracketing.append((p, len(stack)))
                lastch = ch
                p = p+1
                continue

            if ch in ")]}":
                if stack:
                    del stack[-1]
                lastch = ch
                p = p+1
                bracketing.append((p, len(stack)))
                continue

            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                bracketing.append((p, len(stack)+1))
                lastch = ch
                p = _match_stringre(code, p, q).end()
                bracketing.append((p, len(stack)))
                continue

            if ch == '#':
                # consume comment and trailing newline
                bracketing.append((p, len(stack)+1))
                p = code.find('\n', p, q) + 1
                assert p > 0
                bracketing.append((p, len(stack)))
                continue

            assert ch == '\\'
            p = p+1     # beyond backslash
            assert p < q
            if code[p] != '\n':
                # the program is invalid, but can't complain
                lastch = ch + code[p]
            p = p+1     # beyond escaped char

        # end while p < q:

        self.lastch = lastch
        # Always assign, so that a bracket position found for text given
        # to an earlier set_str() call cannot survive into this analysis.
        self.lastopenbracketpos = stack[-1] if stack else None
        self.stmt_bracketing = tuple(bracketing)

    def compute_bracket_indent(self):
        """Return the number of spaces the next line should be indented.

        Assumes continuation is C_BRACKET (checked with an assert).
        """
        self._study2()
        assert self.continuation == C_BRACKET
        j = self.lastopenbracketpos
        code = self.str
        n = len(code)
        origi = i = code.rfind('\n', 0, j) + 1
        j = j+1     # one beyond open bracket
        # find first list item; set i to start of its line
        while j < n:
            m = _itemre(code, j)
            if m:
                j = m.end() - 1     # index of first interesting char
                extra = 0
                break
            else:
                # this line is junk; advance to next line
                i = j = code.find('\n', j) + 1
        else:
            # nothing interesting follows the bracket;
            # reproduce the bracket line's indentation + a level
            j = i = origi
            while code[j] in " \t":
                j = j+1
            extra = self.indentwidth
        return len(code[i:j].expandtabs(self.tabwidth)) + extra

    def get_num_lines_in_stmt(self):
        """Return number of physical lines in last stmt.

        Whether or not it's an interesting stmt!  This is intended to be
        called when continuation is C_BACKSLASH.
        """
        self._study1()
        goodlines = self.goodlines
        return goodlines[-1] - goodlines[-2]

    def compute_backslash_indent(self):
        """Return the number of spaces the next line should be indented.

        Assumes continuation is C_BACKSLASH (checked with an assert).
        Also assumes the new line is the first one following the initial
        line of the stmt.
        """
        self._study2()
        assert self.continuation == C_BACKSLASH
        code = self.str
        i = self.stmt_start
        while code[i] in " \t":
            i = i+1
        startpos = i

        # See whether the initial line starts an assignment stmt; i.e.,
        # look for an = operator
        endpos = code.find('\n', startpos) + 1
        found = level = 0
        while i < endpos:
            ch = code[i]
            if ch in "([{":
                level = level + 1
                i = i+1
            elif ch in ")]}":
                if level:
                    level = level - 1
                i = i+1
            elif ch == '"' or ch == "'":
                i = _match_stringre(code, i, endpos).end()
            elif ch == '#':
                break
            elif (level == 0 and ch == '='
                    and (i == 0 or code[i-1] not in "=<>!")
                    and code[i+1] != '='):
                found = 1
                break
            else:
                i = i+1

        if found:
            # found a legit =, but it may be the last interesting
            # thing on the line
            i = i+1     # move beyond the =
            found = re.match(r"\s*\\", code[i:endpos]) is None

        if not found:
            # oh well ... settle for moving beyond the first chunk
            # of non-whitespace chars
            i = startpos
            while code[i] not in " \t\n":
                i = i+1

        return len(code[self.stmt_start:i].expandtabs(self.tabwidth)) + 1

    def get_base_indent_string(self):
        """Return the leading whitespace on the initial line of the last
        interesting stmt.
        """
        self._study2()
        i, n = self.stmt_start, self.stmt_end
        j = i
        code = self.str
        while j < n and code[j] in " \t":
            j = j + 1
        return code[i:j]

    def is_block_opener(self):
        """Did the last interesting stmt open a block?"""
        self._study2()
        return self.lastch == ':'

    def is_block_closer(self):
        """Did the last interesting stmt close a block?"""
        self._study2()
        return _closere(self.str, self.stmt_start) is not None

    # index of last open bracket ({[, or None if none
    lastopenbracketpos = None

    def get_last_open_bracket_pos(self):
        """Return the index of the last open bracket, or None if none."""
        self._study2()
        return self.lastopenbracketpos

    # the structure of the bracketing of the last interesting statement,
    # in the format defined in _study2, or None if the text didn't contain
    # anything
    stmt_bracketing = None

    def get_last_stmt_bracketing(self):
        """Return the bracketing structure of the last interesting stmt."""
        self._study2()
        return self.stmt_bracketing