Python code coverage for Lib/idlelib/PyParse.py

#	count	content
1	n/a	import re
2	n/a	import sys
3	n/a
# Reason the last stmt is continued (or C_NONE if it's not).  The values
# are consecutive small ints, in the order listed here.
(C_NONE, C_BACKSLASH, C_STRING_FIRST_LINE,
 C_STRING_NEXT_LINES, C_BRACKET) = range(5)
7	n/a
if 0:   # for throwaway debugging output; flip to 1 to define dump()
    def dump(*stuff):
        # Write the str() of every argument, space-separated, to the real
        # stdout (sys.__stdout__), followed by a newline.
        sys.__stdout__.write(" ".join(map(str, stuff)) + "\n")
11	n/a
# Find what looks like the start of a popular stmt: optional leading
# blanks/tabs at the start of a line, then one of the keywords below as a
# whole word.  Bound to the compiled pattern's .search method.

_synchre = re.compile(r"""
    ^
    [ \t]*
    (?: while
    |   else
    |   def
    |   return
    |   assert
    |   break
    |   class
    |   continue
    |   elif
    |   try
    |   except
    |   raise
    |   import
    |   yield
    )
    \b
""", re.VERBOSE | re.MULTILINE).search
34	n/a
# Match blank line or non-indenting comment line (a "#" immediately
# followed by a non-whitespace char).  Bound to the pattern's .match method.

_junkre = re.compile(r"""
    [ \t]*
    (?: \# \S .* )?
    \n
""", re.VERBOSE).match
42	n/a
# Match any flavor of string; the terminating quote is optional
# so that we're robust in the face of incomplete program text.
# The triple-quoted alternatives come first so that they win over the
# single-quoted ones.  Bound to the pattern's .match method.

_match_stringre = re.compile(r"""
    \""" [^"\\]* (?:
                     (?: \\. | "(?!"") )
                     [^"\\]*
                 )*
    (?: \""" )?

|   " [^"\\\n]* (?: \\. [^"\\\n]* )* "?

|   ''' [^'\\]* (?:
                   (?: \\. | '(?!'') )
                   [^'\\]*
                )*
    (?: ''' )?

|   ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
""", re.VERBOSE | re.DOTALL).match
63	n/a
# Match a line that starts with something interesting;
# used to find the first item of a bracket structure.
# "Interesting" means anything but whitespace, "#" or a backslash.

_itemre = re.compile(r"""
    [ \t]*
    [^\s#\\]    # if we match, m.end()-1 is the interesting char
""", re.VERBOSE).match
71	n/a
# Match start of stmts that should be followed by a dedent.
# Leading whitespace of any kind (including newlines) is skipped.

_closere = re.compile(r"""
    \s*
    (?: return
    |   break
    |   continue
    |   raise
    |   pass
    )
    \b
""", re.VERBOSE).match
84	n/a
# Chew up non-special chars as quickly as possible.  If match is
# successful, m.end() less 1 is the index of the last boring char
# matched.  If match is unsuccessful, the string starts with an
# interesting char (a bracket, quote, "#" or backslash).

_chew_ordinaryre = re.compile(r"""
    [^[\](){}#'"\\]+
""", re.VERBOSE).match
93	n/a
# Build translation table (code point -> replacement string, for
# str.translate) to map uninteresting chars to "x", open brackets to "(",
# and close brackets to ")".  Quotes, backslash, newline and "#" map to
# themselves.  Only code points 0..255 are covered.

_tran = dict.fromkeys(range(256), 'x')
_tran.update(dict.fromkeys(map(ord, "({["), '('))
_tran.update(dict.fromkeys(map(ord, ")}]"), ')'))
_tran.update((ord(c), c) for c in "\"'\\\n#")
107	n/a
class Parser:
    """Analyze a chunk of Python source text for auto-indentation purposes.

    The text is supplied with set_str() (optionally trimmed afterwards with
    set_lo()).  The query methods lazily run two analysis passes,
    _study1() and _study2(), whose results are cached until the next
    set_str() call resets study_level.
    """

    # index of last open bracket ({[, or None if none
    lastopenbracketpos = None

    # the structure of the bracketing of the last interesting statement,
    # in the format defined in _study2, or None if the text didn't contain
    # anything
    stmt_bracketing = None

    def __init__(self, indentwidth, tabwidth):
        """Remember the indent width and tab width used by the indent
        computations."""
        self.indentwidth = indentwidth
        self.tabwidth = tabwidth

    def set_str(self, s):
        """Install s as the text to analyze and invalidate cached studies.

        s must be empty or end with a newline (checked with an assert).
        If s is a str, every character with code point >= 127 is replaced
        by "x": the parse functions only care about ASCII characters, and
        the translation table used by _study1 only covers small code
        points.
        """
        assert len(s) == 0 or s[-1] == '\n'
        if isinstance(s, str):
            s = "".join([c if ord(c) < 127 else "x" for c in s])
        self.str = s
        self.study_level = 0

    def find_good_parse_start(self, is_char_in_string=None,
                              _synchre=_synchre):
        """Return index of a good place to begin parsing, or None.

        The index is as close to the end of the string as possible, and is
        the start of some popular stmt like "while" or "def".  Return None
        if none found: the caller should pass more prior context then, if
        possible, or if not (the entire program text up until the point of
        interest has already been tried) pass 0 to set_lo.

        This will be reliable iff given a reliable is_char_in_string
        function, meaning that when it says "no", it's absolutely
        guaranteed that the char is not in a string.  Without such a
        function, None is returned immediately.

        _synchre is the search function used to spot stmt starts; it
        defaults to the module-level pattern.
        """
        code, pos = self.str, None

        if not is_char_in_string:
            # no clue -- make the caller pass everything
            return None

        # Peek back from the end for a good place to start,
        # but don't try too often; pos will be left None, or
        # bumped to a legitimate synch point.
        limit = len(code)
        for _ in range(5):
            i = code.rfind(":\n", 0, limit)
            if i < 0:
                break
            i = code.rfind('\n', 0, i) + 1  # start of colon line
            m = _synchre(code, i, limit)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
                break
            limit = i
        if pos is None:
            # Nothing looks like a block-opener, or stuff does
            # but is_char_in_string keeps returning true; most likely
            # we're in or near a giant string, the colorizer hasn't
            # caught up enough to be helpful, or there simply aren't
            # any interesting stmts.  In any of these cases we're
            # going to have to parse the whole thing to be sure, so
            # give it one last try from the start, but stop wasting
            # time here regardless of the outcome.
            m = _synchre(code)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
            return pos

        # Peeking back worked; look forward until _synchre no longer
        # matches.
        i = pos + 1
        while True:
            m = _synchre(code, i)
            if not m:
                break
            s, i = m.span()
            if not is_char_in_string(s):
                pos = s
        return pos

    def set_lo(self, lo):
        """Throw away the start of the string (everything before index lo).

        Intended to be called with find_good_parse_start's result.  lo must
        be 0 or the index just after a newline (checked with an assert).
        """
        assert lo == 0 or self.str[lo-1] == '\n'
        if lo > 0:
            self.str = self.str[lo:]

    def _study1(self):
        """Find the line numbers (0-based) of the non-continuation lines.

        As quickly as humanly possible <wink>.
        Creates self.{goodlines, continuation}.  Does nothing if this pass
        has already run since the last set_str().
        """
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        code = self.str
        code = code.translate(_tran)
        code = code.replace('xxxxxxxx', 'x')
        code = code.replace('xxxx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('\nx', '\n')
        # note that replacing x\n with \n would be incorrect, because
        # x may be preceded by a backslash

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.goodlines = goodlines = [0]
        push_good = goodlines.append
        i, n = 0, len(code)
        while i < n:
            ch = code[i]
            i = i+1

            # cases are checked in decreasing order of frequency
            if ch == 'x':
                continue

            if ch == '\n':
                lno = lno + 1
                if level == 0:
                    push_good(lno)
                    # else we're in an unclosed bracket structure
                continue

            if ch == '(':
                level = level + 1
                continue

            if ch == ')':
                if level:
                    level = level - 1
                    # else the program is invalid, but we can't complain
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                if code[i-1:i+2] == quote * 3:
                    quote = quote * 3
                firstlno = lno
                w = len(quote) - 1
                i = i+w
                while i < n:
                    ch = code[i]
                    i = i+1

                    if ch == 'x':
                        continue

                    if code[i-1:i+w] == quote:
                        i = i+w
                        break

                    if ch == '\n':
                        lno = lno + 1
                        if w == 0:
                            # unterminated single-quoted string
                            if level == 0:
                                push_good(lno)
                            break
                        continue

                    if ch == '\\':
                        assert i < n
                        if code[i] == '\n':
                            lno = lno + 1
                        i = i+1
                        continue

                    # else comment char or paren inside string

                else:
                    # didn't break out of the loop, so we're still
                    # inside a string
                    if (lno - 1) == firstlno:
                        # before the previous \n in code, we were in the
                        # first line of the string
                        continuation = C_STRING_FIRST_LINE
                    else:
                        continuation = C_STRING_NEXT_LINES
                continue    # with outer loop

            if ch == '#':
                # consume the comment
                i = code.find('\n', i)
                assert i >= 0
                continue

            assert ch == '\\'
            assert i < n
            if code[i] == '\n':
                lno = lno + 1
                if i+1 == n:
                    continuation = C_BACKSLASH
            i = i+1

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if (continuation != C_STRING_FIRST_LINE
                and continuation != C_STRING_NEXT_LINES and level > 0):
            continuation = C_BRACKET
        self.continuation = continuation

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        assert (continuation == C_NONE) == (goodlines[-1] == lno)
        if goodlines[-1] != lno:
            push_good(lno)

    def get_continuation_type(self):
        """Return the C_* constant saying why the last stmt is continued
        (C_NONE if it isn't)."""
        self._study1()
        return self.continuation

    def _study2(self):
        """Analyze the last interesting statement in the block.

        study1 was sufficient to determine the continuation status,
        but doing more requires looking at every character.  study2
        does this for the last interesting statement in the block.
        Creates:
            self.stmt_start, stmt_end
                slice indices of last interesting stmt
            self.stmt_bracketing
                the bracketing structure of the last interesting stmt;
                for example, for the statement "say(boo) or die",
                stmt_bracketing will be [(0, 0), (3, 1), (8, 0)].  Strings
                and comments are treated as brackets, for the matter.
            self.lastch
                last non-whitespace character before optional trailing
                comment
            self.lastopenbracketpos
                index of the last open bracket, or None when no bracket
                is left open
        """
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        # Set p and q to slice indices of last interesting stmt.
        code, goodlines = self.str, self.goodlines
        i = len(goodlines) - 1
        p = len(code)   # index of newest line
        while i:
            assert p
            # p is the index of the stmt at line number goodlines[i].
            # Move p back to the stmt at line number goodlines[i-1].
            q = p
            for _ in range(goodlines[i-1], goodlines[i]):
                # tricky: sets p to 0 if no preceding newline
                p = code.rfind('\n', 0, p-1) + 1
            # The stmt code[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if _junkre(code, p):
                i = i-1
            else:
                break
        if i == 0:
            # nothing but junk!
            assert p == 0
            q = p
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        lastch = ""
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        bracketing = [(p, 0)]
        while p < q:
            # suck up all except ()[]{}'"#\\
            m = _chew_ordinaryre(code, p, q)
            if m:
                # we skipped at least one boring char
                newp = m.end()
                # back up over totally boring whitespace
                i = newp - 1    # index of last boring char
                while i >= p and code[i] in " \t\n":
                    i = i-1
                if i >= p:
                    lastch = code[i]
                p = newp
                if p >= q:
                    break

            ch = code[p]

            if ch in "([{":
                push_stack(p)
                bracketing.append((p, len(stack)))
                lastch = ch
                p = p+1
                continue

            if ch in ")]}":
                if stack:
                    del stack[-1]
                lastch = ch
                p = p+1
                bracketing.append((p, len(stack)))
                continue

            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                bracketing.append((p, len(stack)+1))
                lastch = ch
                p = _match_stringre(code, p, q).end()
                bracketing.append((p, len(stack)))
                continue

            if ch == '#':
                # consume comment and trailing newline
                bracketing.append((p, len(stack)+1))
                p = code.find('\n', p, q) + 1
                assert p > 0
                bracketing.append((p, len(stack)))
                continue

            assert ch == '\\'
            p = p+1     # beyond backslash
            assert p < q
            if code[p] != '\n':
                # the program is invalid, but can't complain
                lastch = ch + code[p]
            p = p+1     # beyond escaped char

        # end while p < q:

        self.lastch = lastch
        # Always assign, so that a position left over from studying an
        # earlier text (before another set_str call) can't leak through.
        self.lastopenbracketpos = stack[-1] if stack else None
        self.stmt_bracketing = tuple(bracketing)

    def compute_bracket_indent(self):
        """Return the number of spaces the next line should be indented.

        Assumes continuation is C_BRACKET (checked with an assert).
        """
        self._study2()
        assert self.continuation == C_BRACKET
        j = self.lastopenbracketpos
        code = self.str
        n = len(code)
        origi = i = code.rfind('\n', 0, j) + 1
        j = j+1     # one beyond open bracket
        # find first list item; set i to start of its line
        while j < n:
            m = _itemre(code, j)
            if m:
                j = m.end() - 1     # index of first interesting char
                extra = 0
                break
            else:
                # this line is junk; advance to next line
                i = j = code.find('\n', j) + 1
        else:
            # nothing interesting follows the bracket;
            # reproduce the bracket line's indentation + a level
            j = i = origi
            while code[j] in " \t":
                j = j+1
            extra = self.indentwidth
        return len(code[i:j].expandtabs(self.tabwidth)) + extra

    def get_num_lines_in_stmt(self):
        """Return number of physical lines in last stmt.

        Whether or not it's an interesting stmt!  This is intended to be
        called when continuation is C_BACKSLASH.
        """
        self._study1()
        goodlines = self.goodlines
        return goodlines[-1] - goodlines[-2]

    def compute_backslash_indent(self):
        """Return the number of spaces the next line should be indented.

        Assumes continuation is C_BACKSLASH (checked with an assert).  Also
        assumes the new line is the first one following the initial line of
        the stmt.
        """
        self._study2()
        assert self.continuation == C_BACKSLASH
        code = self.str
        i = self.stmt_start
        while code[i] in " \t":
            i = i+1
        startpos = i

        # See whether the initial line starts an assignment stmt; i.e.,
        # look for an = operator
        endpos = code.find('\n', startpos) + 1
        found = level = 0
        while i < endpos:
            ch = code[i]
            if ch in "([{":
                level = level + 1
                i = i+1
            elif ch in ")]}":
                if level:
                    level = level - 1
                i = i+1
            elif ch == '"' or ch == "'":
                i = _match_stringre(code, i, endpos).end()
            elif ch == '#':
                break
            elif (level == 0 and ch == '='
                  and (i == 0 or code[i-1] not in "=<>!")
                  and code[i+1] != '='):
                found = 1
                break
            else:
                i = i+1

        if found:
            # found a legit =, but it may be the last interesting
            # thing on the line
            i = i+1     # move beyond the =
            found = re.match(r"\s*\\", code[i:endpos]) is None

        if not found:
            # oh well ... settle for moving beyond the first chunk
            # of non-whitespace chars
            i = startpos
            while code[i] not in " \t\n":
                i = i+1

        return len(code[self.stmt_start:i].expandtabs(self.tabwidth)) + 1

    def get_base_indent_string(self):
        """Return the leading whitespace on the initial line of the last
        interesting stmt."""
        self._study2()
        i, n = self.stmt_start, self.stmt_end
        j = i
        code = self.str
        while j < n and code[j] in " \t":
            j = j + 1
        return code[i:j]

    def is_block_opener(self):
        """Did the last interesting stmt open a block?"""
        self._study2()
        return self.lastch == ':'

    def is_block_closer(self):
        """Did the last interesting stmt close a block?"""
        self._study2()
        return _closere(self.str, self.stmt_start) is not None

    def get_last_open_bracket_pos(self):
        """Return the index of the last open bracket ({[, or None if
        none."""
        self._study2()
        return self.lastopenbracketpos

    def get_last_stmt_bracketing(self):
        """Return the bracketing structure of the last interesting
        statement, in the format described in _study2."""
        self._study2()
        return self.stmt_bracketing