Python code coverage for Lib/idlelib/hyperparser.py

#	count	content
1	n/a	"""Provide advanced parsing abilities for ParenMatch and other extensions.
2	n/a
3	n/a	HyperParser uses PyParser. PyParser mostly gives information on the
4	n/a	proper indentation of code. HyperParser gives additional information on
5	n/a	the structure of code.
6	n/a	"""
7	n/a	from keyword import iskeyword
8	n/a	import string
9	n/a
10	n/a	from idlelib import pyparse
11	n/a
# ASCII characters allowed as the first character of an identifier
_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters) | frozenset("_")
# ASCII characters allowed anywhere in an identifier (first chars + digits)
_ASCII_ID_CHARS = _ASCII_ID_FIRST_CHARS | frozenset(string.digits)

# Boolean tables indexed by code point (0..127), so membership of a 7-bit
# ASCII character can be answered with a plain list lookup.
# _IS_ASCII_ID_CHAR[n]: may chr(n) appear in a Python identifier?
_IS_ASCII_ID_CHAR = [
    chr(code_point) in _ASCII_ID_CHARS for code_point in range(128)
]
# _IS_ASCII_ID_FIRST_CHAR[n]: may chr(n) start a Python identifier?
_IS_ASCII_ID_FIRST_CHAR = [
    chr(code_point) in _ASCII_ID_FIRST_CHARS for code_point in range(128)
]
23	n/a
24	n/a
class HyperParser:
    """Answer structural questions about the code around a text index.

    On construction the text preceding the index is fed to a
    pyparse.Parser.  The resulting statement text is kept in `rawtext`
    and the parser's bracketing information in `bracketing`.  The code
    in this class treats each `bracketing` entry as a pair of
    (position in rawtext, nesting level).
    """

    def __init__(self, editwin, index):
        """To initialize, analyze the surroundings of the given index.

        editwin must provide the attributes read here: text,
        indentwidth, tabwidth, context_use_ps1, and, when
        context_use_ps1 is false, num_context_lines and
        _build_char_in_string_func.  index is a text widget index.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = pyparse.Parser(editwin.indentwidth, editwin.tabwidth)

        def index2line(index):
            # A widget index looks like "line.column"; keep the line part.
            return int(float(index))
        lno = index2line(text.index(index))

        if not editwin.context_use_ps1:
            # Try progressively larger amounts of preceding context until
            # the parser finds a good place to start, or until the context
            # already reaches the first line.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline
                # at end. We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_str(text.get(startatindex, stopatindex)+' \n')
                bod = parser.find_good_parse_start(
                          editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the previous range tagged "console", or at
            # the very beginning of the text if there is no such range.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires it. We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_str(text.get(startatindex, stopatindex)+' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.str[:-2]
        # Parser.str apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        self.bracketing = parser.get_last_stmt_bracketing()
        # find which pairs of bracketing are openers. These always
        # correspond to a character of rawtext.
        # An entry is an opener when its level is higher than the level
        # of the entry before it; the first entry is never an opener.
        self.isopener = [i>0 and self.bracketing[i][1] >
                                 self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.

        Raises ValueError if the index lies before the start of the
        analyzed text.
        """
        # rawtext ends at stopatindex, so the distance from index to
        # stopatindex gives the offset of index counted from the end.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext
        # find the rightmost bracket to which index belongs
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry starting exactly at the index is also taken, but only
        # when it is not an opener.
        if (self.indexbracket < len(self.bracketing)-1 and
            self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
           not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?

        "Normal code" means the enclosing bracketing entry is not opened
        by a comment marker ('#') or a string quote.
        """
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """

        bracketinglevel = self.bracketing[self.indexbracket][1]
        # Walk backwards to the nearest enclosing opener that is one of
        # the requested opener characters.
        before = self.indexbracket
        while (not self.isopener[before] or
              self.rawtext[self.bracketing[before][0]] not in openers or
              self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry whose level drops below the
        # level of that opener.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
              self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Convert offsets in rawtext to widget indices by counting
        # characters back from stopatindex.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
           self.bracketing[after][0] > len(self.rawtext)):
            # No closing bracket inside rawtext.
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # the set of built-in identifiers which are also keywords,
    # i.e. keyword.iskeyword() returns True for them
    _ID_KEYWORDS = frozenset({"True", "False", "None"})

    @classmethod
    def _eat_identifier(cls, str, limit, pos):
        """Given a string and pos, return the number of chars in the
        identifier which ends at pos, or 0 if there is no such one.

        Characters before limit are never examined.  Keywords are not
        considered identifiers, except for True, False and None.
        """
        is_ascii_id_char = _IS_ASCII_ID_CHAR

        # Start at the end (pos) and work backwards.
        i = pos

        # Go backwards as long as the characters are valid ASCII
        # identifier characters. This is an optimization, since it
        # is faster in the common case where most of the characters
        # are ASCII.
        while i > limit and (
                ord(str[i - 1]) < 128 and
                is_ascii_id_char[ord(str[i - 1])]
        ):
            i -= 1

        # If the above loop ended due to reaching a non-ASCII
        # character, continue going backwards using the most generic
        # test for whether a string contains only valid identifier
        # characters.
        if i > limit and ord(str[i - 1]) >= 128:
            # 'a' is prepended so that the candidate's own first char is
            # only tested as a continuation char at this stage.
            while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
                i -= 4
            if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
                i -= 2
            if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
                i -= 1

            # The identifier candidate starts here. If it isn't a valid
            # identifier, don't eat anything. At this point that is only
            # possible if the first character isn't a valid first
            # character for an identifier.
            if not str[i:pos].isidentifier():
                return 0
        elif i < pos:
            # All characters in str[i:pos] are valid ASCII identifier
            # characters, so it is enough to check that the first is
            # valid as the first character of an identifier.
            if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]:
                return 0

        # All keywords are valid identifiers, but should not be
        # considered identifiers here, except for True, False and None.
        if i < pos and (
                iskeyword(str[i:pos]) and
                str[i:pos] not in cls._ID_KEYWORDS
        ):
            return 0

        return pos - i

    # This string includes all chars that may be in a white space
    _whitespace_chars = " \t\n\\"

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not in normal code (see
        is_in_code).
        """
        if not self.is_in_code():
            # The two literals are concatenated, so the first one must
            # end with a space to keep the words apart.
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        # Start of the expression found so far; moves left as more of
        # the expression is consumed.
        last_identifier_pos = pos
        # True while an identifier or bracket is expected next; False
        # right after an identifier, when only a dot may continue the
        # expression.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are inside a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue. postdot_phase is True, so we don't allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]
308	n/a
309	n/a
if __name__ == '__main__':
    # Executed as a script: run this module's unit tests verbosely.
    import unittest
    unittest.main(module='idlelib.idle_test.test_hyperparser', verbosity=2)