» Core Development > Code coverage > Lib/idlelib/hyperparser.py

Python code coverage for Lib/idlelib/hyperparser.py

#countcontent
1n/a"""Provide advanced parsing abilities for ParenMatch and other extensions.
2n/a
3n/aHyperParser uses PyParser. PyParser mostly gives information on the
4n/aproper indentation of code. HyperParser gives additional information on
5n/athe structure of code.
6n/a"""
7n/afrom keyword import iskeyword
8n/aimport string
9n/a
10n/afrom idlelib import pyparse
11n/a
# ASCII characters allowed as the *first* character of an identifier.
_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters) | {"_"}
# ASCII characters allowed anywhere in an identifier: the first-char set
# plus the decimal digits.
_ASCII_ID_CHARS = _ASCII_ID_FIRST_CHARS | frozenset(string.digits)

# Boolean tables indexed by code point (0..127).  Entry ``n`` tells whether
# ``chr(n)`` may appear in an identifier / may start an identifier.
_IS_ASCII_ID_CHAR = [
    char in _ASCII_ID_CHARS for char in map(chr, range(128))
]
_IS_ASCII_ID_FIRST_CHAR = [
    char in _ASCII_ID_FIRST_CHARS for char in map(chr, range(128))
]
23n/a
24n/a
class HyperParser:
    """Analyze the statement surrounding an index of an editor's text widget.

    The constructor feeds the text around the index to a pyparse.Parser and
    records the bracketing of the last statement; the other methods answer
    questions about the index relative to that bracketing.
    """

    def __init__(self, editwin, index):
        """To initialize, analyze the surroundings of the given index.

        editwin must provide ``text``, ``indentwidth``, ``tabwidth``,
        ``context_use_ps1``, ``num_context_lines`` and
        ``_build_char_in_string_func``; index is an index into
        ``editwin.text``.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = pyparse.Parser(editwin.indentwidth, editwin.tabwidth)

        def index2line(index):
            # "line.column" -> line number
            return int(float(index))
        lno = index2line(text.index(index))

        if not editwin.context_use_ps1:
            # Try successively larger amounts of context until the parser
            # finds a good place to start, or the top of the text is reached.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline
                # at end. We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_str(text.get(startatindex, stopatindex)+' \n')
                bod = parser.find_good_parse_start(
                          editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the previous "console"-tagged range, or at
            # the top of the text if there is none.
            r = text.tag_prevrange("console", index)
            startatindex = r[1] if r else "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires it. We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_str(text.get(startatindex, stopatindex)+' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.str[:-2]
        # Parser.str apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        # Sequence of entries; entry[0] is used as a position in rawtext
        # and entry[1] as a nesting level throughout this class.
        self.bracketing = parser.get_last_stmt_bracketing()
        # find which pairs of bracketing are openers. These always
        # correspond to a character of rawtext.
        self.isopener = [i > 0 and self.bracketing[i][1] >
                                   self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.

        Raises ValueError if the index lies before the analyzed text.
        """
        # rawtext ends at stopatindex, so the offset of index in rawtext
        # is measured back from the end.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext
        # find the rightmost bracket to which index belongs
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry starting exactly at the index is taken only when it is
        # not an opener.
        if (self.indexbracket < len(self.bracketing)-1 and
                self.bracketing[self.indexbracket+1][0] ==
                self.indexinrawtext and
                not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?"""
        # Not in code means: inside an opener whose character starts a
        # comment or a string.
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """

        bracketinglevel = self.bracketing[self.indexbracket][1]
        # Walk backwards to the nearest enclosing opener that is one of
        # the requested characters.
        before = self.indexbracket
        while (not self.isopener[before] or
               self.rawtext[self.bracketing[before][0]] not in openers or
               self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry below that opener's level.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
               self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Convert rawtext positions to text widget indices by counting
        # characters back from stopatindex.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
                self.bracketing[after][0] > len(self.rawtext)):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # the set of built-in identifiers which are also keywords,
    # i.e. keyword.iskeyword() returns True for them
    _ID_KEYWORDS = frozenset({"True", "False", "None"})

    @classmethod
    def _eat_identifier(cls, str, limit, pos):
        """Given a string and pos, return the number of chars in the
        identifier which ends at pos, or 0 if there is no such one.

        Characters before limit are never examined.  Keywords are not
        considered identifiers, except for those in _ID_KEYWORDS.
        """
        is_ascii_id_char = _IS_ASCII_ID_CHAR

        # Start at the end (pos) and work backwards.
        i = pos

        # Go backwards as long as the characters are valid ASCII
        # identifier characters. This is an optimization, since it
        # is faster in the common case where most of the characters
        # are ASCII.
        while i > limit and (
                ord(str[i - 1]) < 128 and
                is_ascii_id_char[ord(str[i - 1])]
        ):
            i -= 1

        # If the above loop ended due to reaching a non-ASCII
        # character, continue going backwards using the most generic
        # test for whether a string contains only valid identifier
        # characters.
        if i > limit and ord(str[i - 1]) >= 128:
            # The 'a' prefix makes the test accept characters that are
            # valid inside an identifier but not as its first character.
            while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
                i -= 4
            if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
                i -= 2
            if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
                i -= 1

            # The identifier candidate starts here. If it isn't a valid
            # identifier, don't eat anything. At this point that is only
            # possible if the first character isn't a valid first
            # character for an identifier.
            if not str[i:pos].isidentifier():
                return 0
        elif i < pos:
            # All characters in str[i:pos] are valid ASCII identifier
            # characters, so it is enough to check that the first is
            # valid as the first character of an identifier.
            if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]:
                return 0

        # All keywords are valid identifiers, but should not be
        # considered identifiers here, except for True, False and None.
        if i < pos and (
                iskeyword(str[i:pos]) and
                str[i:pos] not in cls._ID_KEYWORDS
        ):
            return 0

        return pos - i

    # This string includes all chars that may be in a white space
    _whitespace_chars = " \t\n\\"

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not in normal code (see
        is_in_code).
        """
        if not self.is_in_code():
            # The two literals are concatenated, so the first one must end
            # with a space to keep "called" and "if" apart.
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        # True while an identifier (or bracketed part) is expected next;
        # False while a dot is required to continue the expression.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if pos > brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                # [] and () may be used after an identifier, so for those
                # we continue. postdot_phase is True, so we don't allow a
                # dot. We can't continue after other types of brackets.
                if rawtext[pos] not in "([":
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]
309n/a
# When executed as a script, run this module's unit tests verbosely.
if __name__ == '__main__':
    from unittest import main
    main('idlelib.idle_test.test_hyperparser', verbosity=2)