
Python code coverage for Lib/lib2to3/pgen2/parse.py

The coverage count is n/a for every line of this file; the source is reproduced below.
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.

"""

# Local imports
from . import token

class ParseError(Exception):
    """Exception to signal the parser is stuck."""

    def __init__(self, msg, type, value, context):
        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
                           (msg, type, value, context))
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context

class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter]) # create instance
    p.setup([start]) # prepare for parsing
    <for each input token>:
        if p.addtoken(...): # parse a token; may raise ParseError
            break
    root = p.rootnode # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable. When a syntax error occurs, addtoken() raises
    the ParseError exception. There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar, convert=None):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes. If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree. If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted. The syntax tree is converted from the bottom
        up.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        self.convert = convert or (lambda grammar, node: node)

    def setup(self, start=None):
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = [stackentry]
        self.rootnode = None
        self.used_names = set() # Aliased to self.rootnode.used_names in pop()

    def addtoken(self, type, value, context):
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabel = self.classify(type, value, context)
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = self.grammar.labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, self.grammar.dfas[t], newstate, context)
                        break # To continue the outer while loop
            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input",
                                         type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type, value, context):
        """Turn a token into a label. (Internal)"""
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            ilabel = self.grammar.keywords.get(value)
            if ilabel is not None:
                return ilabel
        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return ilabel

    def shift(self, type, value, newstate, context):
        """Shift a token. (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type, value, context, None)
        newnode = self.convert(self.grammar, newnode)
        if newnode is not None:
            node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def push(self, type, newdfa, newstate, context):
        """Push a nonterminal. (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type, None, context, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def pop(self):
        """Pop a nonterminal. (Internal)"""
        popdfa, popstate, popnode = self.stack.pop()
        newnode = self.convert(self.grammar, popnode)
        if newnode is not None:
            if self.stack:
                dfa, state, node = self.stack[-1]
                node[-1].append(newnode)
            else:
                self.rootnode = newnode
                self.rootnode.used_names = self.used_names
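
The class docstring above sketches the proper usage sequence (setup(), then addtoken() for each token, then rootnode), and points at driver.py for how tokens are produced. The short sketch below, which is not part of parse.py, exercises the engine through pgen2's Driver helper, which runs that loop for you; it assumes lib2to3's bundled Python grammar (lib2to3.pygram.python_grammar) and its stock tree builder (lib2to3.pytree.convert). The wrapper converter show_convert is a hypothetical name used only to illustrate the node contract documented in Parser.__init__.

# Usage sketch (not part of parse.py): drive Parser via pgen2's Driver.
from lib2to3 import pygram, pytree
from lib2to3.pgen2 import driver

def show_convert(grammar, raw_node):
    # Converter contract from Parser.__init__: raw_node is a
    # (type, value, context, children) tuple, delivered bottom-up;
    # value is a string for tokens and None for symbols, children is
    # a list for symbols and None for tokens.
    ntype, value, context, children = raw_node
    # Delegate the actual tree building to lib2to3's stock converter.
    return pytree.convert(grammar, raw_node)

d = driver.Driver(pygram.python_grammar, convert=show_convert)
tree = d.parse_string("x = 1\n")    # addtoken() raises ParseError on bad input
print(type(tree).__name__, repr(str(tree)))    # root node built bottom-up

Going through Driver avoids re-implementing the token classification and whitespace bookkeeping that driver.py already performs; calling setup() and addtoken() directly also works, as the class docstring shows, provided the tokens are supplied in the form driver.py produces.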