Python code coverage for Lib/lib2to3/pgen2/parse.py

#	count	content
1	n/a	# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2	n/a	# Licensed to PSF under a Contributor Agreement.
3	n/a
4	n/a	"""Parser engine for the grammar tables generated by pgen.
5	n/a
6	n/a	The grammar table must be loaded first.
7	n/a
8	n/a	See Parser/parser.c in the Python distribution for additional info on
9	n/a	how this parsing engine works.
10	n/a
11	n/a	"""
12	n/a
13	n/a	# Local imports
14	n/a	from . import token
15	n/a
class ParseError(Exception):
    """Exception to signal the parser is stuck.

    The constructor arguments are kept as the attributes ``msg``,
    ``type``, ``value`` and ``context``; the exception message itself is
    a single string formatted from all four.
    """

    def __init__(self, msg, type, value, context):
        """Build the formatted message and remember the raw arguments."""
        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
                           (msg, type, value, context))
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context

    def __reduce__(self):
        """Support copy/pickle by rebuilding from the four original arguments.

        The default exception reduction replays ``self.args``, which here
        holds only the one formatted message string, so reconstruction
        would call ``__init__`` with too few arguments and fail.
        """
        return self.__class__, (self.msg, self.type, self.value, self.context)
26	n/a
class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])                  # prepare for parsing
    <for each input token>:
        if p.addtoken(...):           # parse a token; may raise ParseError
            break
    root = p.rootnode                 # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable.  When a syntax error occurs, addtoken() raises
    the ParseError exception.  There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar, convert=None):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes.  If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree.  If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted.  The syntax tree is converted from the bottom
        up.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        # Fall back to an identity converter so shift()/pop() can always
        # call self.convert without checking for None.
        self.convert = convert or (lambda grammar, node: node)

    def setup(self, start=None):
        """Prepare for parsing.

        This must be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = [stackentry]
        self.rootnode = None
        # Names seen by classify(); pop() also tries to attach this set to
        # the finished root node.
        self.used_names = set()

    def addtoken(self, type, value, context):
        """Add a token; return True iff this is the end of the program.

        Raises ParseError when the token has no label in the grammar
        ("bad token", from classify()), when no arc accepts it
        ("bad input"), or when the stack empties while the token is
        still unconsumed ("too much input").
        """
        # Map from token to label
        ilabel = self.classify(type, value, context)
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, _ = self.stack[-1]
            states, _ = dfa
            arcs = states[state]
            # Look for an arc out of this state that accepts the label
            for i, newstate in arcs:
                t, _ = self.grammar.labels[i]
                if ilabel == i:
                    # A direct match must be a terminal (token numbers
                    # are below 256; symbols start at 256).
                    assert t < 256
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, _ = self.stack[-1]
                        states, _ = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    _, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, itsdfa, newstate, context)
                        break  # To continue the outer while loop
            else:
                # No arc matched and nothing was pushed.
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input",
                                         type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type, value, context):
        """Turn a token into a label.  (Internal)

        NAME tokens are recorded in self.used_names and looked up in the
        grammar's keyword table first; any other token (or a NAME that is
        not a keyword) is looked up by type.  Raises ParseError
        ("bad token") when the type has no label.
        """
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            ilabel = self.grammar.keywords.get(value)
            if ilabel is not None:
                return ilabel
        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return ilabel

    def shift(self, type, value, newstate, context):
        """Shift a token.  (Internal)

        Converts the token into a leaf node, appends it to the children
        of the node on top of the stack (unless the converter returned
        None), and moves that stack entry to newstate.
        """
        dfa, _, node = self.stack[-1]
        newnode = (type, value, context, None)
        newnode = self.convert(self.grammar, newnode)
        if newnode is not None:
            node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def push(self, type, newdfa, newstate, context):
        """Push a nonterminal.  (Internal)

        Moves the current stack entry to newstate (the state to resume in
        once the nonterminal is popped) and pushes a fresh entry for the
        nonterminal in state 0 with an empty child list.
        """
        dfa, _, node = self.stack[-1]
        newnode = (type, None, context, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def pop(self):
        """Pop a nonterminal.  (Internal)

        Converts the finished node and, unless the converter returned
        None, appends it to its parent's children; when the stack is now
        empty the node becomes self.rootnode instead.
        """
        _, _, popnode = self.stack.pop()
        newnode = self.convert(self.grammar, popnode)
        if newnode is not None:
            if self.stack:
                _, _, node = self.stack[-1]
                node[-1].append(newnode)
            else:
                self.rootnode = newnode
                try:
                    self.rootnode.used_names = self.used_names
                except AttributeError:
                    # The default identity converter leaves the root as a
                    # plain tuple, which cannot carry attributes; the
                    # names remain available as self.used_names.
                    pass