# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""

# Python imports
import collections
import pickle

# Local imports
from . import token, tokenize
---|
class Grammar(object):
    """Container for the pgen parse tables.

    An instance supplies the grammar tables consumed by the parsing
    engine in parse.py; the engine reads the instance attributes
    directly.  This class does not build the tables itself -- several
    subclasses exist for that (see the conv and pgen modules).

    Tables can also be round-tripped through a pickle file, which is
    much faster than regenerating them with a subclass: dump() writes
    the pickle and load() reads it back.  report() prints a readable
    rendering of the tables to stdout, for debugging.

    Instance attributes:

    symbol2number -- maps symbol names to numbers.  Symbol numbers are
                     always 256 or higher, keeping them distinct from
                     token numbers, which lie between 0 and 255
                     inclusive.

    number2symbol -- maps numbers back to symbol names; the inverse of
                     symbol2number.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state a list of arcs, and each arc an
                     (i, j) pair with i a label and j a state number.
                     The DFA number is the index into this list.  (The
                     name is slightly confusing.)  A final state is
                     marked by a special arc (0, j) where j is its own
                     state number.

    dfas          -- maps symbol numbers to (DFA, first) pairs, where
                     DFA is an item from the states list above and
                     first is the set of tokens that can begin the
                     rule (represented by a dict whose values are
                     always 1).

    labels        -- a list of (x, y) pairs where x is a token number
                     or a symbol number and y is either None or a
                     keyword string.  The label number is the index
                     into this list; label numbers mark the state
                     transitions (arcs) in the DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- maps keyword strings to arc labels.

    tokens        -- maps token numbers to arc labels.

    """

    def __init__(self):
        # All tables start empty; a subclass or load() fills them in.
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        dump() recursively changes every dict to an OrderedDict, so the
        pickled file is not exactly the same as what was passed in to
        dump().  load() uses the pickled file to create the tables, but
        only changes OrderedDict to dict at the top level; it does not
        recursively change OrderedDict back to dict.  So the loaded
        tables differ from the originals in that some of the
        OrderedDicts (from the pickled file) are not changed back to
        dict.  For parsing this has no effect on performance, because
        OrderedDict uses dict's __getitem__ with nothing in between.
        """
        with open(filename, "wb") as pickle_file:
            # Sort all nested dicts so the pickle output is deterministic.
            pickle.dump(_make_deterministic(self.__dict__),
                        pickle_file, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as pickle_file:
            self.__dict__.update(pickle.load(pickle_file))

    def copy(self):
        """
        Copy the grammar.
        """
        duplicate = self.__class__()
        # Shallow-copy each table; labels/states are lists, the rest dicts.
        for attr in ("symbol2number", "number2symbol", "dfas",
                     "keywords", "tokens", "symbol2label"):
            setattr(duplicate, attr, getattr(self, attr).copy())
        duplicate.labels = list(self.labels)
        duplicate.states = list(self.states)
        duplicate.start = self.start
        return duplicate

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        # Print each table under a short heading.
        for heading, table in (("s2n", self.symbol2number),
                               ("n2s", self.number2symbol),
                               ("states", self.states),
                               ("dfas", self.dfas),
                               ("labels", self.labels)):
            print(heading)
            pprint(table)
        print("start", self.start)
---|
139 | n/a | |
---|
140 | n/a | def _make_deterministic(top): |
---|
141 | n/a | if isinstance(top, dict): |
---|
142 | n/a | return collections.OrderedDict( |
---|
143 | n/a | sorted(((k, _make_deterministic(v)) for k, v in top.items()))) |
---|
144 | n/a | if isinstance(top, list): |
---|
145 | n/a | return [_make_deterministic(e) for e in top] |
---|
146 | n/a | if isinstance(top, tuple): |
---|
147 | n/a | return tuple(_make_deterministic(e) for e in top) |
---|
148 | n/a | return top |
---|
149 | n/a | |
---|
150 | n/a | |
---|
# Map from operator to number (since tokenize doesn't do this)

# Each non-blank line below pairs an operator literal with the name of
# its token code; the loop following this table resolves each name via
# getattr(token, name) to build ``opmap``.  NOTE(review): BACKQUOTE and
# the ``<>`` spelling of NOTEQUAL are Python 2-only operators, so the
# ``token`` module used here is presumably the package-local one rather
# than the stdlib module -- confirm against the import at the top of
# this file.
opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""
---|
202 | n/a | |
---|
# Build the operator -> token-number table by parsing opmap_raw:
# every non-blank line is "<operator> <token-name>".
opmap = {}
for entry in opmap_raw.splitlines():
    if not entry:
        continue
    op_text, token_name = entry.split()
    opmap[op_text] = getattr(token, token_name)
---|