» Core Development > Code coverage > Lib/lib2to3/pgen2/conv.py

Python code coverage for Lib/lib2to3/pgen2/conv.py

# | count | content
1n/a# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
2n/a# Licensed to PSF under a Contributor Agreement.
3n/a
4n/a"""Convert graminit.[ch] spit out by pgen to Python code.
5n/a
6n/aPgen is the Python parser generator. It is useful to quickly create a
7n/aparser from a grammar file in Python's grammar notation. But I don't
8n/awant my parsers to be written in C (yet), so I'm translating the
9n/aparsing tables to Python data structures and writing a Python parse
10n/aengine.
11n/a
12n/aNote that the token numbers are constants determined by the standard
13n/aPython tokenizer. The standard token module defines these numbers and
14n/atheir names (the names are not used much). The token numbers are
15n/ahardcoded into the Python tokenizer and into pgen. A Python
16n/aimplementation of the Python tokenizer is also available, in the
17n/astandard tokenize module.
18n/a
19n/aOn the other hand, symbol numbers (representing the grammar's
20n/anon-terminals) are assigned by pgen based on the actual grammar
21n/ainput.
22n/a
23n/aNote: this module is pretty much obsolete; the pgen module generates
24n/aequivalent grammar tables directly from the Grammar.txt input file
25n/awithout having to invoke the Python pgen C program.
26n/a
27n/a"""
28n/a
29n/a# Python imports
30n/aimport re
31n/a
32n/a# Local imports
33n/afrom pgen2 import grammar, token
34n/a
35n/a
class Converter(grammar.Grammar):
    """Grammar subclass that reads classic pgen output files.

    The run() method reads the tables as produced by the pgen parser
    generator, typically contained in two C files, graminit.h and
    graminit.c. The other methods are for internal use only.

    See the base class for more documentation.

    """

    def run(self, graminit_h, graminit_c):
        """Load the grammar tables from the text files written by pgen.

        graminit_h and graminit_c are the file names of the header and
        the C source generated by pgen. The return values of the two
        parse steps are not inspected here.

        """
        self.parse_graminit_h(graminit_h)
        self.parse_graminit_c(graminit_c)
        self.finish_off()

    def parse_graminit_h(self, filename):
        """Parse the .h file written by pgen. (Internal)

        This file is a sequence of #define statements defining the
        nonterminals of the grammar as numbers. We build two tables
        mapping the numbers to names and back.

        Blank lines are skipped silently; any other line that is not a
        "#define NAME NUMBER" statement is reported on stdout and
        skipped.

        Returns False if the file cannot be opened, True otherwise.

        """
        try:
            f = open(filename)
        except OSError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        self.symbol2number = {}
        self.number2symbol = {}
        with f:  # make sure the file is closed even if an assert fires
            for lineno, line in enumerate(f, 1):
                mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
                if not mo:
                    # Only complain about lines that have content; a
                    # blank line must not reach mo.groups() below.
                    if line.strip():
                        print("%s(%s): can't parse %s" % (filename, lineno,
                                                          line.strip()))
                    continue
                symbol, number = mo.groups()
                number = int(number)
                assert symbol not in self.symbol2number
                assert number not in self.number2symbol
                self.symbol2number[symbol] = number
                self.number2symbol[number] = symbol
        return True

    def parse_graminit_c(self, filename):
        """Parse the .c file written by pgen. (Internal)

        The file looks as follows. The first two lines are always this:

        #include "pgenheaders.h"
        #include "grammar.h"

        After that come four blocks:

        1) one or more state definitions
        2) a table defining dfas
        3) a table defining labels
        4) a struct defining the grammar

        A state definition has the following form:
        - one or more arc arrays, each of the form:
          static arc arcs_<n>_<m>[<k>] = {
                  {<i>, <j>},
                  ...
          };
        - followed by a state array, of the form:
          static state states_<s>[<t>] = {
                  {<k>, arcs_<n>_<m>},
                  ...
          };

        Sets self.states, self.dfas, self.labels and self.start. The
        dfa table is cross-checked against self.symbol2number and
        self.number2symbol, so parse_graminit_h() must have run first.

        Returns False if the file cannot be opened and True after a
        successful parse. Any deviation from the expected layout trips
        an assert whose message is the (lineno, line) pair; a file that
        ends early raises StopIteration from next().

        """
        try:
            f = open(filename)
        except OSError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        # The code below essentially uses f's iterator-ness!
        with f:  # make sure the file is closed even if an assert fires
            lineno = 0

            # Expect the two #include lines
            lineno, line = lineno+1, next(f)
            assert line == '#include "pgenheaders.h"\n', (lineno, line)
            lineno, line = lineno+1, next(f)
            assert line == '#include "grammar.h"\n', (lineno, line)

            # Parse the state definitions
            lineno, line = lineno+1, next(f)
            allarcs = {}
            states = []
            while line.startswith("static arc "):
                # One or more arc arrays...
                while line.startswith("static arc "):
                    mo = re.match(
                        r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", line)
                    assert mo, (lineno, line)
                    n, m, k = map(int, mo.groups())
                    arcs = []
                    for _ in range(k):
                        lineno, line = lineno+1, next(f)
                        mo = re.match(r"\s+{(\d+), (\d+)},$", line)
                        assert mo, (lineno, line)
                        i, j = map(int, mo.groups())
                        arcs.append((i, j))
                    lineno, line = lineno+1, next(f)
                    assert line == "};\n", (lineno, line)
                    allarcs[(n, m)] = arcs
                    lineno, line = lineno+1, next(f)
                # ...followed by the state array that refers to them.
                mo = re.match(
                    r"static state states_(\d+)\[(\d+)\] = {$", line)
                assert mo, (lineno, line)
                s, t = map(int, mo.groups())
                assert s == len(states), (lineno, line)
                state = []
                for _ in range(t):
                    lineno, line = lineno+1, next(f)
                    mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
                    assert mo, (lineno, line)
                    k, n, m = map(int, mo.groups())
                    arcs = allarcs[n, m]
                    assert k == len(arcs), (lineno, line)
                    state.append(arcs)
                states.append(state)
                lineno, line = lineno+1, next(f)
                assert line == "};\n", (lineno, line)
                lineno, line = lineno+1, next(f)
            self.states = states

            # Parse the dfas
            dfas = {}
            mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            for _ in range(ndfas):
                lineno, line = lineno+1, next(f)
                mo = re.match(
                    r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
                    line)
                assert mo, (lineno, line)
                symbol = mo.group(2)
                number, x, y, z = map(int, mo.group(1, 3, 4, 5))
                assert self.symbol2number[symbol] == number, (lineno, line)
                assert self.number2symbol[number] == symbol, (lineno, line)
                assert x == 0, (lineno, line)
                state = states[z]
                assert y == len(state), (lineno, line)
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
                assert mo, (lineno, line)
                first = {}
                # NOTE(security): eval() on file content. The regex above
                # only lets through a double-quoted string made of
                # three-digit backslash escapes, so nothing but a string
                # literal can reach eval() here.
                rawbitset = eval(mo.group(1))
                # Expand the bitset: bit j of byte number byteno set
                # means that label byteno*8 + j is in the first set.
                for byteno, char in enumerate(rawbitset):
                    byte = ord(char)
                    for j in range(8):
                        if byte & (1<<j):
                            first[byteno*8 + j] = 1
                dfas[number] = (state, first)
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.dfas = dfas

            # Parse the labels
            labels = []
            lineno, line = lineno+1, next(f)
            mo = re.match(r"static label labels\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            for _ in range(nlabels):
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
                assert mo, (lineno, line)
                x, y = mo.groups()
                x = int(x)
                if y == "0":
                    y = None
                else:
                    # NOTE(security): eval() on file content; the regex
                    # above restricts y to a double-quoted run of word
                    # characters.
                    y = eval(y)
                labels.append((x, y))
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.labels = labels

            # Parse the grammar struct
            lineno, line = lineno+1, next(f)
            assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+),$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            assert ndfas == len(self.dfas)
            lineno, line = lineno+1, next(f)
            assert line == "\tdfas,\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+{(\d+), labels},$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            assert nlabels == len(self.labels), (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+)$", line)
            assert mo, (lineno, line)
            start = int(mo.group(1))
            assert start in self.number2symbol, (lineno, line)
            self.start = start
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)

            # Nothing may follow the grammar struct.
            try:
                lineno, line = lineno+1, next(f)
            except StopIteration:
                pass
            else:
                assert 0, (lineno, line)
        return True

    def finish_off(self):
        """Create additional useful structures. (Internal).

        Builds self.keywords and self.tokens from self.labels; both map
        to the index of the label in self.labels.

        """
        self.keywords = {}  # map from keyword strings to arc labels
        self.tokens = {}    # map from numeric token values to arc labels
        for ilabel, (toktype, value) in enumerate(self.labels):
            if toktype == token.NAME and value is not None:
                self.keywords[value] = ilabel
            elif value is None:
                self.tokens[toktype] = ilabel