
Python code coverage for Lib/lib2to3/pgen2/grammar.py

No coverage counts were recorded for this file (every line reported n/a).
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""

# Python imports
import collections
import pickle

# Local imports
from . import token, tokenize


class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py. The parsing engine
    accesses the instance variables directly. The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses. The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass). The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers. Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is an (i, j) pair where i is a label and j is
                     a state number. The DFA number is the index into
                     this list. (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords. The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """
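    # A minimal, hypothetical illustration (not part of the original
    # module) of what these tables might hold for a one-rule grammar
    # "single_input: NEWLINE". The values below are assumptions for
    # demonstration only; real tables are built by the pgen subclasses:
    #
    #     g = Grammar()
    #     g.symbol2number = {"single_input": 256}
    #     g.number2symbol = {256: "single_input"}
    #     g.labels.append((token.NEWLINE, None))  # becomes label 1
    #     g.tokens = {token.NEWLINE: 1}           # token number -> arc label
    #     g.states = [[[(1, 1)], [(0, 1)]]]       # DFA 0: state 0 shifts on
    #                                             # label 1; state 1 is final
    #     g.dfas = {256: (g.states[0], {1: 1})}   # first set contains label 1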

    def __init__(self):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        dump() recursively converts every dict to an OrderedDict, so the
        pickled file is not exactly the same as what was passed in to
        dump(). load() uses the pickled file to create the tables, but
        only changes OrderedDict back to dict at the top level; it does
        not recurse into the values. So the loaded tables differ from
        the originals passed to dump() in that some of the OrderedDicts
        (from the pickled file) are not changed back to dicts. For
        parsing this has no effect on performance, because OrderedDict
        uses dict's __getitem__ with nothing in between.
        """
        with open(filename, "wb") as f:
            d = _make_deterministic(self.__dict__)
            pickle.dump(d, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """
        Copy the grammar.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)

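# An illustrative sketch (not part of the original module) of the usual
# dump()/load() round-trip. The file names are hypothetical;
# generate_grammar() lives in the sibling pgen module:
#
#     from lib2to3.pgen2 import pgen
#     g = pgen.generate_grammar("Grammar.txt")  # a subclass builds the tables
#     g.dump("Grammar.pickle")                  # cache them for fast reloads
#     g2 = Grammar()
#     g2.load("Grammar.pickle")
#     assert g2.symbol2number == g.symbol2number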

def _make_deterministic(top):
    if isinstance(top, dict):
        return collections.OrderedDict(
            sorted(((k, _make_deterministic(v)) for k, v in top.items())))
    if isinstance(top, list):
        return [_make_deterministic(e) for e in top]
    if isinstance(top, tuple):
        return tuple(_make_deterministic(e) for e in top)
    return top
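# For example (illustrative only), nested containers are normalized
# recursively with dict keys sorted, which is what makes the pickled
# grammar byte-for-byte reproducible across runs:
#
#     >>> _make_deterministic({"b": [{"d": 1, "c": 2}], "a": (3,)})
#     OrderedDict([('a', (3,)), ('b', [OrderedDict([('c', 2), ('d', 1)])])])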


# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

opmap = {}
for line in opmap_raw.splitlines():
    if line:
        op, name = line.split()
        opmap[op] = getattr(token, name)
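
# Illustrative check (not part of the original module): opmap resolves an
# operator's text to the concrete token code the parser needs, rather
# than the generic OP code that tokenize reports:
#
#     >>> opmap["**="] == token.DOUBLESTAREQUAL
#     True
#     >>> opmap["<>"] == opmap["!="]  # both spellings map to NOTEQUAL
#     True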