ยปCore Development>Code coverage>Parser/asdl.py

Python code coverage for Parser/asdl.py

#countcontent
1n/a#-------------------------------------------------------------------------------
2n/a# Parser for ASDL [1] definition files. Reads in an ASDL description and parses
3n/a# it into an AST that describes it.
4n/a#
5n/a# The EBNF we're parsing here: Figure 1 of the paper [1]. Extended to support
6n/a# modules and attributes after a product. Words starting with Capital letters
7n/a# are terminals. Literal tokens are in "double quotes". Others are
8n/a# non-terminals. Id is either TokenId or ConstructorId.
9n/a#
10n/a# module ::= "module" Id "{" [definitions] "}"
11n/a# definitions ::= { TypeId "=" type }
12n/a# type ::= product | sum
13n/a# product ::= fields ["attributes" fields]
14n/a# fields ::= "(" { field, "," } field ")"
15n/a# field ::= TypeId ["?" | "*"] [Id]
16n/a# sum ::= constructor { "|" constructor } ["attributes" fields]
17n/a# constructor ::= ConstructorId [fields]
18n/a#
19n/a# [1] "The Zephyr Abstract Syntax Description Language" by Wang, et. al. See
20n/a# http://asdl.sourceforge.net/
21n/a#-------------------------------------------------------------------------------
22n/afrom collections import namedtuple
23n/aimport re
24n/a
25n/a__all__ = [
26n/a 'builtin_types', 'parse', 'AST', 'Module', 'Type', 'Constructor',
27n/a 'Field', 'Sum', 'Product', 'VisitorBase', 'Check', 'check']
28n/a
29n/a# The following classes define nodes into which the ASDL description is parsed.
30n/a# Note: this is a "meta-AST". ASDL files (such as Python.asdl) describe the AST
31n/a# structure used by a programming language. But ASDL files themselves need to be
32n/a# parsed. This module parses ASDL files and uses a simple AST to represent them.
33n/a# See the EBNF at the top of the file to understand the logical connection
34n/a# between the various node types.
35n/a
36n/abuiltin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton',
37n/a 'constant'}
38n/a
39n/aclass AST:
40n/a def __repr__(self):
41n/a raise NotImplementedError
42n/a
43n/aclass Module(AST):
44n/a def __init__(self, name, dfns):
45n/a self.name = name
46n/a self.dfns = dfns
47n/a self.types = {type.name: type.value for type in dfns}
48n/a
49n/a def __repr__(self):
50n/a return 'Module({0.name}, {0.dfns})'.format(self)
51n/a
52n/aclass Type(AST):
53n/a def __init__(self, name, value):
54n/a self.name = name
55n/a self.value = value
56n/a
57n/a def __repr__(self):
58n/a return 'Type({0.name}, {0.value})'.format(self)
59n/a
60n/aclass Constructor(AST):
61n/a def __init__(self, name, fields=None):
62n/a self.name = name
63n/a self.fields = fields or []
64n/a
65n/a def __repr__(self):
66n/a return 'Constructor({0.name}, {0.fields})'.format(self)
67n/a
68n/aclass Field(AST):
69n/a def __init__(self, type, name=None, seq=False, opt=False):
70n/a self.type = type
71n/a self.name = name
72n/a self.seq = seq
73n/a self.opt = opt
74n/a
75n/a def __repr__(self):
76n/a if self.seq:
77n/a extra = ", seq=True"
78n/a elif self.opt:
79n/a extra = ", opt=True"
80n/a else:
81n/a extra = ""
82n/a if self.name is None:
83n/a return 'Field({0.type}{1})'.format(self, extra)
84n/a else:
85n/a return 'Field({0.type}, {0.name}{1})'.format(self, extra)
86n/a
87n/aclass Sum(AST):
88n/a def __init__(self, types, attributes=None):
89n/a self.types = types
90n/a self.attributes = attributes or []
91n/a
92n/a def __repr__(self):
93n/a if self.attributes:
94n/a return 'Sum({0.types}, {0.attributes})'.format(self)
95n/a else:
96n/a return 'Sum({0.types})'.format(self)
97n/a
98n/aclass Product(AST):
99n/a def __init__(self, fields, attributes=None):
100n/a self.fields = fields
101n/a self.attributes = attributes or []
102n/a
103n/a def __repr__(self):
104n/a if self.attributes:
105n/a return 'Product({0.fields}, {0.attributes})'.format(self)
106n/a else:
107n/a return 'Product({0.fields})'.format(self)
108n/a
109n/a# A generic visitor for the meta-AST that describes ASDL. This can be used by
110n/a# emitters. Note that this visitor does not provide a generic visit method, so a
111n/a# subclass needs to define visit methods from visitModule to as deep as the
112n/a# interesting node.
113n/a# We also define a Check visitor that makes sure the parsed ASDL is well-formed.
114n/a
115n/aclass VisitorBase(object):
116n/a """Generic tree visitor for ASTs."""
117n/a def __init__(self):
118n/a self.cache = {}
119n/a
120n/a def visit(self, obj, *args):
121n/a klass = obj.__class__
122n/a meth = self.cache.get(klass)
123n/a if meth is None:
124n/a methname = "visit" + klass.__name__
125n/a meth = getattr(self, methname, None)
126n/a self.cache[klass] = meth
127n/a if meth:
128n/a try:
129n/a meth(obj, *args)
130n/a except Exception as e:
131n/a print("Error visiting %r: %s" % (obj, e))
132n/a raise
133n/a
134n/aclass Check(VisitorBase):
135n/a """A visitor that checks a parsed ASDL tree for correctness.
136n/a
137n/a Errors are printed and accumulated.
138n/a """
139n/a def __init__(self):
140n/a super(Check, self).__init__()
141n/a self.cons = {}
142n/a self.errors = 0
143n/a self.types = {}
144n/a
145n/a def visitModule(self, mod):
146n/a for dfn in mod.dfns:
147n/a self.visit(dfn)
148n/a
149n/a def visitType(self, type):
150n/a self.visit(type.value, str(type.name))
151n/a
152n/a def visitSum(self, sum, name):
153n/a for t in sum.types:
154n/a self.visit(t, name)
155n/a
156n/a def visitConstructor(self, cons, name):
157n/a key = str(cons.name)
158n/a conflict = self.cons.get(key)
159n/a if conflict is None:
160n/a self.cons[key] = name
161n/a else:
162n/a print('Redefinition of constructor {}'.format(key))
163n/a print('Defined in {} and {}'.format(conflict, name))
164n/a self.errors += 1
165n/a for f in cons.fields:
166n/a self.visit(f, key)
167n/a
168n/a def visitField(self, field, name):
169n/a key = str(field.type)
170n/a l = self.types.setdefault(key, [])
171n/a l.append(name)
172n/a
173n/a def visitProduct(self, prod, name):
174n/a for f in prod.fields:
175n/a self.visit(f, name)
176n/a
177n/adef check(mod):
178n/a """Check the parsed ASDL tree for correctness.
179n/a
180n/a Return True if success. For failure, the errors are printed out and False
181n/a is returned.
182n/a """
183n/a v = Check()
184n/a v.visit(mod)
185n/a
186n/a for t in v.types:
187n/a if t not in mod.types and not t in builtin_types:
188n/a v.errors += 1
189n/a uses = ", ".join(v.types[t])
190n/a print('Undefined type {}, used in {}'.format(t, uses))
191n/a return not v.errors
192n/a
193n/a# The ASDL parser itself comes next. The only interesting external interface
194n/a# here is the top-level parse function.
195n/a
196n/adef parse(filename):
197n/a """Parse ASDL from the given file and return a Module node describing it."""
198n/a with open(filename) as f:
199n/a parser = ASDLParser()
200n/a return parser.parse(f.read())
201n/a
202n/a# Types for describing tokens in an ASDL specification.
203n/aclass TokenKind:
204n/a """TokenKind is provides a scope for enumerated token kinds."""
205n/a (ConstructorId, TypeId, Equals, Comma, Question, Pipe, Asterisk,
206n/a LParen, RParen, LBrace, RBrace) = range(11)
207n/a
208n/a operator_table = {
209n/a '=': Equals, ',': Comma, '?': Question, '|': Pipe, '(': LParen,
210n/a ')': RParen, '*': Asterisk, '{': LBrace, '}': RBrace}
211n/a
212n/aToken = namedtuple('Token', 'kind value lineno')
213n/a
214n/aclass ASDLSyntaxError(Exception):
215n/a def __init__(self, msg, lineno=None):
216n/a self.msg = msg
217n/a self.lineno = lineno or '<unknown>'
218n/a
219n/a def __str__(self):
220n/a return 'Syntax error on line {0.lineno}: {0.msg}'.format(self)
221n/a
222n/adef tokenize_asdl(buf):
223n/a """Tokenize the given buffer. Yield Token objects."""
224n/a for lineno, line in enumerate(buf.splitlines(), 1):
225n/a for m in re.finditer(r'\s*(\w+|--.*|.)', line.strip()):
226n/a c = m.group(1)
227n/a if c[0].isalpha():
228n/a # Some kind of identifier
229n/a if c[0].isupper():
230n/a yield Token(TokenKind.ConstructorId, c, lineno)
231n/a else:
232n/a yield Token(TokenKind.TypeId, c, lineno)
233n/a elif c[:2] == '--':
234n/a # Comment
235n/a break
236n/a else:
237n/a # Operators
238n/a try:
239n/a op_kind = TokenKind.operator_table[c]
240n/a except KeyError:
241n/a raise ASDLSyntaxError('Invalid operator %s' % c, lineno)
242n/a yield Token(op_kind, c, lineno)
243n/a
244n/aclass ASDLParser:
245n/a """Parser for ASDL files.
246n/a
247n/a Create, then call the parse method on a buffer containing ASDL.
248n/a This is a simple recursive descent parser that uses tokenize_asdl for the
249n/a lexing.
250n/a """
251n/a def __init__(self):
252n/a self._tokenizer = None
253n/a self.cur_token = None
254n/a
255n/a def parse(self, buf):
256n/a """Parse the ASDL in the buffer and return an AST with a Module root.
257n/a """
258n/a self._tokenizer = tokenize_asdl(buf)
259n/a self._advance()
260n/a return self._parse_module()
261n/a
262n/a def _parse_module(self):
263n/a if self._at_keyword('module'):
264n/a self._advance()
265n/a else:
266n/a raise ASDLSyntaxError(
267n/a 'Expected "module" (found {})'.format(self.cur_token.value),
268n/a self.cur_token.lineno)
269n/a name = self._match(self._id_kinds)
270n/a self._match(TokenKind.LBrace)
271n/a defs = self._parse_definitions()
272n/a self._match(TokenKind.RBrace)
273n/a return Module(name, defs)
274n/a
275n/a def _parse_definitions(self):
276n/a defs = []
277n/a while self.cur_token.kind == TokenKind.TypeId:
278n/a typename = self._advance()
279n/a self._match(TokenKind.Equals)
280n/a type = self._parse_type()
281n/a defs.append(Type(typename, type))
282n/a return defs
283n/a
284n/a def _parse_type(self):
285n/a if self.cur_token.kind == TokenKind.LParen:
286n/a # If we see a (, it's a product
287n/a return self._parse_product()
288n/a else:
289n/a # Otherwise it's a sum. Look for ConstructorId
290n/a sumlist = [Constructor(self._match(TokenKind.ConstructorId),
291n/a self._parse_optional_fields())]
292n/a while self.cur_token.kind == TokenKind.Pipe:
293n/a # More constructors
294n/a self._advance()
295n/a sumlist.append(Constructor(
296n/a self._match(TokenKind.ConstructorId),
297n/a self._parse_optional_fields()))
298n/a return Sum(sumlist, self._parse_optional_attributes())
299n/a
300n/a def _parse_product(self):
301n/a return Product(self._parse_fields(), self._parse_optional_attributes())
302n/a
303n/a def _parse_fields(self):
304n/a fields = []
305n/a self._match(TokenKind.LParen)
306n/a while self.cur_token.kind == TokenKind.TypeId:
307n/a typename = self._advance()
308n/a is_seq, is_opt = self._parse_optional_field_quantifier()
309n/a id = (self._advance() if self.cur_token.kind in self._id_kinds
310n/a else None)
311n/a fields.append(Field(typename, id, seq=is_seq, opt=is_opt))
312n/a if self.cur_token.kind == TokenKind.RParen:
313n/a break
314n/a elif self.cur_token.kind == TokenKind.Comma:
315n/a self._advance()
316n/a self._match(TokenKind.RParen)
317n/a return fields
318n/a
319n/a def _parse_optional_fields(self):
320n/a if self.cur_token.kind == TokenKind.LParen:
321n/a return self._parse_fields()
322n/a else:
323n/a return None
324n/a
325n/a def _parse_optional_attributes(self):
326n/a if self._at_keyword('attributes'):
327n/a self._advance()
328n/a return self._parse_fields()
329n/a else:
330n/a return None
331n/a
332n/a def _parse_optional_field_quantifier(self):
333n/a is_seq, is_opt = False, False
334n/a if self.cur_token.kind == TokenKind.Asterisk:
335n/a is_seq = True
336n/a self._advance()
337n/a elif self.cur_token.kind == TokenKind.Question:
338n/a is_opt = True
339n/a self._advance()
340n/a return is_seq, is_opt
341n/a
342n/a def _advance(self):
343n/a """ Return the value of the current token and read the next one into
344n/a self.cur_token.
345n/a """
346n/a cur_val = None if self.cur_token is None else self.cur_token.value
347n/a try:
348n/a self.cur_token = next(self._tokenizer)
349n/a except StopIteration:
350n/a self.cur_token = None
351n/a return cur_val
352n/a
353n/a _id_kinds = (TokenKind.ConstructorId, TokenKind.TypeId)
354n/a
355n/a def _match(self, kind):
356n/a """The 'match' primitive of RD parsers.
357n/a
358n/a * Verifies that the current token is of the given kind (kind can
359n/a be a tuple, in which the kind must match one of its members).
360n/a * Returns the value of the current token
361n/a * Reads in the next token
362n/a """
363n/a if (isinstance(kind, tuple) and self.cur_token.kind in kind or
364n/a self.cur_token.kind == kind
365n/a ):
366n/a value = self.cur_token.value
367n/a self._advance()
368n/a return value
369n/a else:
370n/a raise ASDLSyntaxError(
371n/a 'Unmatched {} (found {})'.format(kind, self.cur_token.kind),
372n/a self.cur_token.lineno)
373n/a
374n/a def _at_keyword(self, keyword):
375n/a return (self.cur_token.kind == TokenKind.TypeId and
376n/a self.cur_token.value == keyword)