Python code coverage for Parser/spark.py

# Copyright (c) 1998-2002 John Aycock
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

__version__ = 'SPARK-0.7 (pre-alpha-5)'

import re
import sys

# Compatibility with older pythons.
def output(string='', end='\n'):
    sys.stdout.write(string + end)

try:
    sorted
except NameError:
    def sorted(seq):
        seq2 = seq[:]
        seq2.sort()
        return seq2

def _namelist(instance):
    # Breadth-first walk of the instance's class and its base classes,
    # collecting attribute names in first-seen order.
    namelist, namedict, classlist = [], {}, [instance.__class__]
    for c in classlist:
        for b in c.__bases__:
            classlist.append(b)
        for name in c.__dict__.keys():
            if name not in namedict:
                namelist.append(name)
                namedict[name] = 1
    return namelist

class GenericScanner:
    def __init__(self, flags=0):
        pattern = self.reflect()
        self.re = re.compile(pattern, re.VERBOSE|flags)

        self.index2func = {}
        for name, number in self.re.groupindex.items():
            self.index2func[number-1] = getattr(self, 't_' + name)

    def makeRE(self, name):
        doc = getattr(self, name).__doc__
        rv = '(?P<%s>%s)' % (name[2:], doc)
        return rv

    def reflect(self):
        rv = []
        for name in _namelist(self):
            if name[:2] == 't_' and name != 't_default':
                rv.append(self.makeRE(name))

        rv.append(self.makeRE('t_default'))
        return '|'.join(rv)

    def error(self, s, pos):
        output("Lexical error at position %s" % pos)
        raise SystemExit

    def tokenize(self, s):
        pos = 0
        n = len(s)
        while pos < n:
            m = self.re.match(s, pos)
            if m is None:
                self.error(s, pos)

            groups = m.groups()
            for i in range(len(groups)):
                if groups[i] and i in self.index2func:
                    self.index2func[i](groups[i])
            pos = m.end()

    def t_default(self, s):
        r'( . | \n )+'
        output("Specification error: unmatched input")
        raise SystemExit

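#
# Illustrative sketch (not part of the original module): a minimal
# GenericScanner subclass.  Each token's regular expression is the
# docstring of its t_<name> method; reflect() stitches the docstrings
# into one verbose alternation, and tokenize() dispatches each match to
# the corresponding method.  The class and token names are hypothetical.
#
class _ExampleWordScanner(GenericScanner):
    def __init__(self):
        GenericScanner.__init__(self)
        self.rv = []

    def t_whitespace(self, s):
        r' \s+ '

    def t_word(self, s):
        r' \w+ '
        self.rv.append(s)

# _ExampleWordScanner() collects words: after tokenize('two words'),
# the scanner's rv attribute is ['two', 'words'].
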
#
# Extracted from GenericParser and made global so that [un]pickling works.
#
class _State:
    def __init__(self, stateno, items):
        self.T, self.complete, self.items = [], [], items
        self.stateno = stateno

class GenericParser:
    #
    # An Earley parser, as per J. Earley, "An Efficient Context-Free
    # Parsing Algorithm", CACM 13(2), pp. 94-102.  Also J. C. Earley,
    # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
    # Carnegie-Mellon University, August 1968.  New formulation of
    # the parser according to J. Aycock, "Practical Earley Parsing
    # and the SPARK Toolkit", Ph.D. thesis, University of Victoria,
    # 2001, and J. Aycock and R. N. Horspool, "Practical Earley
    # Parsing", unpublished paper, 2001.
    #

    def __init__(self, start):
        self.rules = {}
        self.rule2func = {}
        self.rule2name = {}
        self.collectRules()
        self.augment(start)
        self.ruleschanged = 1

    _NULLABLE = '\\e_'    # prefix marking nullable symbols in G_e
    _START = 'START'
    _BOF = '|-'

    #
    # When pickling, take the time to generate the full state machine;
    # some information is then extraneous, too.  Unfortunately we
    # can't save the rule2func map.
    #
    def __getstate__(self):
        if self.ruleschanged:
            #
            # XXX - duplicated from parse()
            #
            self.computeNull()
            self.newrules = {}
            self.new2old = {}
            self.makeNewRules()
            self.ruleschanged = 0
            self.edges, self.cores = {}, {}
            self.states = { 0: self.makeState0() }
            self.makeState(0, self._BOF)
        #
        # XXX - should find a better way to do this..
        #
        changes = 1
        while changes:
            changes = 0
            # Iterate over a snapshot: goto() may add new edges while
            # the state machine is being completed.
            for k, v in list(self.edges.items()):
                if v is None:
                    state, sym = k
                    if state in self.states:
                        self.goto(state, sym)
                        changes = 1
        rv = self.__dict__.copy()
        for s in self.states.values():
            del s.items
        del rv['rule2func']
        del rv['nullable']
        del rv['cores']
        return rv

    def __setstate__(self, D):
        self.rules = {}
        self.rule2func = {}
        self.rule2name = {}
        self.collectRules()
        start = D['rules'][self._START][0][1][1] # Blech.
        self.augment(start)
        D['rule2func'] = self.rule2func
        D['makeSet'] = self.makeSet_fast
        self.__dict__ = D
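
    # Illustrative note (not in the original comments): thanks to the
    # __getstate__/__setstate__ pair above, a fully constructed parser
    # can be pickled; unpickling restores the generated state machine
    # instead of recomputing it, and dispatches makeSet to the inlined
    # makeSet_fast.  Assuming a module-level subclass _ExampleCalc
    # (sketched further below):
    #
    #     import pickle
    #     restored = pickle.loads(pickle.dumps(_ExampleCalc()))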

    #
    # A hook for GenericASTBuilder and GenericASTMatcher.  Mess
    # thee not with this; nor shall thee toucheth the _preprocess
    # argument to addRule.
    #
    def preprocess(self, rule, func): return rule, func

    def addRule(self, doc, func, _preprocess=1):
        fn = func
        rules = doc.split()

        index = []
        for i in range(len(rules)):
            if rules[i] == '::=':
                index.append(i-1)
        index.append(len(rules))

        for i in range(len(index)-1):
            lhs = rules[index[i]]
            rhs = rules[index[i]+2:index[i+1]]
            rule = (lhs, tuple(rhs))

            if _preprocess:
                rule, fn = self.preprocess(rule, func)

            if lhs in self.rules:
                self.rules[lhs].append(rule)
            else:
                self.rules[lhs] = [ rule ]
            self.rule2func[rule] = fn
            self.rule2name[rule] = func.__name__[2:]
        self.ruleschanged = 1

    def collectRules(self):
        for name in _namelist(self):
            if name[:2] == 'p_':
                func = getattr(self, name)
                doc = func.__doc__
                self.addRule(doc, func)

    def augment(self, start):
        rule = '%s ::= %s %s' % (self._START, self._BOF, start)
        self.addRule(rule, lambda args: args[1], 0)

    def computeNull(self):
        self.nullable = {}
        tbd = []

        for rulelist in self.rules.values():
            lhs = rulelist[0][0]
            self.nullable[lhs] = 0
            for rule in rulelist:
                rhs = rule[1]
                if len(rhs) == 0:
                    self.nullable[lhs] = 1
                    continue
                #
                # We only need to consider rules which
                # consist entirely of nonterminal symbols.
                # This should be a savings on typical
                # grammars.
                #
                for sym in rhs:
                    if sym not in self.rules:
                        break
                else:
                    tbd.append(rule)
        changes = 1
        while changes:
            changes = 0
            for lhs, rhs in tbd:
                if self.nullable[lhs]:
                    continue
                for sym in rhs:
                    if not self.nullable[sym]:
                        break
                else:
                    self.nullable[lhs] = 1
                    changes = 1

    def makeState0(self):
        s0 = _State(0, [])
        for rule in self.newrules[self._START]:
            s0.items.append((rule, 0))
        return s0

    def finalState(self, tokens):
        #
        # Yuck.
        #
        if len(self.newrules[self._START]) == 2 and len(tokens) == 0:
            return 1
        start = self.rules[self._START][0][1][1]
        return self.goto(1, start)

    def makeNewRules(self):
        worklist = []
        for rulelist in self.rules.values():
            for rule in rulelist:
                worklist.append((rule, 0, 1, rule))

        for rule, i, candidate, oldrule in worklist:
            lhs, rhs = rule
            n = len(rhs)
            while i < n:
                sym = rhs[i]
                if sym not in self.rules or \
                   not self.nullable[sym]:
                    candidate = 0
                    i = i + 1
                    continue

                newrhs = list(rhs)
                newrhs[i] = self._NULLABLE+sym
                newrule = (lhs, tuple(newrhs))
                worklist.append((newrule, i+1,
                                 candidate, oldrule))
                candidate = 0
                i = i + 1
            else:
                if candidate:
                    lhs = self._NULLABLE+lhs
                    rule = (lhs, rhs)
                if lhs in self.newrules:
                    self.newrules[lhs].append(rule)
                else:
                    self.newrules[lhs] = [ rule ]
                self.new2old[rule] = oldrule

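    # Illustrative note (not in the original comments): makeNewRules()
    # builds the grammar G_e of Aycock & Horspool's "Practical Earley
    # Parsing".  For a fragment such as
    #
    #     A ::= B C        (B nullable, C not)
    #
    # it emits the extra rule  A ::= \e_B C , where the \e_ prefix marks
    # the variant in which B derives the empty string, so no empty
    # derivations need to be predicted while parsing.
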
    def typestring(self, token):
        return None

    def error(self, token):
        output("Syntax error at or near `%s' token" % token)
        raise SystemExit

    def parse(self, tokens):
        sets = [ [(1,0), (2,0)] ]
        self.links = {}

        if self.ruleschanged:
            self.computeNull()
            self.newrules = {}
            self.new2old = {}
            self.makeNewRules()
            self.ruleschanged = 0
            self.edges, self.cores = {}, {}
            self.states = { 0: self.makeState0() }
            self.makeState(0, self._BOF)

        for i in range(len(tokens)):
            sets.append([])

            if sets[i] == []:
                break
            self.makeSet(tokens[i], sets, i)
        else:
            sets.append([])
            self.makeSet(None, sets, len(tokens))

        #_dump(tokens, sets, self.states)

        finalitem = (self.finalState(tokens), 0)
        if finalitem not in sets[-2]:
            if len(tokens) > 0:
                self.error(tokens[i-1])
            else:
                self.error(None)

        return self.buildTree(self._START, finalitem,
                              tokens, len(sets)-2)

    def isnullable(self, sym):
        #
        # For symbols in G_e only.  If we weren't supporting 1.5,
        # could just use sym.startswith().
        #
        return self._NULLABLE == sym[0:len(self._NULLABLE)]

    def skip(self, hs, pos=0):
        n = len(hs[1])
        while pos < n:
            if not self.isnullable(hs[1][pos]):
                break
            pos = pos + 1
        return pos

    def makeState(self, state, sym):
        assert sym is not None
        #
        # Compute \epsilon-kernel state's core and see if
        # it exists already.
        #
        kitems = []
        for rule, pos in self.states[state].items:
            lhs, rhs = rule
            if rhs[pos:pos+1] == (sym,):
                kitems.append((rule, self.skip(rule, pos+1)))
        core = kitems

        core.sort()
        tcore = tuple(core)
        if tcore in self.cores:
            return self.cores[tcore]
        #
        # Nope, doesn't exist.  Compute it and the associated
        # \epsilon-nonkernel state together; we'll need it right away.
        #
        k = self.cores[tcore] = len(self.states)
        K, NK = _State(k, kitems), _State(k+1, [])
        self.states[k] = K
        predicted = {}

        edges = self.edges
        rules = self.newrules
        for X in K, NK:
            worklist = X.items
            for item in worklist:
                rule, pos = item
                lhs, rhs = rule
                if pos == len(rhs):
                    X.complete.append(rule)
                    continue

                nextSym = rhs[pos]
                key = (X.stateno, nextSym)
                if nextSym not in rules:
                    if key not in edges:
                        edges[key] = None
                        X.T.append(nextSym)
                else:
                    edges[key] = None
                    if nextSym not in predicted:
                        predicted[nextSym] = 1
                        for prule in rules[nextSym]:
                            ppos = self.skip(prule)
                            new = (prule, ppos)
                            NK.items.append(new)
            #
            # Problem: we know K needs generating, but we
            # don't yet know about NK.  Can't commit anything
            # regarding NK to self.edges until we're sure.  Should
            # we delay committing on both K and NK to avoid this
            # hacky code?  This creates other problems..
            #
            if X is K:
                edges = {}

        if NK.items == []:
            return k

        #
        # Check for \epsilon-nonkernel's core.  Unfortunately we
        # need to know the entire set of predicted nonterminals
        # to do this without accidentally duplicating states.
        #
        core = sorted(predicted.keys())
        tcore = tuple(core)
        if tcore in self.cores:
            self.edges[(k, None)] = self.cores[tcore]
            return k

        nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno
        self.edges.update(edges)
        self.states[nk] = NK
        return k

    def goto(self, state, sym):
        key = (state, sym)
        if key not in self.edges:
            #
            # No transitions from state on sym.
            #
            return None

        rv = self.edges[key]
        if rv is None:
            #
            # Target state isn't generated yet.  Remedy this.
            #
            rv = self.makeState(state, sym)
            self.edges[key] = rv
        return rv

    def gotoT(self, state, t):
        return [self.goto(state, t)]

    def gotoST(self, state, st):
        rv = []
        for t in self.states[state].T:
            if st == t:
                rv.append(self.goto(state, t))
        return rv

    def add(self, set, item, i=None, predecessor=None, causal=None):
        if predecessor is None:
            if item not in set:
                set.append(item)
        else:
            key = (item, i)
            if item not in set:
                self.links[key] = []
                set.append(item)
            self.links[key].append((predecessor, causal))

    def makeSet(self, token, sets, i):
        cur, next = sets[i], sets[i+1]

        ttype = token is not None and self.typestring(token) or None
        if ttype is not None:
            fn, arg = self.gotoT, ttype
        else:
            fn, arg = self.gotoST, token

        for item in cur:
            ptr = (item, i)
            state, parent = item
            add = fn(state, arg)
            for k in add:
                if k is not None:
                    self.add(next, (k, parent), i+1, ptr)
                    nk = self.goto(k, None)
                    if nk is not None:
                        self.add(next, (nk, i+1))

            if parent == i:
                continue

            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    k = self.goto(pstate, lhs)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        self.add(cur, (k, pparent),
                                 i, pptr, why)
                        nk = self.goto(k, None)
                        if nk is not None:
                            self.add(cur, (nk, i))

    def makeSet_fast(self, token, sets, i):
        #
        # Call *only* when the entire state machine has been built!
        # It relies on self.edges being filled in completely, and
        # then duplicates and inlines code to boost speed at the
        # cost of extreme ugliness.
        #
        cur, next = sets[i], sets[i+1]
        ttype = token is not None and self.typestring(token) or None

        for item in cur:
            ptr = (item, i)
            state, parent = item
            if ttype is not None:
                k = self.edges.get((state, ttype), None)
                if k is not None:
                    #self.add(next, (k, parent), i+1, ptr)
                    #INLINED --v
                    new = (k, parent)
                    key = (new, i+1)
                    if new not in next:
                        self.links[key] = []
                        next.append(new)
                    self.links[key].append((ptr, None))
                    #INLINED --^
                    #nk = self.goto(k, None)
                    nk = self.edges.get((k, None), None)
                    if nk is not None:
                        #self.add(next, (nk, i+1))
                        #INLINED --v
                        new = (nk, i+1)
                        if new not in next:
                            next.append(new)
                        #INLINED --^
            else:
                add = self.gotoST(state, token)
                for k in add:
                    if k is not None:
                        self.add(next, (k, parent), i+1, ptr)
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            self.add(next, (nk, i+1))

            if parent == i:
                continue

            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    #k = self.goto(pstate, lhs)
                    k = self.edges.get((pstate, lhs), None)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        #self.add(cur, (k, pparent),
                        #         i, pptr, why)
                        #INLINED --v
                        new = (k, pparent)
                        key = (new, i)
                        if new not in cur:
                            self.links[key] = []
                            cur.append(new)
                        self.links[key].append((pptr, why))
                        #INLINED --^
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            #self.add(cur, (nk, i))
                            #INLINED --v
                            new = (nk, i)
                            if new not in cur:
                                cur.append(new)
                            #INLINED --^

    def predecessor(self, key, causal):
        for p, c in self.links[key]:
            if c == causal:
                return p
        assert 0

    def causal(self, key):
        links = self.links[key]
        if len(links) == 1:
            return links[0][1]
        choices = []
        rule2cause = {}
        for p, c in links:
            rule = c[2]
            choices.append(rule)
            rule2cause[rule] = c
        return rule2cause[self.ambiguity(choices)]

    def deriveEpsilon(self, nt):
        if len(self.newrules[nt]) > 1:
            rule = self.ambiguity(self.newrules[nt])
        else:
            rule = self.newrules[nt][0]
        #output(rule)

        rhs = rule[1]
        attr = [None] * len(rhs)

        for i in range(len(rhs)-1, -1, -1):
            attr[i] = self.deriveEpsilon(rhs[i])
        return self.rule2func[self.new2old[rule]](attr)

    def buildTree(self, nt, item, tokens, k):
        state, parent = item

        choices = []
        for rule in self.states[state].complete:
            if rule[0] == nt:
                choices.append(rule)
        rule = choices[0]
        if len(choices) > 1:
            rule = self.ambiguity(choices)
        #output(rule)

        rhs = rule[1]
        attr = [None] * len(rhs)

        for i in range(len(rhs)-1, -1, -1):
            sym = rhs[i]
            if sym not in self.newrules:
                if sym != self._BOF:
                    attr[i] = tokens[k-1]
                    key = (item, k)
                    item, k = self.predecessor(key, None)
            #elif self.isnullable(sym):
            elif self._NULLABLE == sym[0:len(self._NULLABLE)]:
                attr[i] = self.deriveEpsilon(sym)
            else:
                key = (item, k)
                why = self.causal(key)
                attr[i] = self.buildTree(sym, why[0],
                                         tokens, why[1])
                item, k = self.predecessor(key, why)
        return self.rule2func[self.new2old[rule]](attr)

    def ambiguity(self, rules):
        #
        # XXX - problem here and in collectRules() if the same rule
        # appears in >1 method.  Also undefined results if rules
        # causing the ambiguity appear in the same method.
        #
        sortlist = []
        name2index = {}
        for i in range(len(rules)):
            lhs, rhs = rule = rules[i]
            name = self.rule2name[self.new2old[rule]]
            sortlist.append((len(rhs), name))
            name2index[name] = i
        sortlist.sort()
        list = [b for a, b in sortlist]
        return rules[name2index[self.resolve(list)]]

    def resolve(self, list):
        #
        # Resolve ambiguity in favor of the shortest RHS.
        # Since we walk the tree from the top down, this
        # should effectively resolve in favor of a "shift".
        #
        return list[0]

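#
# Illustrative sketch (not part of the original module): a tiny
# calculator built on GenericParser.  Grammar rules live in the
# docstrings of p_<name> methods; each method is the semantic action
# for its rule, called with one value per right-hand-side symbol.  The
# token class and all names here are hypothetical.
#
class _ExampleToken:
    def __init__(self, type, attr=None):
        self.type = type
        self.attr = attr

class _ExampleCalc(GenericParser):
    def __init__(self):
        GenericParser.__init__(self, 'expr')

    def typestring(self, token):
        # Match tokens against terminals by type rather than equality.
        return token.type

    def p_expr_add(self, args):
        ' expr ::= expr + term '
        return args[0] + args[2]

    def p_expr_term(self, args):
        ' expr ::= term '
        return args[0]

    def p_term(self, args):
        ' term ::= number '
        return args[0].attr

# _ExampleCalc().parse([_ExampleToken('number', 1), _ExampleToken('+'),
#                       _ExampleToken('number', 2)]) evaluates to 3.
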
#
# GenericASTBuilder automagically constructs a concrete/abstract syntax tree
# for a given input.  The extra argument is a class (not an instance!)
# which supports slice assignment ("__setslice__", or "__setitem__" with
# a slice on Python 3) and the "__len__" method.
#
# XXX - silently overrides any user code in methods.
#

class GenericASTBuilder(GenericParser):
    def __init__(self, AST, start):
        GenericParser.__init__(self, start)
        self.AST = AST

    def preprocess(self, rule, func):
        rebind = lambda lhs, self=self: \
                     lambda args, lhs=lhs, self=self: \
                         self.buildASTNode(args, lhs)
        lhs, rhs = rule
        return rule, rebind(lhs)

    def buildASTNode(self, args, lhs):
        children = []
        for arg in args:
            if isinstance(arg, self.AST):
                children.append(arg)
            else:
                children.append(self.terminal(arg))
        return self.nonterminal(lhs, children)

    def terminal(self, token): return token

    def nonterminal(self, type, args):
        rv = self.AST(type)
        rv[:len(args)] = args
        return rv

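#
# Illustrative sketch (not part of the original module): a minimal AST
# node class usable with GenericASTBuilder, GenericASTTraversal and
# GenericASTMatcher below.  It supports __len__, __getitem__, slice
# assignment, and compares equal to its type string (the matcher needs
# that).  The class name is hypothetical.
#
class _ExampleAST:
    def __init__(self, type):
        self.type = type
        self._kids = []

    def __getitem__(self, i):
        return self._kids[i]

    def __len__(self):
        return len(self._kids)

    def __setitem__(self, i, value):
        # Handles nonterminal()'s  rv[:len(args)] = args  assignment.
        self._kids[i] = value

    def __eq__(self, other):
        # A node matches the terminal naming its type (see the matcher).
        return self.type == other

    __hash__ = object.__hash__

# A builder subclass supplies only the grammar; each reduction is turned
# into an _ExampleAST node by buildASTNode(), e.g.:
#
#     class ExprTreeBuilder(GenericASTBuilder):
#         def __init__(self):
#             GenericASTBuilder.__init__(self, _ExampleAST, 'expr')
#         def p_rules(self, args):
#             '''
#                 expr ::= expr + term
#                 expr ::= term
#                 term ::= number
#             '''
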
#
# GenericASTTraversal is a Visitor pattern according to Design Patterns.  For
# each node it attempts to invoke the method n_<node type>, falling
# back onto the default() method if the n_* can't be found.  The preorder
# traversal also looks for an exit hook named n_<node type>_exit (no default
# routine is called if it's not found).  To prematurely halt traversal
# of a subtree, call the prune() method -- this only makes sense for a
# preorder traversal.  Node type is determined via the typestring() method.
#

class GenericASTTraversalPruningException(Exception):
    pass

class GenericASTTraversal:
    def __init__(self, ast):
        self.ast = ast

    def typestring(self, node):
        return node.type

    def prune(self):
        raise GenericASTTraversalPruningException

    def preorder(self, node=None):
        if node is None:
            node = self.ast

        try:
            name = 'n_' + self.typestring(node)
            if hasattr(self, name):
                func = getattr(self, name)
                func(node)
            else:
                self.default(node)
        except GenericASTTraversalPruningException:
            return

        for kid in node:
            self.preorder(kid)

        name = name + '_exit'
        if hasattr(self, name):
            func = getattr(self, name)
            func(node)

    def postorder(self, node=None):
        if node is None:
            node = self.ast

        for kid in node:
            self.postorder(kid)

        name = 'n_' + self.typestring(node)
        if hasattr(self, name):
            func = getattr(self, name)
            func(node)
        else:
            self.default(node)

    def default(self, node):
        pass

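#
# Illustrative sketch (not part of the original module): a preorder
# visitor that counts 'number' nodes in an _ExampleAST tree.  The class
# name and node type are hypothetical.
#
class _ExampleCounter(GenericASTTraversal):
    def __init__(self, ast):
        GenericASTTraversal.__init__(self, ast)
        self.count = 0
        self.preorder()

    def n_number(self, node):
        self.count = self.count + 1

# _ExampleCounter(tree).count is the number of 'number' nodes in tree;
# every other node type falls through to default().
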
#
# GenericASTMatcher.  AST nodes must have "__getitem__" and "__cmp__"
# ("__eq__" on Python 3) implemented.
#
# XXX - makes assumptions about how GenericParser walks the parse tree.
#

class GenericASTMatcher(GenericParser):
    def __init__(self, start, ast):
        GenericParser.__init__(self, start)
        self.ast = ast

    def preprocess(self, rule, func):
        rebind = lambda func, self=self: \
                     lambda args, func=func, self=self: \
                         self.foundMatch(args, func)
        lhs, rhs = rule
        rhslist = list(rhs)
        rhslist.reverse()

        return (lhs, tuple(rhslist)), rebind(func)

    def foundMatch(self, args, func):
        func(args[-1])
        return args[-1]

    def match_r(self, node):
        self.input.insert(0, node)
        children = 0

        for child in node:
            if children == 0:
                self.input.insert(0, '(')
            children = children + 1
            self.match_r(child)

        if children > 0:
            self.input.insert(0, ')')

    def match(self, ast=None):
        if ast is None:
            ast = self.ast
        self.input = []

        self.match_r(ast)
        self.parse(self.input)

    def resolve(self, list):
        #
        # Resolve ambiguity in favor of the longest RHS.
        #
        return list[-1]

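#
# Illustrative sketch (not part of the original module): patterns are
# ordinary rules whose right-hand side spells out a subtree the way
# match_r() linearizes it -- parent first, children in parentheses.
# AST nodes stand in for terminals, which is why _ExampleAST compares
# equal to its type string.  All names are hypothetical.
#
class _ExampleMatcher(GenericASTMatcher):
    def __init__(self, ast):
        GenericASTMatcher.__init__(self, 'V', ast)

    def p_addition(self, node):
        ' V ::= expr ( number number ) '
        node.is_simple_addition = 1

# For a two-leaf tree built from _ExampleAST nodes:
#
#     tree = _ExampleAST('expr')
#     tree[:2] = [_ExampleAST('number'), _ExampleAST('number')]
#     _ExampleMatcher(tree).match()   # calls p_addition(tree)
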
def _dump(tokens, sets, states):
    for i in range(len(sets)):
        output('set %d' % i)
        for item in sets[i]:
            output('\t%s' % (item,))
            for (lhs, rhs), pos in states[item[0]].items:
                output('\t\t%s ::= %s . %s' %
                       (lhs, ' '.join(rhs[:pos]), ' '.join(rhs[pos:])))
        if i < len(tokens):
            output()
            output('token %s' % str(tokens[i]))
            output()