ยปCore Development>Code coverage>Parser/parser.c

Python code coverage for Parser/parser.c

#countcontent
1n/a
2n/a/* Parser implementation */
3n/a
4n/a/* For a description, see the comments at end of this file */
5n/a
6n/a/* XXX To do: error recovery */
7n/a
8n/a#include "Python.h"
9n/a#include "pgenheaders.h"
10n/a#include "token.h"
11n/a#include "grammar.h"
12n/a#include "node.h"
13n/a#include "parser.h"
14n/a#include "errcode.h"
15n/a
16n/a
17n/a#ifdef Py_DEBUG
18n/aextern int Py_DebugFlag;
19n/a#define D(x) if (!Py_DebugFlag); else x
20n/a#else
21n/a#define D(x)
22n/a#endif
23n/a
24n/a
25n/a/* STACK DATA TYPE */
26n/a
27n/astatic void s_reset(stack *);
28n/a
29n/astatic void
30n/as_reset(stack *s)
31n/a{
32n/a s->s_top = &s->s_base[MAXSTACK];
33n/a}
34n/a
35n/a#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36n/a
37n/astatic int
38n/as_push(stack *s, dfa *d, node *parent)
39n/a{
40n/a stackentry *top;
41n/a if (s->s_top == s->s_base) {
42n/a fprintf(stderr, "s_push: parser stack overflow\n");
43n/a return E_NOMEM;
44n/a }
45n/a top = --s->s_top;
46n/a top->s_dfa = d;
47n/a top->s_parent = parent;
48n/a top->s_state = 0;
49n/a return 0;
50n/a}
51n/a
52n/a#ifdef Py_DEBUG
53n/a
54n/astatic void
55n/as_pop(stack *s)
56n/a{
57n/a if (s_empty(s))
58n/a Py_FatalError("s_pop: parser stack underflow -- FATAL");
59n/a s->s_top++;
60n/a}
61n/a
62n/a#else /* !Py_DEBUG */
63n/a
64n/a#define s_pop(s) (s)->s_top++
65n/a
66n/a#endif
67n/a
68n/a
69n/a/* PARSER CREATION */
70n/a
71n/aparser_state *
72n/aPyParser_New(grammar *g, int start)
73n/a{
74n/a parser_state *ps;
75n/a
76n/a if (!g->g_accel)
77n/a PyGrammar_AddAccelerators(g);
78n/a ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
79n/a if (ps == NULL)
80n/a return NULL;
81n/a ps->p_grammar = g;
82n/a#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
83n/a ps->p_flags = 0;
84n/a#endif
85n/a ps->p_tree = PyNode_New(start);
86n/a if (ps->p_tree == NULL) {
87n/a PyMem_FREE(ps);
88n/a return NULL;
89n/a }
90n/a s_reset(&ps->p_stack);
91n/a (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
92n/a return ps;
93n/a}
94n/a
95n/avoid
96n/aPyParser_Delete(parser_state *ps)
97n/a{
98n/a /* NB If you want to save the parse tree,
99n/a you must set p_tree to NULL before calling delparser! */
100n/a PyNode_Free(ps->p_tree);
101n/a PyMem_FREE(ps);
102n/a}
103n/a
104n/a
105n/a/* PARSER STACK OPERATIONS */
106n/a
107n/astatic int
108n/ashift(stack *s, int type, char *str, int newstate, int lineno, int col_offset)
109n/a{
110n/a int err;
111n/a assert(!s_empty(s));
112n/a err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
113n/a if (err)
114n/a return err;
115n/a s->s_top->s_state = newstate;
116n/a return 0;
117n/a}
118n/a
119n/astatic int
120n/apush(stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
121n/a{
122n/a int err;
123n/a node *n;
124n/a n = s->s_top->s_parent;
125n/a assert(!s_empty(s));
126n/a err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
127n/a if (err)
128n/a return err;
129n/a s->s_top->s_state = newstate;
130n/a return s_push(s, d, CHILD(n, NCH(n)-1));
131n/a}
132n/a
133n/a
134n/a/* PARSER PROPER */
135n/a
136n/astatic int
137n/aclassify(parser_state *ps, int type, const char *str)
138n/a{
139n/a grammar *g = ps->p_grammar;
140n/a int n = g->g_ll.ll_nlabels;
141n/a
142n/a if (type == NAME) {
143n/a label *l = g->g_ll.ll_label;
144n/a int i;
145n/a for (i = n; i > 0; i--, l++) {
146n/a if (l->lb_type != NAME || l->lb_str == NULL ||
147n/a l->lb_str[0] != str[0] ||
148n/a strcmp(l->lb_str, str) != 0)
149n/a continue;
150n/a#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
151n/a#if 0
152n/a /* Leaving this in as an example */
153n/a if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
154n/a if (str[0] == 'w' && strcmp(str, "with") == 0)
155n/a break; /* not a keyword yet */
156n/a else if (str[0] == 'a' && strcmp(str, "as") == 0)
157n/a break; /* not a keyword yet */
158n/a }
159n/a#endif
160n/a#endif
161n/a D(printf("It's a keyword\n"));
162n/a return n - i;
163n/a }
164n/a }
165n/a
166n/a {
167n/a label *l = g->g_ll.ll_label;
168n/a int i;
169n/a for (i = n; i > 0; i--, l++) {
170n/a if (l->lb_type == type && l->lb_str == NULL) {
171n/a D(printf("It's a token we know\n"));
172n/a return n - i;
173n/a }
174n/a }
175n/a }
176n/a
177n/a D(printf("Illegal token\n"));
178n/a return -1;
179n/a}
180n/a
181n/a#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
182n/a#if 0
183n/a/* Leaving this in as an example */
184n/astatic void
185n/afuture_hack(parser_state *ps)
186n/a{
187n/a node *n = ps->p_stack.s_top->s_parent;
188n/a node *ch, *cch;
189n/a int i;
190n/a
191n/a /* from __future__ import ..., must have at least 4 children */
192n/a n = CHILD(n, 0);
193n/a if (NCH(n) < 4)
194n/a return;
195n/a ch = CHILD(n, 0);
196n/a if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
197n/a return;
198n/a ch = CHILD(n, 1);
199n/a if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
200n/a strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
201n/a return;
202n/a ch = CHILD(n, 3);
203n/a /* ch can be a star, a parenthesis or import_as_names */
204n/a if (TYPE(ch) == STAR)
205n/a return;
206n/a if (TYPE(ch) == LPAR)
207n/a ch = CHILD(n, 4);
208n/a
209n/a for (i = 0; i < NCH(ch); i += 2) {
210n/a cch = CHILD(ch, i);
211n/a if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
212n/a char *str_ch = STR(CHILD(cch, 0));
213n/a if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
214n/a ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
215n/a } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
216n/a ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
217n/a } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
218n/a ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
219n/a }
220n/a }
221n/a }
222n/a}
223n/a#endif
224n/a#endif /* future keyword */
225n/a
226n/aint
227n/aPyParser_AddToken(parser_state *ps, int type, char *str,
228n/a int lineno, int col_offset, int *expected_ret)
229n/a{
230n/a int ilabel;
231n/a int err;
232n/a
233n/a D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
234n/a
235n/a /* Find out which label this token is */
236n/a ilabel = classify(ps, type, str);
237n/a if (ilabel < 0)
238n/a return E_SYNTAX;
239n/a
240n/a /* Loop until the token is shifted or an error occurred */
241n/a for (;;) {
242n/a /* Fetch the current dfa and state */
243n/a dfa *d = ps->p_stack.s_top->s_dfa;
244n/a state *s = &d->d_state[ps->p_stack.s_top->s_state];
245n/a
246n/a D(printf(" DFA '%s', state %d:",
247n/a d->d_name, ps->p_stack.s_top->s_state));
248n/a
249n/a /* Check accelerator */
250n/a if (s->s_lower <= ilabel && ilabel < s->s_upper) {
251n/a int x = s->s_accel[ilabel - s->s_lower];
252n/a if (x != -1) {
253n/a if (x & (1<<7)) {
254n/a /* Push non-terminal */
255n/a int nt = (x >> 8) + NT_OFFSET;
256n/a int arrow = x & ((1<<7)-1);
257n/a dfa *d1 = PyGrammar_FindDFA(
258n/a ps->p_grammar, nt);
259n/a if ((err = push(&ps->p_stack, nt, d1,
260n/a arrow, lineno, col_offset)) > 0) {
261n/a D(printf(" MemError: push\n"));
262n/a return err;
263n/a }
264n/a D(printf(" Push ...\n"));
265n/a continue;
266n/a }
267n/a
268n/a /* Shift the token */
269n/a if ((err = shift(&ps->p_stack, type, str,
270n/a x, lineno, col_offset)) > 0) {
271n/a D(printf(" MemError: shift.\n"));
272n/a return err;
273n/a }
274n/a D(printf(" Shift.\n"));
275n/a /* Pop while we are in an accept-only state */
276n/a while (s = &d->d_state
277n/a [ps->p_stack.s_top->s_state],
278n/a s->s_accept && s->s_narcs == 1) {
279n/a D(printf(" DFA '%s', state %d: "
280n/a "Direct pop.\n",
281n/a d->d_name,
282n/a ps->p_stack.s_top->s_state));
283n/a#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
284n/a#if 0
285n/a if (d->d_name[0] == 'i' &&
286n/a strcmp(d->d_name,
287n/a "import_stmt") == 0)
288n/a future_hack(ps);
289n/a#endif
290n/a#endif
291n/a s_pop(&ps->p_stack);
292n/a if (s_empty(&ps->p_stack)) {
293n/a D(printf(" ACCEPT.\n"));
294n/a return E_DONE;
295n/a }
296n/a d = ps->p_stack.s_top->s_dfa;
297n/a }
298n/a return E_OK;
299n/a }
300n/a }
301n/a
302n/a if (s->s_accept) {
303n/a#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
304n/a#if 0
305n/a if (d->d_name[0] == 'i' &&
306n/a strcmp(d->d_name, "import_stmt") == 0)
307n/a future_hack(ps);
308n/a#endif
309n/a#endif
310n/a /* Pop this dfa and try again */
311n/a s_pop(&ps->p_stack);
312n/a D(printf(" Pop ...\n"));
313n/a if (s_empty(&ps->p_stack)) {
314n/a D(printf(" Error: bottom of stack.\n"));
315n/a return E_SYNTAX;
316n/a }
317n/a continue;
318n/a }
319n/a
320n/a /* Stuck, report syntax error */
321n/a D(printf(" Error.\n"));
322n/a if (expected_ret) {
323n/a if (s->s_lower == s->s_upper - 1) {
324n/a /* Only one possible expected token */
325n/a *expected_ret = ps->p_grammar->
326n/a g_ll.ll_label[s->s_lower].lb_type;
327n/a }
328n/a else
329n/a *expected_ret = -1;
330n/a }
331n/a return E_SYNTAX;
332n/a }
333n/a}
334n/a
335n/a
336n/a#ifdef Py_DEBUG
337n/a
338n/a/* DEBUG OUTPUT */
339n/a
340n/avoid
341n/adumptree(grammar *g, node *n)
342n/a{
343n/a int i;
344n/a
345n/a if (n == NULL)
346n/a printf("NIL");
347n/a else {
348n/a label l;
349n/a l.lb_type = TYPE(n);
350n/a l.lb_str = STR(n);
351n/a printf("%s", PyGrammar_LabelRepr(&l));
352n/a if (ISNONTERMINAL(TYPE(n))) {
353n/a printf("(");
354n/a for (i = 0; i < NCH(n); i++) {
355n/a if (i > 0)
356n/a printf(",");
357n/a dumptree(g, CHILD(n, i));
358n/a }
359n/a printf(")");
360n/a }
361n/a }
362n/a}
363n/a
364n/avoid
365n/ashowtree(grammar *g, node *n)
366n/a{
367n/a int i;
368n/a
369n/a if (n == NULL)
370n/a return;
371n/a if (ISNONTERMINAL(TYPE(n))) {
372n/a for (i = 0; i < NCH(n); i++)
373n/a showtree(g, CHILD(n, i));
374n/a }
375n/a else if (ISTERMINAL(TYPE(n))) {
376n/a printf("%s", _PyParser_TokenNames[TYPE(n)]);
377n/a if (TYPE(n) == NUMBER || TYPE(n) == NAME)
378n/a printf("(%s)", STR(n));
379n/a printf(" ");
380n/a }
381n/a else
382n/a printf("? ");
383n/a}
384n/a
385n/avoid
386n/aprinttree(parser_state *ps)
387n/a{
388n/a if (Py_DebugFlag) {
389n/a printf("Parse tree:\n");
390n/a dumptree(ps->p_grammar, ps->p_tree);
391n/a printf("\n");
392n/a printf("Tokens:\n");
393n/a showtree(ps->p_grammar, ps->p_tree);
394n/a printf("\n");
395n/a }
396n/a printf("Listing:\n");
397n/a PyNode_ListTree(ps->p_tree);
398n/a printf("\n");
399n/a}
400n/a
401n/a#endif /* Py_DEBUG */
402n/a
403n/a/*
404n/a
405n/aDescription
406n/a-----------
407n/a
408n/aThe parser's interface is different than usual: the function addtoken()
409n/amust be called for each token in the input. This makes it possible to
410n/aturn it into an incremental parsing system later. The parsing system
411n/aconstructs a parse tree as it goes.
412n/a
413n/aA parsing rule is represented as a Deterministic Finite-state Automaton
414n/a(DFA). A node in a DFA represents a state of the parser; an arc represents
415n/aa transition. Transitions are either labeled with terminal symbols or
416n/awith non-terminals. When the parser decides to follow an arc labeled
417n/awith a non-terminal, it is invoked recursively with the DFA representing
418n/athe parsing rule for that as its initial state; when that DFA accepts,
419n/athe parser that invoked it continues. The parse tree constructed by the
420n/arecursively called parser is inserted as a child in the current parse tree.
421n/a
422n/aThe DFA's can be constructed automatically from a more conventional
423n/alanguage description. An extended LL(1) grammar (ELL(1)) is suitable.
424n/aCertain restrictions make the parser's life easier: rules that can produce
425n/athe empty string should be outlawed (there are other ways to put loops
426n/aor optional parts in the language). To avoid the need to construct
427n/aFIRST sets, we can require that all but the last alternative of a rule
428n/a(really: arc going out of a DFA's state) must begin with a terminal
429n/asymbol.
430n/a
431n/aAs an example, consider this grammar:
432n/a
433n/aexpr: term (OP term)*
434n/aterm: CONSTANT | '(' expr ')'
435n/a
436n/aThe DFA corresponding to the rule for expr is:
437n/a
438n/a------->.---term-->.------->
439n/a ^ |
440n/a | |
441n/a \----OP----/
442n/a
443n/aThe parse tree generated for the input a+b is:
444n/a
445n/a(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
446n/a
447n/a*/