ยปCore Development>Code coverage>Modules/parsermodule.c

Python code coverage for Modules/parsermodule.c

#countcontent
1n/a/* parsermodule.c
2n/a *
3n/a * Copyright 1995-1996 by Fred L. Drake, Jr. and Virginia Polytechnic
4n/a * Institute and State University, Blacksburg, Virginia, USA.
5n/a * Portions copyright 1991-1995 by Stichting Mathematisch Centrum,
6n/a * Amsterdam, The Netherlands. Copying is permitted under the terms
7n/a * associated with the main Python distribution, with the additional
8n/a * restriction that this additional notice be included and maintained
9n/a * on all distributed copies.
10n/a *
11n/a * This module serves to replace the original parser module written
12n/a * by Guido. The functionality is not matched precisely, but the
13n/a * original may be implemented on top of this. This is desirable
14n/a * since the source of the text to be parsed is now divorced from
15n/a * this interface.
16n/a *
17n/a * Unlike the prior interface, the ability to give a parse tree
18n/a * produced by Python code as a tuple to the compiler is enabled by
19n/a * this module. See the documentation for more details.
20n/a *
21n/a * I've added some annotations that help with the lint code-checking
22n/a * program, but they're not complete by a long shot. The real errors
23n/a * that lint detects are gone, but there are still warnings with
24n/a * Py_[X]DECREF() and Py_[X]INCREF() macros. The lint annotations
25n/a * look like "NOTE(...)".
26n/a *
27n/a * To debug parser errors like
28n/a * "parser.ParserError: Expected node type 12, got 333."
29n/a * decode symbol numbers using the automatically-generated files
30n/a * Lib/symbol.h and Include/token.h.
31n/a */
32n/a
33n/a#include "Python.h" /* general Python API */
34n/a#include "Python-ast.h" /* mod_ty */
35n/a#include "graminit.h" /* symbols defined in the grammar */
36n/a#include "node.h" /* internal parser structure */
37n/a#include "errcode.h" /* error codes for PyNode_*() */
38n/a#include "token.h" /* token definitions */
39n/a#include "grammar.h"
40n/a#include "parsetok.h"
41n/a /* ISTERMINAL() / ISNONTERMINAL() */
42n/a#undef Yield
43n/a#include "ast.h"
44n/a
45n/aextern grammar _PyParser_Grammar; /* From graminit.c */
46n/a
47n/a#ifdef lint
48n/a#include <note.h>
49n/a#else
50n/a#define NOTE(x)
51n/a#endif
52n/a
53n/a/* String constants used to initialize module attributes.
54n/a *
55n/a */
56n/astatic const char parser_copyright_string[] =
57n/a"Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\
58n/aUniversity, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\
59n/aVirginia, USA. Portions copyright 1991-1995 by Stichting Mathematisch\n\
60n/aCentrum, Amsterdam, The Netherlands.";
61n/a
62n/a
63n/aPyDoc_STRVAR(parser_doc_string,
64n/a"This is an interface to Python's internal parser.");
65n/a
66n/astatic const char parser_version_string[] = "0.5";
67n/a
68n/a
69n/atypedef PyObject* (*SeqMaker) (Py_ssize_t length);
70n/atypedef int (*SeqInserter) (PyObject* sequence,
71n/a Py_ssize_t index,
72n/a PyObject* element);
73n/a
74n/a/* The function below is copyrighted by Stichting Mathematisch Centrum. The
75n/a * original copyright statement is included below, and continues to apply
76n/a * in full to the function immediately following. All other material is
77n/a * original, copyrighted by Fred L. Drake, Jr. and Virginia Polytechnic
78n/a * Institute and State University. Changes were made to comply with the
79n/a * new naming conventions. Added arguments to provide support for creating
80n/a * lists as well as tuples, and optionally including the line numbers.
81n/a */
82n/a
83n/a
84n/astatic PyObject*
85n/anode2tuple(node *n, /* node to convert */
86n/a SeqMaker mkseq, /* create sequence */
87n/a SeqInserter addelem, /* func. to add elem. in seq. */
88n/a int lineno, /* include line numbers? */
89n/a int col_offset) /* include column offsets? */
90n/a{
91n/a PyObject *result = NULL, *w;
92n/a
93n/a if (n == NULL) {
94n/a Py_RETURN_NONE;
95n/a }
96n/a
97n/a if (ISNONTERMINAL(TYPE(n))) {
98n/a int i;
99n/a
100n/a result = mkseq(1 + NCH(n) + (TYPE(n) == encoding_decl));
101n/a if (result == NULL)
102n/a goto error;
103n/a
104n/a w = PyLong_FromLong(TYPE(n));
105n/a if (w == NULL)
106n/a goto error;
107n/a (void) addelem(result, 0, w);
108n/a
109n/a for (i = 0; i < NCH(n); i++) {
110n/a w = node2tuple(CHILD(n, i), mkseq, addelem, lineno, col_offset);
111n/a if (w == NULL)
112n/a goto error;
113n/a (void) addelem(result, i+1, w);
114n/a }
115n/a
116n/a if (TYPE(n) == encoding_decl) {
117n/a w = PyUnicode_FromString(STR(n));
118n/a if (w == NULL)
119n/a goto error;
120n/a (void) addelem(result, i+1, w);
121n/a }
122n/a }
123n/a else if (ISTERMINAL(TYPE(n))) {
124n/a result = mkseq(2 + lineno + col_offset);
125n/a if (result == NULL)
126n/a goto error;
127n/a
128n/a w = PyLong_FromLong(TYPE(n));
129n/a if (w == NULL)
130n/a goto error;
131n/a (void) addelem(result, 0, w);
132n/a
133n/a w = PyUnicode_FromString(STR(n));
134n/a if (w == NULL)
135n/a goto error;
136n/a (void) addelem(result, 1, w);
137n/a
138n/a if (lineno == 1) {
139n/a w = PyLong_FromLong(n->n_lineno);
140n/a if (w == NULL)
141n/a goto error;
142n/a (void) addelem(result, 2, w);
143n/a }
144n/a
145n/a if (col_offset == 1) {
146n/a w = PyLong_FromLong(n->n_col_offset);
147n/a if (w == NULL)
148n/a goto error;
149n/a (void) addelem(result, 3, w);
150n/a }
151n/a }
152n/a else {
153n/a PyErr_SetString(PyExc_SystemError,
154n/a "unrecognized parse tree node type");
155n/a return ((PyObject*) NULL);
156n/a }
157n/a return result;
158n/a
159n/aerror:
160n/a Py_XDECREF(result);
161n/a return NULL;
162n/a}
163n/a/*
164n/a * End of material copyrighted by Stichting Mathematisch Centrum.
165n/a */
166n/a
167n/a
168n/a
169n/a/* There are two types of intermediate objects we're interested in:
170n/a * 'eval' and 'exec' types. These constants can be used in the st_type
171n/a * field of the object type to identify which any given object represents.
172n/a * These should probably go in an external header to allow other extensions
173n/a * to use them, but then, we really should be using C++ too. ;-)
174n/a */
175n/a
176n/a#define PyST_EXPR 1
177n/a#define PyST_SUITE 2
178n/a
179n/a
180n/a/* These are the internal objects and definitions required to implement the
181n/a * ST type. Most of the internal names are more reminiscent of the 'old'
182n/a * naming style, but the code uses the new naming convention.
183n/a */
184n/a
185n/astatic PyObject*
186n/aparser_error = 0;
187n/a
188n/a
189n/atypedef struct {
190n/a PyObject_HEAD /* standard object header */
191n/a node* st_node; /* the node* returned by the parser */
192n/a int st_type; /* EXPR or SUITE ? */
193n/a PyCompilerFlags st_flags; /* Parser and compiler flags */
194n/a} PyST_Object;
195n/a
196n/a
197n/astatic void parser_free(PyST_Object *st);
198n/astatic PyObject* parser_sizeof(PyST_Object *, void *);
199n/astatic PyObject* parser_richcompare(PyObject *left, PyObject *right, int op);
200n/astatic PyObject* parser_compilest(PyST_Object *, PyObject *, PyObject *);
201n/astatic PyObject* parser_isexpr(PyST_Object *, PyObject *, PyObject *);
202n/astatic PyObject* parser_issuite(PyST_Object *, PyObject *, PyObject *);
203n/astatic PyObject* parser_st2list(PyST_Object *, PyObject *, PyObject *);
204n/astatic PyObject* parser_st2tuple(PyST_Object *, PyObject *, PyObject *);
205n/a
206n/a#define PUBLIC_METHOD_TYPE (METH_VARARGS|METH_KEYWORDS)
207n/a
208n/astatic PyMethodDef parser_methods[] = {
209n/a {"compile", (PyCFunction)parser_compilest, PUBLIC_METHOD_TYPE,
210n/a PyDoc_STR("Compile this ST object into a code object.")},
211n/a {"isexpr", (PyCFunction)parser_isexpr, PUBLIC_METHOD_TYPE,
212n/a PyDoc_STR("Determines if this ST object was created from an expression.")},
213n/a {"issuite", (PyCFunction)parser_issuite, PUBLIC_METHOD_TYPE,
214n/a PyDoc_STR("Determines if this ST object was created from a suite.")},
215n/a {"tolist", (PyCFunction)parser_st2list, PUBLIC_METHOD_TYPE,
216n/a PyDoc_STR("Creates a list-tree representation of this ST.")},
217n/a {"totuple", (PyCFunction)parser_st2tuple, PUBLIC_METHOD_TYPE,
218n/a PyDoc_STR("Creates a tuple-tree representation of this ST.")},
219n/a {"__sizeof__", (PyCFunction)parser_sizeof, METH_NOARGS,
220n/a PyDoc_STR("Returns size in memory, in bytes.")},
221n/a {NULL, NULL, 0, NULL}
222n/a};
223n/a
224n/astatic
225n/aPyTypeObject PyST_Type = {
226n/a PyVarObject_HEAD_INIT(NULL, 0)
227n/a "parser.st", /* tp_name */
228n/a (int) sizeof(PyST_Object), /* tp_basicsize */
229n/a 0, /* tp_itemsize */
230n/a (destructor)parser_free, /* tp_dealloc */
231n/a 0, /* tp_print */
232n/a 0, /* tp_getattr */
233n/a 0, /* tp_setattr */
234n/a 0, /* tp_reserved */
235n/a 0, /* tp_repr */
236n/a 0, /* tp_as_number */
237n/a 0, /* tp_as_sequence */
238n/a 0, /* tp_as_mapping */
239n/a 0, /* tp_hash */
240n/a 0, /* tp_call */
241n/a 0, /* tp_str */
242n/a 0, /* tp_getattro */
243n/a 0, /* tp_setattro */
244n/a
245n/a /* Functions to access object as input/output buffer */
246n/a 0, /* tp_as_buffer */
247n/a
248n/a Py_TPFLAGS_DEFAULT, /* tp_flags */
249n/a
250n/a /* __doc__ */
251n/a "Intermediate representation of a Python parse tree.",
252n/a 0, /* tp_traverse */
253n/a 0, /* tp_clear */
254n/a parser_richcompare, /* tp_richcompare */
255n/a 0, /* tp_weaklistoffset */
256n/a 0, /* tp_iter */
257n/a 0, /* tp_iternext */
258n/a parser_methods, /* tp_methods */
259n/a}; /* PyST_Type */
260n/a
261n/a
262n/a/* PyST_Type isn't subclassable, so just check ob_type */
263n/a#define PyST_Object_Check(v) ((v)->ob_type == &PyST_Type)
264n/a
265n/astatic int
266n/aparser_compare_nodes(node *left, node *right)
267n/a{
268n/a int j;
269n/a
270n/a if (TYPE(left) < TYPE(right))
271n/a return (-1);
272n/a
273n/a if (TYPE(right) < TYPE(left))
274n/a return (1);
275n/a
276n/a if (ISTERMINAL(TYPE(left)))
277n/a return (strcmp(STR(left), STR(right)));
278n/a
279n/a if (NCH(left) < NCH(right))
280n/a return (-1);
281n/a
282n/a if (NCH(right) < NCH(left))
283n/a return (1);
284n/a
285n/a for (j = 0; j < NCH(left); ++j) {
286n/a int v = parser_compare_nodes(CHILD(left, j), CHILD(right, j));
287n/a
288n/a if (v != 0)
289n/a return (v);
290n/a }
291n/a return (0);
292n/a}
293n/a
294n/a/* parser_richcompare(PyObject* left, PyObject* right, int op)
295n/a *
296n/a * Comparison function used by the Python operators ==, !=, <, >, <=, >=
297n/a * This really just wraps a call to parser_compare_nodes() with some easy
298n/a * checks and protection code.
299n/a *
300n/a */
301n/a
302n/a#define TEST_COND(cond) ((cond) ? Py_True : Py_False)
303n/a
304n/astatic PyObject *
305n/aparser_richcompare(PyObject *left, PyObject *right, int op)
306n/a{
307n/a int result;
308n/a PyObject *v;
309n/a
310n/a /* neither argument should be NULL, unless something's gone wrong */
311n/a if (left == NULL || right == NULL) {
312n/a PyErr_BadInternalCall();
313n/a return NULL;
314n/a }
315n/a
316n/a /* both arguments should be instances of PyST_Object */
317n/a if (!PyST_Object_Check(left) || !PyST_Object_Check(right)) {
318n/a v = Py_NotImplemented;
319n/a goto finished;
320n/a }
321n/a
322n/a if (left == right)
323n/a /* if arguments are identical, they're equal */
324n/a result = 0;
325n/a else
326n/a result = parser_compare_nodes(((PyST_Object *)left)->st_node,
327n/a ((PyST_Object *)right)->st_node);
328n/a
329n/a /* Convert return value to a Boolean */
330n/a switch (op) {
331n/a case Py_EQ:
332n/a v = TEST_COND(result == 0);
333n/a break;
334n/a case Py_NE:
335n/a v = TEST_COND(result != 0);
336n/a break;
337n/a case Py_LE:
338n/a v = TEST_COND(result <= 0);
339n/a break;
340n/a case Py_GE:
341n/a v = TEST_COND(result >= 0);
342n/a break;
343n/a case Py_LT:
344n/a v = TEST_COND(result < 0);
345n/a break;
346n/a case Py_GT:
347n/a v = TEST_COND(result > 0);
348n/a break;
349n/a default:
350n/a PyErr_BadArgument();
351n/a return NULL;
352n/a }
353n/a finished:
354n/a Py_INCREF(v);
355n/a return v;
356n/a}
357n/a
358n/a/* parser_newstobject(node* st)
359n/a *
360n/a * Allocates a new Python object representing an ST. This is simply the
361n/a * 'wrapper' object that holds a node* and allows it to be passed around in
362n/a * Python code.
363n/a *
364n/a */
365n/astatic PyObject*
366n/aparser_newstobject(node *st, int type)
367n/a{
368n/a PyST_Object* o = PyObject_New(PyST_Object, &PyST_Type);
369n/a
370n/a if (o != 0) {
371n/a o->st_node = st;
372n/a o->st_type = type;
373n/a o->st_flags.cf_flags = 0;
374n/a }
375n/a else {
376n/a PyNode_Free(st);
377n/a }
378n/a return ((PyObject*)o);
379n/a}
380n/a
381n/a
382n/a/* void parser_free(PyST_Object* st)
383n/a *
384n/a * This is called by a del statement that reduces the reference count to 0.
385n/a *
386n/a */
387n/astatic void
388n/aparser_free(PyST_Object *st)
389n/a{
390n/a PyNode_Free(st->st_node);
391n/a PyObject_Del(st);
392n/a}
393n/a
394n/astatic PyObject *
395n/aparser_sizeof(PyST_Object *st, void *unused)
396n/a{
397n/a Py_ssize_t res;
398n/a
399n/a res = _PyObject_SIZE(Py_TYPE(st)) + _PyNode_SizeOf(st->st_node);
400n/a return PyLong_FromSsize_t(res);
401n/a}
402n/a
403n/a
404n/a/* parser_st2tuple(PyObject* self, PyObject* args, PyObject* kw)
405n/a *
406n/a * This provides conversion from a node* to a tuple object that can be
407n/a * returned to the Python-level caller. The ST object is not modified.
408n/a *
409n/a */
410n/astatic PyObject*
411n/aparser_st2tuple(PyST_Object *self, PyObject *args, PyObject *kw)
412n/a{
413n/a int line_info = 0;
414n/a int col_info = 0;
415n/a PyObject *res = 0;
416n/a int ok;
417n/a
418n/a static char *keywords[] = {"st", "line_info", "col_info", NULL};
419n/a
420n/a if (self == NULL || PyModule_Check(self)) {
421n/a ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2tuple", keywords,
422n/a &PyST_Type, &self, &line_info,
423n/a &col_info);
424n/a }
425n/a else
426n/a ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:totuple", &keywords[1],
427n/a &line_info, &col_info);
428n/a if (ok != 0) {
429n/a /*
430n/a * Convert ST into a tuple representation. Use Guido's function,
431n/a * since it's known to work already.
432n/a */
433n/a res = node2tuple(((PyST_Object*)self)->st_node,
434n/a PyTuple_New, PyTuple_SetItem, line_info, col_info);
435n/a }
436n/a return (res);
437n/a}
438n/a
439n/a
440n/a/* parser_st2list(PyObject* self, PyObject* args, PyObject* kw)
441n/a *
442n/a * This provides conversion from a node* to a list object that can be
443n/a * returned to the Python-level caller. The ST object is not modified.
444n/a *
445n/a */
446n/astatic PyObject*
447n/aparser_st2list(PyST_Object *self, PyObject *args, PyObject *kw)
448n/a{
449n/a int line_info = 0;
450n/a int col_info = 0;
451n/a PyObject *res = 0;
452n/a int ok;
453n/a
454n/a static char *keywords[] = {"st", "line_info", "col_info", NULL};
455n/a
456n/a if (self == NULL || PyModule_Check(self))
457n/a ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|pp:st2list", keywords,
458n/a &PyST_Type, &self, &line_info,
459n/a &col_info);
460n/a else
461n/a ok = PyArg_ParseTupleAndKeywords(args, kw, "|pp:tolist", &keywords[1],
462n/a &line_info, &col_info);
463n/a if (ok) {
464n/a /*
465n/a * Convert ST into a tuple representation. Use Guido's function,
466n/a * since it's known to work already.
467n/a */
468n/a res = node2tuple(self->st_node,
469n/a PyList_New, PyList_SetItem, line_info, col_info);
470n/a }
471n/a return (res);
472n/a}
473n/a
474n/a
475n/a/* parser_compilest(PyObject* self, PyObject* args)
476n/a *
477n/a * This function creates code objects from the parse tree represented by
478n/a * the passed-in data object. An optional file name is passed in as well.
479n/a *
480n/a */
481n/astatic PyObject*
482n/aparser_compilest(PyST_Object *self, PyObject *args, PyObject *kw)
483n/a{
484n/a PyObject* res = NULL;
485n/a PyArena* arena = NULL;
486n/a mod_ty mod;
487n/a PyObject* filename = NULL;
488n/a int ok;
489n/a
490n/a static char *keywords[] = {"st", "filename", NULL};
491n/a
492n/a if (self == NULL || PyModule_Check(self))
493n/a ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O&:compilest", keywords,
494n/a &PyST_Type, &self,
495n/a PyUnicode_FSDecoder, &filename);
496n/a else
497n/a ok = PyArg_ParseTupleAndKeywords(args, kw, "|O&:compile", &keywords[1],
498n/a PyUnicode_FSDecoder, &filename);
499n/a if (!ok)
500n/a goto error;
501n/a
502n/a if (filename == NULL) {
503n/a filename = PyUnicode_FromString("<syntax-tree>");
504n/a if (filename == NULL)
505n/a goto error;
506n/a }
507n/a
508n/a arena = PyArena_New();
509n/a if (!arena)
510n/a goto error;
511n/a
512n/a mod = PyAST_FromNodeObject(self->st_node, &self->st_flags,
513n/a filename, arena);
514n/a if (!mod)
515n/a goto error;
516n/a
517n/a res = (PyObject *)PyAST_CompileObject(mod, filename,
518n/a &self->st_flags, -1, arena);
519n/aerror:
520n/a Py_XDECREF(filename);
521n/a if (arena != NULL)
522n/a PyArena_Free(arena);
523n/a return res;
524n/a}
525n/a
526n/a
527n/a/* PyObject* parser_isexpr(PyObject* self, PyObject* args)
528n/a * PyObject* parser_issuite(PyObject* self, PyObject* args)
529n/a *
530n/a * Checks the passed-in ST object to determine if it is an expression or
531n/a * a statement suite, respectively. The return is a Python truth value.
532n/a *
533n/a */
534n/astatic PyObject*
535n/aparser_isexpr(PyST_Object *self, PyObject *args, PyObject *kw)
536n/a{
537n/a PyObject* res = 0;
538n/a int ok;
539n/a
540n/a static char *keywords[] = {"st", NULL};
541n/a
542n/a if (self == NULL || PyModule_Check(self))
543n/a ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:isexpr", keywords,
544n/a &PyST_Type, &self);
545n/a else
546n/a ok = PyArg_ParseTupleAndKeywords(args, kw, ":isexpr", &keywords[1]);
547n/a
548n/a if (ok) {
549n/a /* Check to see if the ST represents an expression or not. */
550n/a res = (self->st_type == PyST_EXPR) ? Py_True : Py_False;
551n/a Py_INCREF(res);
552n/a }
553n/a return (res);
554n/a}
555n/a
556n/a
557n/astatic PyObject*
558n/aparser_issuite(PyST_Object *self, PyObject *args, PyObject *kw)
559n/a{
560n/a PyObject* res = 0;
561n/a int ok;
562n/a
563n/a static char *keywords[] = {"st", NULL};
564n/a
565n/a if (self == NULL || PyModule_Check(self))
566n/a ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:issuite", keywords,
567n/a &PyST_Type, &self);
568n/a else
569n/a ok = PyArg_ParseTupleAndKeywords(args, kw, ":issuite", &keywords[1]);
570n/a
571n/a if (ok) {
572n/a /* Check to see if the ST represents an expression or not. */
573n/a res = (self->st_type == PyST_EXPR) ? Py_False : Py_True;
574n/a Py_INCREF(res);
575n/a }
576n/a return (res);
577n/a}
578n/a
579n/a
580n/a/* err_string(const char* message)
581n/a *
582n/a * Sets the error string for an exception of type ParserError.
583n/a *
584n/a */
585n/astatic void
586n/aerr_string(const char *message)
587n/a{
588n/a PyErr_SetString(parser_error, message);
589n/a}
590n/a
591n/a
592n/a/* PyObject* parser_do_parse(PyObject* args, int type)
593n/a *
594n/a * Internal function to actually execute the parse and return the result if
595n/a * successful or set an exception if not.
596n/a *
597n/a */
598n/astatic PyObject*
599n/aparser_do_parse(PyObject *args, PyObject *kw, const char *argspec, int type)
600n/a{
601n/a char* string = 0;
602n/a PyObject* res = 0;
603n/a int flags = 0;
604n/a perrdetail err;
605n/a
606n/a static char *keywords[] = {"source", NULL};
607n/a
608n/a if (PyArg_ParseTupleAndKeywords(args, kw, argspec, keywords, &string)) {
609n/a node* n = PyParser_ParseStringFlagsFilenameEx(string, NULL,
610n/a &_PyParser_Grammar,
611n/a (type == PyST_EXPR)
612n/a ? eval_input : file_input,
613n/a &err, &flags);
614n/a
615n/a if (n) {
616n/a res = parser_newstobject(n, type);
617n/a if (res)
618n/a ((PyST_Object *)res)->st_flags.cf_flags = flags & PyCF_MASK;
619n/a }
620n/a else {
621n/a PyParser_SetError(&err);
622n/a }
623n/a PyParser_ClearError(&err);
624n/a }
625n/a return (res);
626n/a}
627n/a
628n/a
629n/a/* PyObject* parser_expr(PyObject* self, PyObject* args)
630n/a * PyObject* parser_suite(PyObject* self, PyObject* args)
631n/a *
632n/a * External interfaces to the parser itself. Which is called determines if
633n/a * the parser attempts to recognize an expression ('eval' form) or statement
634n/a * suite ('exec' form). The real work is done by parser_do_parse() above.
635n/a *
636n/a */
637n/astatic PyObject*
638n/aparser_expr(PyST_Object *self, PyObject *args, PyObject *kw)
639n/a{
640n/a NOTE(ARGUNUSED(self))
641n/a return (parser_do_parse(args, kw, "s:expr", PyST_EXPR));
642n/a}
643n/a
644n/a
645n/astatic PyObject*
646n/aparser_suite(PyST_Object *self, PyObject *args, PyObject *kw)
647n/a{
648n/a NOTE(ARGUNUSED(self))
649n/a return (parser_do_parse(args, kw, "s:suite", PyST_SUITE));
650n/a}
651n/a
652n/a
653n/a
654n/a/* This is the messy part of the code. Conversion from a tuple to an ST
655n/a * object requires that the input tuple be valid without having to rely on
656n/a * catching an exception from the compiler. This is done to allow the
657n/a * compiler itself to remain fast, since most of its input will come from
658n/a * the parser directly, and therefore be known to be syntactically correct.
659n/a * This validation is done to ensure that we don't core dump the compile
660n/a * phase, returning an exception instead.
661n/a *
662n/a * Two aspects can be broken out in this code: creating a node tree from
663n/a * the tuple passed in, and verifying that it is indeed valid. It may be
664n/a * advantageous to expand the number of ST types to include funcdefs and
665n/a * lambdadefs to take advantage of the optimizer, recognizing those STs
666n/a * here. They are not necessary, and not quite as useful in a raw form.
667n/a * For now, let's get expressions and suites working reliably.
668n/a */
669n/a
670n/a
671n/astatic node* build_node_tree(PyObject *tuple);
672n/a
673n/astatic int
674n/avalidate_node(node *tree)
675n/a{
676n/a int type = TYPE(tree);
677n/a int nch = NCH(tree);
678n/a dfa *nt_dfa;
679n/a state *dfa_state;
680n/a int pos, arc;
681n/a
682n/a assert(ISNONTERMINAL(type));
683n/a type -= NT_OFFSET;
684n/a if (type >= _PyParser_Grammar.g_ndfas) {
685n/a PyErr_Format(parser_error, "Unrecognized node type %d.", TYPE(tree));
686n/a return 0;
687n/a }
688n/a nt_dfa = &_PyParser_Grammar.g_dfa[type];
689n/a REQ(tree, nt_dfa->d_type);
690n/a
691n/a /* Run the DFA for this nonterminal. */
692n/a dfa_state = &nt_dfa->d_state[nt_dfa->d_initial];
693n/a for (pos = 0; pos < nch; ++pos) {
694n/a node *ch = CHILD(tree, pos);
695n/a int ch_type = TYPE(ch);
696n/a for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
697n/a short a_label = dfa_state->s_arc[arc].a_lbl;
698n/a assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
699n/a if (_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) {
700n/a /* The child is acceptable; if non-terminal, validate it recursively. */
701n/a if (ISNONTERMINAL(ch_type) && !validate_node(ch))
702n/a return 0;
703n/a
704n/a /* Update the state, and move on to the next child. */
705n/a dfa_state = &nt_dfa->d_state[dfa_state->s_arc[arc].a_arrow];
706n/a goto arc_found;
707n/a }
708n/a }
709n/a /* What would this state have accepted? */
710n/a {
711n/a short a_label = dfa_state->s_arc->a_lbl;
712n/a int next_type;
713n/a if (!a_label) /* Wouldn't accept any more children */
714n/a goto illegal_num_children;
715n/a
716n/a next_type = _PyParser_Grammar.g_ll.ll_label[a_label].lb_type;
717n/a if (ISNONTERMINAL(next_type))
718n/a PyErr_Format(parser_error, "Expected node type %d, got %d.",
719n/a next_type, ch_type);
720n/a else
721n/a PyErr_Format(parser_error, "Illegal terminal: expected %s.",
722n/a _PyParser_TokenNames[next_type]);
723n/a return 0;
724n/a }
725n/a
726n/aarc_found:
727n/a continue;
728n/a }
729n/a /* Are we in a final state? If so, return 1 for successful validation. */
730n/a for (arc = 0; arc < dfa_state->s_narcs; ++arc) {
731n/a if (!dfa_state->s_arc[arc].a_lbl) {
732n/a return 1;
733n/a }
734n/a }
735n/a
736n/aillegal_num_children:
737n/a PyErr_Format(parser_error,
738n/a "Illegal number of children for %s node.", nt_dfa->d_name);
739n/a return 0;
740n/a}
741n/a
742n/a/* PyObject* parser_tuple2st(PyObject* self, PyObject* args)
743n/a *
744n/a * This is the public function, called from the Python code. It receives a
745n/a * single tuple object from the caller, and creates an ST object if the
746n/a * tuple can be validated. It does this by checking the first code of the
747n/a * tuple, and, if acceptable, builds the internal representation. If this
748n/a * step succeeds, the internal representation is validated as fully as
749n/a * possible with the recursive validate_node() routine defined above.
750n/a *
751n/a * This function must be changed if support is to be added for PyST_FRAGMENT
752n/a * ST objects.
753n/a *
754n/a */
755n/astatic PyObject*
756n/aparser_tuple2st(PyST_Object *self, PyObject *args, PyObject *kw)
757n/a{
758n/a NOTE(ARGUNUSED(self))
759n/a PyObject *st = 0;
760n/a PyObject *tuple;
761n/a node *tree;
762n/a
763n/a static char *keywords[] = {"sequence", NULL};
764n/a
765n/a if (!PyArg_ParseTupleAndKeywords(args, kw, "O:sequence2st", keywords,
766n/a &tuple))
767n/a return (0);
768n/a if (!PySequence_Check(tuple)) {
769n/a PyErr_SetString(PyExc_ValueError,
770n/a "sequence2st() requires a single sequence argument");
771n/a return (0);
772n/a }
773n/a /*
774n/a * Convert the tree to the internal form before checking it.
775n/a */
776n/a tree = build_node_tree(tuple);
777n/a if (tree != 0) {
778n/a node *validation_root = tree;
779n/a int tree_type = 0;
780n/a switch (TYPE(tree)) {
781n/a case eval_input:
782n/a /* Might be an eval form. */
783n/a tree_type = PyST_EXPR;
784n/a break;
785n/a case encoding_decl:
786n/a /* This looks like an encoding_decl so far. */
787n/a if (NCH(tree) != 1)
788n/a err_string("Error Parsing encoding_decl");
789n/a validation_root = CHILD(tree, 0);
790n/a /* Fall through */
791n/a case file_input:
792n/a /* This looks like an exec form so far. */
793n/a
794n/a tree_type = PyST_SUITE;
795n/a break;
796n/a default:
797n/a /* This is a fragment, at best. */
798n/a PyNode_Free(tree);
799n/a err_string("parse tree does not use a valid start symbol");
800n/a return (0);
801n/a }
802n/a
803n/a if (validate_node(validation_root))
804n/a st = parser_newstobject(tree, tree_type);
805n/a else
806n/a PyNode_Free(tree);
807n/a }
808n/a /* Make sure we raise an exception on all errors. We should never
809n/a * get this, but we'd do well to be sure something is done.
810n/a */
811n/a if (st == NULL && !PyErr_Occurred())
812n/a err_string("unspecified ST error occurred");
813n/a
814n/a return st;
815n/a}
816n/a
817n/a
818n/a/* node* build_node_children()
819n/a *
820n/a * Iterate across the children of the current non-terminal node and build
821n/a * their structures. If successful, return the root of this portion of
822n/a * the tree, otherwise, 0. Any required exception will be specified already,
823n/a * and no memory will have been deallocated.
824n/a *
825n/a */
826n/astatic node*
827n/abuild_node_children(PyObject *tuple, node *root, int *line_num)
828n/a{
829n/a Py_ssize_t len = PyObject_Size(tuple);
830n/a Py_ssize_t i;
831n/a int err;
832n/a
833n/a for (i = 1; i < len; ++i) {
834n/a /* elem must always be a sequence, however simple */
835n/a PyObject* elem = PySequence_GetItem(tuple, i);
836n/a int ok = elem != NULL;
837n/a int type = 0;
838n/a char *strn = 0;
839n/a
840n/a if (ok)
841n/a ok = PySequence_Check(elem);
842n/a if (ok) {
843n/a PyObject *temp = PySequence_GetItem(elem, 0);
844n/a if (temp == NULL)
845n/a ok = 0;
846n/a else {
847n/a ok = PyLong_Check(temp);
848n/a if (ok) {
849n/a type = _PyLong_AsInt(temp);
850n/a if (type == -1 && PyErr_Occurred()) {
851n/a Py_DECREF(temp);
852n/a Py_DECREF(elem);
853n/a return 0;
854n/a }
855n/a }
856n/a Py_DECREF(temp);
857n/a }
858n/a }
859n/a if (!ok) {
860n/a PyObject *err = Py_BuildValue("Os", elem,
861n/a "Illegal node construct.");
862n/a PyErr_SetObject(parser_error, err);
863n/a Py_XDECREF(err);
864n/a Py_XDECREF(elem);
865n/a return (0);
866n/a }
867n/a if (ISTERMINAL(type)) {
868n/a Py_ssize_t len = PyObject_Size(elem);
869n/a PyObject *temp;
870n/a const char *temp_str;
871n/a
872n/a if ((len != 2) && (len != 3)) {
873n/a err_string("terminal nodes must have 2 or 3 entries");
874n/a return 0;
875n/a }
876n/a temp = PySequence_GetItem(elem, 1);
877n/a if (temp == NULL)
878n/a return 0;
879n/a if (!PyUnicode_Check(temp)) {
880n/a PyErr_Format(parser_error,
881n/a "second item in terminal node must be a string,"
882n/a " found %s",
883n/a Py_TYPE(temp)->tp_name);
884n/a Py_DECREF(temp);
885n/a Py_DECREF(elem);
886n/a return 0;
887n/a }
888n/a if (len == 3) {
889n/a PyObject *o = PySequence_GetItem(elem, 2);
890n/a if (o != NULL) {
891n/a if (PyLong_Check(o)) {
892n/a int num = _PyLong_AsInt(o);
893n/a if (num == -1 && PyErr_Occurred()) {
894n/a Py_DECREF(o);
895n/a Py_DECREF(temp);
896n/a Py_DECREF(elem);
897n/a return 0;
898n/a }
899n/a *line_num = num;
900n/a }
901n/a else {
902n/a PyErr_Format(parser_error,
903n/a "third item in terminal node must be an"
904n/a " integer, found %s",
905n/a Py_TYPE(temp)->tp_name);
906n/a Py_DECREF(o);
907n/a Py_DECREF(temp);
908n/a Py_DECREF(elem);
909n/a return 0;
910n/a }
911n/a Py_DECREF(o);
912n/a }
913n/a }
914n/a temp_str = PyUnicode_AsUTF8AndSize(temp, &len);
915n/a if (temp_str == NULL) {
916n/a Py_DECREF(temp);
917n/a Py_XDECREF(elem);
918n/a return 0;
919n/a }
920n/a strn = (char *)PyObject_MALLOC(len + 1);
921n/a if (strn == NULL) {
922n/a Py_DECREF(temp);
923n/a Py_XDECREF(elem);
924n/a PyErr_NoMemory();
925n/a return 0;
926n/a }
927n/a (void) memcpy(strn, temp_str, len + 1);
928n/a Py_DECREF(temp);
929n/a }
930n/a else if (!ISNONTERMINAL(type)) {
931n/a /*
932n/a * It has to be one or the other; this is an error.
933n/a * Raise an exception.
934n/a */
935n/a PyObject *err = Py_BuildValue("os", elem, "unknown node type.");
936n/a PyErr_SetObject(parser_error, err);
937n/a Py_XDECREF(err);
938n/a Py_XDECREF(elem);
939n/a return (0);
940n/a }
941n/a err = PyNode_AddChild(root, type, strn, *line_num, 0);
942n/a if (err == E_NOMEM) {
943n/a Py_XDECREF(elem);
944n/a PyObject_FREE(strn);
945n/a return (node *) PyErr_NoMemory();
946n/a }
947n/a if (err == E_OVERFLOW) {
948n/a Py_XDECREF(elem);
949n/a PyObject_FREE(strn);
950n/a PyErr_SetString(PyExc_ValueError,
951n/a "unsupported number of child nodes");
952n/a return NULL;
953n/a }
954n/a
955n/a if (ISNONTERMINAL(type)) {
956n/a node* new_child = CHILD(root, i - 1);
957n/a
958n/a if (new_child != build_node_children(elem, new_child, line_num)) {
959n/a Py_XDECREF(elem);
960n/a return (0);
961n/a }
962n/a }
963n/a else if (type == NEWLINE) { /* It's true: we increment the */
964n/a ++(*line_num); /* line number *after* the newline! */
965n/a }
966n/a Py_XDECREF(elem);
967n/a }
968n/a return root;
969n/a}
970n/a
971n/a
972n/astatic node*
973n/abuild_node_tree(PyObject *tuple)
974n/a{
975n/a node* res = 0;
976n/a PyObject *temp = PySequence_GetItem(tuple, 0);
977n/a long num = -1;
978n/a
979n/a if (temp != NULL)
980n/a num = PyLong_AsLong(temp);
981n/a Py_XDECREF(temp);
982n/a if (ISTERMINAL(num)) {
983n/a /*
984n/a * The tuple is simple, but it doesn't start with a start symbol.
985n/a * Raise an exception now and be done with it.
986n/a */
987n/a tuple = Py_BuildValue("Os", tuple,
988n/a "Illegal syntax-tree; cannot start with terminal symbol.");
989n/a PyErr_SetObject(parser_error, tuple);
990n/a Py_XDECREF(tuple);
991n/a }
992n/a else if (ISNONTERMINAL(num)) {
993n/a /*
994n/a * Not efficient, but that can be handled later.
995n/a */
996n/a int line_num = 0;
997n/a PyObject *encoding = NULL;
998n/a
999n/a if (num == encoding_decl) {
1000n/a encoding = PySequence_GetItem(tuple, 2);
1001n/a /* tuple isn't borrowed anymore here, need to DECREF */
1002n/a tuple = PySequence_GetSlice(tuple, 0, 2);
1003n/a if (tuple == NULL)
1004n/a return NULL;
1005n/a }
1006n/a res = PyNode_New(num);
1007n/a if (res != NULL) {
1008n/a if (res != build_node_children(tuple, res, &line_num)) {
1009n/a PyNode_Free(res);
1010n/a res = NULL;
1011n/a }
1012n/a if (res && encoding) {
1013n/a Py_ssize_t len;
1014n/a const char *temp;
1015n/a temp = PyUnicode_AsUTF8AndSize(encoding, &len);
1016n/a if (temp == NULL) {
1017n/a Py_DECREF(res);
1018n/a Py_DECREF(encoding);
1019n/a Py_DECREF(tuple);
1020n/a return NULL;
1021n/a }
1022n/a res->n_str = (char *)PyObject_MALLOC(len + 1);
1023n/a if (res->n_str == NULL) {
1024n/a Py_DECREF(res);
1025n/a Py_DECREF(encoding);
1026n/a Py_DECREF(tuple);
1027n/a PyErr_NoMemory();
1028n/a return NULL;
1029n/a }
1030n/a (void) memcpy(res->n_str, temp, len + 1);
1031n/a Py_DECREF(encoding);
1032n/a Py_DECREF(tuple);
1033n/a }
1034n/a }
1035n/a }
1036n/a else {
1037n/a /* The tuple is illegal -- if the number is neither TERMINAL nor
1038n/a * NONTERMINAL, we can't use it. Not sure the implementation
1039n/a * allows this condition, but the API doesn't preclude it.
1040n/a */
1041n/a PyObject *err = Py_BuildValue("os", tuple,
1042n/a "Illegal component tuple.");
1043n/a PyErr_SetObject(parser_error, err);
1044n/a Py_XDECREF(err);
1045n/a }
1046n/a
1047n/a return (res);
1048n/a}
1049n/a
1050n/a
1051n/astatic PyObject*
1052n/apickle_constructor = NULL;
1053n/a
1054n/a
1055n/astatic PyObject*
1056n/aparser__pickler(PyObject *self, PyObject *args)
1057n/a{
1058n/a NOTE(ARGUNUSED(self))
1059n/a PyObject *result = NULL;
1060n/a PyObject *st = NULL;
1061n/a PyObject *empty_dict = NULL;
1062n/a
1063n/a if (PyArg_ParseTuple(args, "O!:_pickler", &PyST_Type, &st)) {
1064n/a PyObject *newargs;
1065n/a PyObject *tuple;
1066n/a
1067n/a if ((empty_dict = PyDict_New()) == NULL)
1068n/a goto finally;
1069n/a if ((newargs = Py_BuildValue("Oi", st, 1)) == NULL)
1070n/a goto finally;
1071n/a tuple = parser_st2tuple((PyST_Object*)NULL, newargs, empty_dict);
1072n/a if (tuple != NULL) {
1073n/a result = Py_BuildValue("O(O)", pickle_constructor, tuple);
1074n/a Py_DECREF(tuple);
1075n/a }
1076n/a Py_DECREF(empty_dict);
1077n/a Py_DECREF(newargs);
1078n/a }
1079n/a finally:
1080n/a Py_XDECREF(empty_dict);
1081n/a
1082n/a return (result);
1083n/a}
1084n/a
1085n/a
1086n/a/* Functions exported by this module. Most of this should probably
1087n/a * be converted into an ST object with methods, but that is better
1088n/a * done directly in Python, allowing subclasses to be created directly.
1089n/a * We'd really have to write a wrapper around it all anyway to allow
1090n/a * inheritance.
1091n/a */
1092n/astatic PyMethodDef parser_functions[] = {
1093n/a {"compilest", (PyCFunction)parser_compilest, PUBLIC_METHOD_TYPE,
1094n/a PyDoc_STR("Compiles an ST object into a code object.")},
1095n/a {"expr", (PyCFunction)parser_expr, PUBLIC_METHOD_TYPE,
1096n/a PyDoc_STR("Creates an ST object from an expression.")},
1097n/a {"isexpr", (PyCFunction)parser_isexpr, PUBLIC_METHOD_TYPE,
1098n/a PyDoc_STR("Determines if an ST object was created from an expression.")},
1099n/a {"issuite", (PyCFunction)parser_issuite, PUBLIC_METHOD_TYPE,
1100n/a PyDoc_STR("Determines if an ST object was created from a suite.")},
1101n/a {"suite", (PyCFunction)parser_suite, PUBLIC_METHOD_TYPE,
1102n/a PyDoc_STR("Creates an ST object from a suite.")},
1103n/a {"sequence2st", (PyCFunction)parser_tuple2st, PUBLIC_METHOD_TYPE,
1104n/a PyDoc_STR("Creates an ST object from a tree representation.")},
1105n/a {"st2tuple", (PyCFunction)parser_st2tuple, PUBLIC_METHOD_TYPE,
1106n/a PyDoc_STR("Creates a tuple-tree representation of an ST.")},
1107n/a {"st2list", (PyCFunction)parser_st2list, PUBLIC_METHOD_TYPE,
1108n/a PyDoc_STR("Creates a list-tree representation of an ST.")},
1109n/a {"tuple2st", (PyCFunction)parser_tuple2st, PUBLIC_METHOD_TYPE,
1110n/a PyDoc_STR("Creates an ST object from a tree representation.")},
1111n/a
1112n/a /* private stuff: support pickle module */
1113n/a {"_pickler", (PyCFunction)parser__pickler, METH_VARARGS,
1114n/a PyDoc_STR("Returns the pickle magic to allow ST objects to be pickled.")},
1115n/a
1116n/a {NULL, NULL, 0, NULL}
1117n/a };
1118n/a
1119n/a
1120n/a
1121n/astatic struct PyModuleDef parsermodule = {
1122n/a PyModuleDef_HEAD_INIT,
1123n/a "parser",
1124n/a NULL,
1125n/a -1,
1126n/a parser_functions,
1127n/a NULL,
1128n/a NULL,
1129n/a NULL,
1130n/a NULL
1131n/a};
1132n/a
1133n/aPyMODINIT_FUNC PyInit_parser(void); /* supply a prototype */
1134n/a
1135n/aPyMODINIT_FUNC
1136n/aPyInit_parser(void)
1137n/a{
1138n/a PyObject *module, *copyreg;
1139n/a
1140n/a if (PyType_Ready(&PyST_Type) < 0)
1141n/a return NULL;
1142n/a module = PyModule_Create(&parsermodule);
1143n/a if (module == NULL)
1144n/a return NULL;
1145n/a
1146n/a if (parser_error == 0)
1147n/a parser_error = PyErr_NewException("parser.ParserError", NULL, NULL);
1148n/a
1149n/a if (parser_error == 0)
1150n/a return NULL;
1151n/a /* CAUTION: The code next used to skip bumping the refcount on
1152n/a * parser_error. That's a disaster if PyInit_parser() gets called more
1153n/a * than once. By incref'ing, we ensure that each module dict that
1154n/a * gets created owns its reference to the shared parser_error object,
1155n/a * and the file static parser_error vrbl owns a reference too.
1156n/a */
1157n/a Py_INCREF(parser_error);
1158n/a if (PyModule_AddObject(module, "ParserError", parser_error) != 0)
1159n/a return NULL;
1160n/a
1161n/a Py_INCREF(&PyST_Type);
1162n/a PyModule_AddObject(module, "STType", (PyObject*)&PyST_Type);
1163n/a
1164n/a PyModule_AddStringConstant(module, "__copyright__",
1165n/a parser_copyright_string);
1166n/a PyModule_AddStringConstant(module, "__doc__",
1167n/a parser_doc_string);
1168n/a PyModule_AddStringConstant(module, "__version__",
1169n/a parser_version_string);
1170n/a
1171n/a /* Register to support pickling.
1172n/a * If this fails, the import of this module will fail because an
1173n/a * exception will be raised here; should we clear the exception?
1174n/a */
1175n/a copyreg = PyImport_ImportModuleNoBlock("copyreg");
1176n/a if (copyreg != NULL) {
1177n/a PyObject *func, *pickler;
1178n/a _Py_IDENTIFIER(pickle);
1179n/a _Py_IDENTIFIER(sequence2st);
1180n/a _Py_IDENTIFIER(_pickler);
1181n/a
1182n/a func = _PyObject_GetAttrId(copyreg, &PyId_pickle);
1183n/a pickle_constructor = _PyObject_GetAttrId(module, &PyId_sequence2st);
1184n/a pickler = _PyObject_GetAttrId(module, &PyId__pickler);
1185n/a Py_XINCREF(pickle_constructor);
1186n/a if ((func != NULL) && (pickle_constructor != NULL)
1187n/a && (pickler != NULL)) {
1188n/a PyObject *res;
1189n/a
1190n/a res = PyObject_CallFunctionObjArgs(func, &PyST_Type, pickler,
1191n/a pickle_constructor, NULL);
1192n/a Py_XDECREF(res);
1193n/a }
1194n/a Py_XDECREF(func);
1195n/a Py_XDECREF(pickle_constructor);
1196n/a Py_XDECREF(pickler);
1197n/a Py_DECREF(copyreg);
1198n/a }
1199n/a return module;
1200n/a}