»Core Development>Code coverage>Python/ast.c

Python code coverage for Python/ast.c

# | count | content
1n/a/*
2n/a * This file includes functions to transform a concrete syntax tree (CST) to
3n/a * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4n/a *
5n/a */
6n/a#include "Python.h"
7n/a#include "Python-ast.h"
8n/a#include "node.h"
9n/a#include "ast.h"
10n/a#include "token.h"
11n/a
12n/a#include <assert.h>
13n/a
14n/astatic int validate_stmts(asdl_seq *);
15n/astatic int validate_exprs(asdl_seq *, expr_context_ty, int);
16n/astatic int validate_nonempty_seq(asdl_seq *, const char *, const char *);
17n/astatic int validate_stmt(stmt_ty);
18n/astatic int validate_expr(expr_ty, expr_context_ty);
19n/a
20n/astatic int
21n/avalidate_comprehension(asdl_seq *gens)
22n/a{
23n/a int i;
24n/a if (!asdl_seq_LEN(gens)) {
25n/a PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
26n/a return 0;
27n/a }
28n/a for (i = 0; i < asdl_seq_LEN(gens); i++) {
29n/a comprehension_ty comp = asdl_seq_GET(gens, i);
30n/a if (!validate_expr(comp->target, Store) ||
31n/a !validate_expr(comp->iter, Load) ||
32n/a !validate_exprs(comp->ifs, Load, 0))
33n/a return 0;
34n/a }
35n/a return 1;
36n/a}
37n/a
38n/astatic int
39n/avalidate_slice(slice_ty slice)
40n/a{
41n/a switch (slice->kind) {
42n/a case Slice_kind:
43n/a return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) &&
44n/a (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) &&
45n/a (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load));
46n/a case ExtSlice_kind: {
47n/a int i;
48n/a if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice"))
49n/a return 0;
50n/a for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++)
51n/a if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i)))
52n/a return 0;
53n/a return 1;
54n/a }
55n/a case Index_kind:
56n/a return validate_expr(slice->v.Index.value, Load);
57n/a default:
58n/a PyErr_SetString(PyExc_SystemError, "unknown slice node");
59n/a return 0;
60n/a }
61n/a}
62n/a
63n/astatic int
64n/avalidate_keywords(asdl_seq *keywords)
65n/a{
66n/a int i;
67n/a for (i = 0; i < asdl_seq_LEN(keywords); i++)
68n/a if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
69n/a return 0;
70n/a return 1;
71n/a}
72n/a
73n/astatic int
74n/avalidate_args(asdl_seq *args)
75n/a{
76n/a int i;
77n/a for (i = 0; i < asdl_seq_LEN(args); i++) {
78n/a arg_ty arg = asdl_seq_GET(args, i);
79n/a if (arg->annotation && !validate_expr(arg->annotation, Load))
80n/a return 0;
81n/a }
82n/a return 1;
83n/a}
84n/a
85n/astatic const char *
86n/aexpr_context_name(expr_context_ty ctx)
87n/a{
88n/a switch (ctx) {
89n/a case Load:
90n/a return "Load";
91n/a case Store:
92n/a return "Store";
93n/a case Del:
94n/a return "Del";
95n/a case AugLoad:
96n/a return "AugLoad";
97n/a case AugStore:
98n/a return "AugStore";
99n/a case Param:
100n/a return "Param";
101n/a default:
102n/a assert(0);
103n/a return "(unknown)";
104n/a }
105n/a}
106n/a
107n/astatic int
108n/avalidate_arguments(arguments_ty args)
109n/a{
110n/a if (!validate_args(args->args))
111n/a return 0;
112n/a if (args->vararg && args->vararg->annotation
113n/a && !validate_expr(args->vararg->annotation, Load)) {
114n/a return 0;
115n/a }
116n/a if (!validate_args(args->kwonlyargs))
117n/a return 0;
118n/a if (args->kwarg && args->kwarg->annotation
119n/a && !validate_expr(args->kwarg->annotation, Load)) {
120n/a return 0;
121n/a }
122n/a if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->args)) {
123n/a PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
124n/a return 0;
125n/a }
126n/a if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
127n/a PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
128n/a "kw_defaults on arguments");
129n/a return 0;
130n/a }
131n/a return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
132n/a}
133n/a
134n/astatic int
135n/avalidate_constant(PyObject *value)
136n/a{
137n/a if (value == Py_None || value == Py_Ellipsis)
138n/a return 1;
139n/a
140n/a if (PyLong_CheckExact(value)
141n/a || PyFloat_CheckExact(value)
142n/a || PyComplex_CheckExact(value)
143n/a || PyBool_Check(value)
144n/a || PyUnicode_CheckExact(value)
145n/a || PyBytes_CheckExact(value))
146n/a return 1;
147n/a
148n/a if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
149n/a PyObject *it;
150n/a
151n/a it = PyObject_GetIter(value);
152n/a if (it == NULL)
153n/a return 0;
154n/a
155n/a while (1) {
156n/a PyObject *item = PyIter_Next(it);
157n/a if (item == NULL) {
158n/a if (PyErr_Occurred()) {
159n/a Py_DECREF(it);
160n/a return 0;
161n/a }
162n/a break;
163n/a }
164n/a
165n/a if (!validate_constant(item)) {
166n/a Py_DECREF(it);
167n/a Py_DECREF(item);
168n/a return 0;
169n/a }
170n/a Py_DECREF(item);
171n/a }
172n/a
173n/a Py_DECREF(it);
174n/a return 1;
175n/a }
176n/a
177n/a return 0;
178n/a}
179n/a
180n/astatic int
181n/avalidate_expr(expr_ty exp, expr_context_ty ctx)
182n/a{
183n/a int check_ctx = 1;
184n/a expr_context_ty actual_ctx;
185n/a
186n/a /* First check expression context. */
187n/a switch (exp->kind) {
188n/a case Attribute_kind:
189n/a actual_ctx = exp->v.Attribute.ctx;
190n/a break;
191n/a case Subscript_kind:
192n/a actual_ctx = exp->v.Subscript.ctx;
193n/a break;
194n/a case Starred_kind:
195n/a actual_ctx = exp->v.Starred.ctx;
196n/a break;
197n/a case Name_kind:
198n/a actual_ctx = exp->v.Name.ctx;
199n/a break;
200n/a case List_kind:
201n/a actual_ctx = exp->v.List.ctx;
202n/a break;
203n/a case Tuple_kind:
204n/a actual_ctx = exp->v.Tuple.ctx;
205n/a break;
206n/a default:
207n/a if (ctx != Load) {
208n/a PyErr_Format(PyExc_ValueError, "expression which can't be "
209n/a "assigned to in %s context", expr_context_name(ctx));
210n/a return 0;
211n/a }
212n/a check_ctx = 0;
213n/a /* set actual_ctx to prevent gcc warning */
214n/a actual_ctx = 0;
215n/a }
216n/a if (check_ctx && actual_ctx != ctx) {
217n/a PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
218n/a expr_context_name(ctx), expr_context_name(actual_ctx));
219n/a return 0;
220n/a }
221n/a
222n/a /* Now validate expression. */
223n/a switch (exp->kind) {
224n/a case BoolOp_kind:
225n/a if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
226n/a PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
227n/a return 0;
228n/a }
229n/a return validate_exprs(exp->v.BoolOp.values, Load, 0);
230n/a case BinOp_kind:
231n/a return validate_expr(exp->v.BinOp.left, Load) &&
232n/a validate_expr(exp->v.BinOp.right, Load);
233n/a case UnaryOp_kind:
234n/a return validate_expr(exp->v.UnaryOp.operand, Load);
235n/a case Lambda_kind:
236n/a return validate_arguments(exp->v.Lambda.args) &&
237n/a validate_expr(exp->v.Lambda.body, Load);
238n/a case IfExp_kind:
239n/a return validate_expr(exp->v.IfExp.test, Load) &&
240n/a validate_expr(exp->v.IfExp.body, Load) &&
241n/a validate_expr(exp->v.IfExp.orelse, Load);
242n/a case Dict_kind:
243n/a if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
244n/a PyErr_SetString(PyExc_ValueError,
245n/a "Dict doesn't have the same number of keys as values");
246n/a return 0;
247n/a }
248n/a /* null_ok=1 for keys expressions to allow dict unpacking to work in
249n/a dict literals, i.e. ``{**{a:b}}`` */
250n/a return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
251n/a validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
252n/a case Set_kind:
253n/a return validate_exprs(exp->v.Set.elts, Load, 0);
254n/a#define COMP(NAME) \
255n/a case NAME ## _kind: \
256n/a return validate_comprehension(exp->v.NAME.generators) && \
257n/a validate_expr(exp->v.NAME.elt, Load);
258n/a COMP(ListComp)
259n/a COMP(SetComp)
260n/a COMP(GeneratorExp)
261n/a#undef COMP
262n/a case DictComp_kind:
263n/a return validate_comprehension(exp->v.DictComp.generators) &&
264n/a validate_expr(exp->v.DictComp.key, Load) &&
265n/a validate_expr(exp->v.DictComp.value, Load);
266n/a case Yield_kind:
267n/a return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
268n/a case YieldFrom_kind:
269n/a return validate_expr(exp->v.YieldFrom.value, Load);
270n/a case Await_kind:
271n/a return validate_expr(exp->v.Await.value, Load);
272n/a case Compare_kind:
273n/a if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
274n/a PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
275n/a return 0;
276n/a }
277n/a if (asdl_seq_LEN(exp->v.Compare.comparators) !=
278n/a asdl_seq_LEN(exp->v.Compare.ops)) {
279n/a PyErr_SetString(PyExc_ValueError, "Compare has a different number "
280n/a "of comparators and operands");
281n/a return 0;
282n/a }
283n/a return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
284n/a validate_expr(exp->v.Compare.left, Load);
285n/a case Call_kind:
286n/a return validate_expr(exp->v.Call.func, Load) &&
287n/a validate_exprs(exp->v.Call.args, Load, 0) &&
288n/a validate_keywords(exp->v.Call.keywords);
289n/a case Constant_kind:
290n/a if (!validate_constant(exp->v.Constant.value)) {
291n/a PyErr_Format(PyExc_TypeError,
292n/a "got an invalid type in Constant: %s",
293n/a Py_TYPE(exp->v.Constant.value)->tp_name);
294n/a return 0;
295n/a }
296n/a return 1;
297n/a case Num_kind: {
298n/a PyObject *n = exp->v.Num.n;
299n/a if (!PyLong_CheckExact(n) && !PyFloat_CheckExact(n) &&
300n/a !PyComplex_CheckExact(n)) {
301n/a PyErr_SetString(PyExc_TypeError, "non-numeric type in Num");
302n/a return 0;
303n/a }
304n/a return 1;
305n/a }
306n/a case Str_kind: {
307n/a PyObject *s = exp->v.Str.s;
308n/a if (!PyUnicode_CheckExact(s)) {
309n/a PyErr_SetString(PyExc_TypeError, "non-string type in Str");
310n/a return 0;
311n/a }
312n/a return 1;
313n/a }
314n/a case JoinedStr_kind:
315n/a return validate_exprs(exp->v.JoinedStr.values, Load, 0);
316n/a case FormattedValue_kind:
317n/a if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
318n/a return 0;
319n/a if (exp->v.FormattedValue.format_spec)
320n/a return validate_expr(exp->v.FormattedValue.format_spec, Load);
321n/a return 1;
322n/a case Bytes_kind: {
323n/a PyObject *b = exp->v.Bytes.s;
324n/a if (!PyBytes_CheckExact(b)) {
325n/a PyErr_SetString(PyExc_TypeError, "non-bytes type in Bytes");
326n/a return 0;
327n/a }
328n/a return 1;
329n/a }
330n/a case Attribute_kind:
331n/a return validate_expr(exp->v.Attribute.value, Load);
332n/a case Subscript_kind:
333n/a return validate_slice(exp->v.Subscript.slice) &&
334n/a validate_expr(exp->v.Subscript.value, Load);
335n/a case Starred_kind:
336n/a return validate_expr(exp->v.Starred.value, ctx);
337n/a case List_kind:
338n/a return validate_exprs(exp->v.List.elts, ctx, 0);
339n/a case Tuple_kind:
340n/a return validate_exprs(exp->v.Tuple.elts, ctx, 0);
341n/a /* These last cases don't have any checking. */
342n/a case Name_kind:
343n/a case NameConstant_kind:
344n/a case Ellipsis_kind:
345n/a return 1;
346n/a default:
347n/a PyErr_SetString(PyExc_SystemError, "unexpected expression");
348n/a return 0;
349n/a }
350n/a}
351n/a
352n/astatic int
353n/avalidate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
354n/a{
355n/a if (asdl_seq_LEN(seq))
356n/a return 1;
357n/a PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
358n/a return 0;
359n/a}
360n/a
361n/astatic int
362n/avalidate_assignlist(asdl_seq *targets, expr_context_ty ctx)
363n/a{
364n/a return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
365n/a validate_exprs(targets, ctx, 0);
366n/a}
367n/a
368n/astatic int
369n/avalidate_body(asdl_seq *body, const char *owner)
370n/a{
371n/a return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
372n/a}
373n/a
374n/astatic int
375n/avalidate_stmt(stmt_ty stmt)
376n/a{
377n/a int i;
378n/a switch (stmt->kind) {
379n/a case FunctionDef_kind:
380n/a return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
381n/a validate_arguments(stmt->v.FunctionDef.args) &&
382n/a validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
383n/a (!stmt->v.FunctionDef.returns ||
384n/a validate_expr(stmt->v.FunctionDef.returns, Load));
385n/a case ClassDef_kind:
386n/a return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
387n/a validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
388n/a validate_keywords(stmt->v.ClassDef.keywords) &&
389n/a validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
390n/a case Return_kind:
391n/a return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
392n/a case Delete_kind:
393n/a return validate_assignlist(stmt->v.Delete.targets, Del);
394n/a case Assign_kind:
395n/a return validate_assignlist(stmt->v.Assign.targets, Store) &&
396n/a validate_expr(stmt->v.Assign.value, Load);
397n/a case AugAssign_kind:
398n/a return validate_expr(stmt->v.AugAssign.target, Store) &&
399n/a validate_expr(stmt->v.AugAssign.value, Load);
400n/a case AnnAssign_kind:
401n/a if (stmt->v.AnnAssign.target->kind != Name_kind &&
402n/a stmt->v.AnnAssign.simple) {
403n/a PyErr_SetString(PyExc_TypeError,
404n/a "AnnAssign with simple non-Name target");
405n/a return 0;
406n/a }
407n/a return validate_expr(stmt->v.AnnAssign.target, Store) &&
408n/a (!stmt->v.AnnAssign.value ||
409n/a validate_expr(stmt->v.AnnAssign.value, Load)) &&
410n/a validate_expr(stmt->v.AnnAssign.annotation, Load);
411n/a case For_kind:
412n/a return validate_expr(stmt->v.For.target, Store) &&
413n/a validate_expr(stmt->v.For.iter, Load) &&
414n/a validate_body(stmt->v.For.body, "For") &&
415n/a validate_stmts(stmt->v.For.orelse);
416n/a case AsyncFor_kind:
417n/a return validate_expr(stmt->v.AsyncFor.target, Store) &&
418n/a validate_expr(stmt->v.AsyncFor.iter, Load) &&
419n/a validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
420n/a validate_stmts(stmt->v.AsyncFor.orelse);
421n/a case While_kind:
422n/a return validate_expr(stmt->v.While.test, Load) &&
423n/a validate_body(stmt->v.While.body, "While") &&
424n/a validate_stmts(stmt->v.While.orelse);
425n/a case If_kind:
426n/a return validate_expr(stmt->v.If.test, Load) &&
427n/a validate_body(stmt->v.If.body, "If") &&
428n/a validate_stmts(stmt->v.If.orelse);
429n/a case With_kind:
430n/a if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
431n/a return 0;
432n/a for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
433n/a withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
434n/a if (!validate_expr(item->context_expr, Load) ||
435n/a (item->optional_vars && !validate_expr(item->optional_vars, Store)))
436n/a return 0;
437n/a }
438n/a return validate_body(stmt->v.With.body, "With");
439n/a case AsyncWith_kind:
440n/a if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
441n/a return 0;
442n/a for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
443n/a withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
444n/a if (!validate_expr(item->context_expr, Load) ||
445n/a (item->optional_vars && !validate_expr(item->optional_vars, Store)))
446n/a return 0;
447n/a }
448n/a return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
449n/a case Raise_kind:
450n/a if (stmt->v.Raise.exc) {
451n/a return validate_expr(stmt->v.Raise.exc, Load) &&
452n/a (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
453n/a }
454n/a if (stmt->v.Raise.cause) {
455n/a PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
456n/a return 0;
457n/a }
458n/a return 1;
459n/a case Try_kind:
460n/a if (!validate_body(stmt->v.Try.body, "Try"))
461n/a return 0;
462n/a if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
463n/a !asdl_seq_LEN(stmt->v.Try.finalbody)) {
464n/a PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
465n/a return 0;
466n/a }
467n/a if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
468n/a asdl_seq_LEN(stmt->v.Try.orelse)) {
469n/a PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
470n/a return 0;
471n/a }
472n/a for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
473n/a excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
474n/a if ((handler->v.ExceptHandler.type &&
475n/a !validate_expr(handler->v.ExceptHandler.type, Load)) ||
476n/a !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
477n/a return 0;
478n/a }
479n/a return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
480n/a validate_stmts(stmt->v.Try.finalbody)) &&
481n/a (!asdl_seq_LEN(stmt->v.Try.orelse) ||
482n/a validate_stmts(stmt->v.Try.orelse));
483n/a case Assert_kind:
484n/a return validate_expr(stmt->v.Assert.test, Load) &&
485n/a (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
486n/a case Import_kind:
487n/a return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
488n/a case ImportFrom_kind:
489n/a if (stmt->v.ImportFrom.level < 0) {
490n/a PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
491n/a return 0;
492n/a }
493n/a return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
494n/a case Global_kind:
495n/a return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
496n/a case Nonlocal_kind:
497n/a return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
498n/a case Expr_kind:
499n/a return validate_expr(stmt->v.Expr.value, Load);
500n/a case AsyncFunctionDef_kind:
501n/a return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
502n/a validate_arguments(stmt->v.AsyncFunctionDef.args) &&
503n/a validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
504n/a (!stmt->v.AsyncFunctionDef.returns ||
505n/a validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
506n/a case Pass_kind:
507n/a case Break_kind:
508n/a case Continue_kind:
509n/a return 1;
510n/a default:
511n/a PyErr_SetString(PyExc_SystemError, "unexpected statement");
512n/a return 0;
513n/a }
514n/a}
515n/a
516n/astatic int
517n/avalidate_stmts(asdl_seq *seq)
518n/a{
519n/a int i;
520n/a for (i = 0; i < asdl_seq_LEN(seq); i++) {
521n/a stmt_ty stmt = asdl_seq_GET(seq, i);
522n/a if (stmt) {
523n/a if (!validate_stmt(stmt))
524n/a return 0;
525n/a }
526n/a else {
527n/a PyErr_SetString(PyExc_ValueError,
528n/a "None disallowed in statement list");
529n/a return 0;
530n/a }
531n/a }
532n/a return 1;
533n/a}
534n/a
535n/astatic int
536n/avalidate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
537n/a{
538n/a int i;
539n/a for (i = 0; i < asdl_seq_LEN(exprs); i++) {
540n/a expr_ty expr = asdl_seq_GET(exprs, i);
541n/a if (expr) {
542n/a if (!validate_expr(expr, ctx))
543n/a return 0;
544n/a }
545n/a else if (!null_ok) {
546n/a PyErr_SetString(PyExc_ValueError,
547n/a "None disallowed in expression list");
548n/a return 0;
549n/a }
550n/a
551n/a }
552n/a return 1;
553n/a}
554n/a
555n/aint
556n/aPyAST_Validate(mod_ty mod)
557n/a{
558n/a int res = 0;
559n/a
560n/a switch (mod->kind) {
561n/a case Module_kind:
562n/a res = validate_stmts(mod->v.Module.body);
563n/a break;
564n/a case Interactive_kind:
565n/a res = validate_stmts(mod->v.Interactive.body);
566n/a break;
567n/a case Expression_kind:
568n/a res = validate_expr(mod->v.Expression.body, Load);
569n/a break;
570n/a case Suite_kind:
571n/a PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler");
572n/a break;
573n/a default:
574n/a PyErr_SetString(PyExc_SystemError, "impossible module node");
575n/a res = 0;
576n/a break;
577n/a }
578n/a return res;
579n/a}
580n/a
581n/a/* This is done here, so defines like "test" don't interfere with AST use above. */
582n/a#include "grammar.h"
583n/a#include "parsetok.h"
584n/a#include "graminit.h"
585n/a
586n/a/* Data structure used internally */
587n/astruct compiling {
588n/a PyArena *c_arena; /* Arena for allocating memory. */
589n/a PyObject *c_filename; /* filename */
590n/a PyObject *c_normalize; /* Normalization function from unicodedata. */
591n/a PyObject *c_normalize_args; /* Normalization argument tuple. */
592n/a};
593n/a
594n/astatic asdl_seq *seq_for_testlist(struct compiling *, const node *);
595n/astatic expr_ty ast_for_expr(struct compiling *, const node *);
596n/astatic stmt_ty ast_for_stmt(struct compiling *, const node *);
597n/astatic asdl_seq *ast_for_suite(struct compiling *, const node *);
598n/astatic asdl_seq *ast_for_exprlist(struct compiling *, const node *,
599n/a expr_context_ty);
600n/astatic expr_ty ast_for_testlist(struct compiling *, const node *);
601n/astatic stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
602n/a
603n/astatic stmt_ty ast_for_with_stmt(struct compiling *, const node *, int);
604n/astatic stmt_ty ast_for_for_stmt(struct compiling *, const node *, int);
605n/a
606n/a/* Note different signature for ast_for_call */
607n/astatic expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
608n/a
609n/astatic PyObject *parsenumber(struct compiling *, const char *);
610n/astatic expr_ty parsestrplus(struct compiling *, const node *n);
611n/a
612n/a#define COMP_GENEXP 0
613n/a#define COMP_LISTCOMP 1
614n/a#define COMP_SETCOMP 2
615n/a
616n/astatic int
617n/ainit_normalization(struct compiling *c)
618n/a{
619n/a PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
620n/a if (!m)
621n/a return 0;
622n/a c->c_normalize = PyObject_GetAttrString(m, "normalize");
623n/a Py_DECREF(m);
624n/a if (!c->c_normalize)
625n/a return 0;
626n/a c->c_normalize_args = Py_BuildValue("(sN)", "NFKC", Py_None);
627n/a if (!c->c_normalize_args) {
628n/a Py_CLEAR(c->c_normalize);
629n/a return 0;
630n/a }
631n/a PyTuple_SET_ITEM(c->c_normalize_args, 1, NULL);
632n/a return 1;
633n/a}
634n/a
635n/astatic identifier
636n/anew_identifier(const char *n, struct compiling *c)
637n/a{
638n/a PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
639n/a if (!id)
640n/a return NULL;
641n/a /* PyUnicode_DecodeUTF8 should always return a ready string. */
642n/a assert(PyUnicode_IS_READY(id));
643n/a /* Check whether there are non-ASCII characters in the
644n/a identifier; if so, normalize to NFKC. */
645n/a if (!PyUnicode_IS_ASCII(id)) {
646n/a PyObject *id2;
647n/a if (!c->c_normalize && !init_normalization(c)) {
648n/a Py_DECREF(id);
649n/a return NULL;
650n/a }
651n/a PyTuple_SET_ITEM(c->c_normalize_args, 1, id);
652n/a id2 = PyObject_Call(c->c_normalize, c->c_normalize_args, NULL);
653n/a Py_DECREF(id);
654n/a if (!id2)
655n/a return NULL;
656n/a id = id2;
657n/a }
658n/a PyUnicode_InternInPlace(&id);
659n/a if (PyArena_AddPyObject(c->c_arena, id) < 0) {
660n/a Py_DECREF(id);
661n/a return NULL;
662n/a }
663n/a return id;
664n/a}
665n/a
666n/a#define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
667n/a
668n/astatic int
669n/aast_error(struct compiling *c, const node *n, const char *errmsg)
670n/a{
671n/a PyObject *value, *errstr, *loc, *tmp;
672n/a
673n/a loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
674n/a if (!loc) {
675n/a Py_INCREF(Py_None);
676n/a loc = Py_None;
677n/a }
678n/a tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset, loc);
679n/a if (!tmp)
680n/a return 0;
681n/a errstr = PyUnicode_FromString(errmsg);
682n/a if (!errstr) {
683n/a Py_DECREF(tmp);
684n/a return 0;
685n/a }
686n/a value = PyTuple_Pack(2, errstr, tmp);
687n/a Py_DECREF(errstr);
688n/a Py_DECREF(tmp);
689n/a if (value) {
690n/a PyErr_SetObject(PyExc_SyntaxError, value);
691n/a Py_DECREF(value);
692n/a }
693n/a return 0;
694n/a}
695n/a
696n/a/* num_stmts() returns number of contained statements.
697n/a
698n/a Use this routine to determine how big a sequence is needed for
699n/a the statements in a parse tree. Its raison d'etre is this bit of
700n/a grammar:
701n/a
702n/a stmt: simple_stmt | compound_stmt
703n/a simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
704n/a
705n/a A simple_stmt can contain multiple small_stmt elements joined
706n/a by semicolons. If the arg is a simple_stmt, the number of
707n/a small_stmt elements is returned.
708n/a*/
709n/a
710n/astatic int
711n/anum_stmts(const node *n)
712n/a{
713n/a int i, l;
714n/a node *ch;
715n/a
716n/a switch (TYPE(n)) {
717n/a case single_input:
718n/a if (TYPE(CHILD(n, 0)) == NEWLINE)
719n/a return 0;
720n/a else
721n/a return num_stmts(CHILD(n, 0));
722n/a case file_input:
723n/a l = 0;
724n/a for (i = 0; i < NCH(n); i++) {
725n/a ch = CHILD(n, i);
726n/a if (TYPE(ch) == stmt)
727n/a l += num_stmts(ch);
728n/a }
729n/a return l;
730n/a case stmt:
731n/a return num_stmts(CHILD(n, 0));
732n/a case compound_stmt:
733n/a return 1;
734n/a case simple_stmt:
735n/a return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
736n/a case suite:
737n/a if (NCH(n) == 1)
738n/a return num_stmts(CHILD(n, 0));
739n/a else {
740n/a l = 0;
741n/a for (i = 2; i < (NCH(n) - 1); i++)
742n/a l += num_stmts(CHILD(n, i));
743n/a return l;
744n/a }
745n/a default: {
746n/a char buf[128];
747n/a
748n/a sprintf(buf, "Non-statement found: %d %d",
749n/a TYPE(n), NCH(n));
750n/a Py_FatalError(buf);
751n/a }
752n/a }
753n/a assert(0);
754n/a return 0;
755n/a}
756n/a
757n/a/* Transform the CST rooted at node * to the appropriate AST
758n/a*/
759n/a
760n/amod_ty
761n/aPyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
762n/a PyObject *filename, PyArena *arena)
763n/a{
764n/a int i, j, k, num;
765n/a asdl_seq *stmts = NULL;
766n/a stmt_ty s;
767n/a node *ch;
768n/a struct compiling c;
769n/a mod_ty res = NULL;
770n/a
771n/a c.c_arena = arena;
772n/a /* borrowed reference */
773n/a c.c_filename = filename;
774n/a c.c_normalize = NULL;
775n/a c.c_normalize_args = NULL;
776n/a
777n/a if (TYPE(n) == encoding_decl)
778n/a n = CHILD(n, 0);
779n/a
780n/a k = 0;
781n/a switch (TYPE(n)) {
782n/a case file_input:
783n/a stmts = _Py_asdl_seq_new(num_stmts(n), arena);
784n/a if (!stmts)
785n/a goto out;
786n/a for (i = 0; i < NCH(n) - 1; i++) {
787n/a ch = CHILD(n, i);
788n/a if (TYPE(ch) == NEWLINE)
789n/a continue;
790n/a REQ(ch, stmt);
791n/a num = num_stmts(ch);
792n/a if (num == 1) {
793n/a s = ast_for_stmt(&c, ch);
794n/a if (!s)
795n/a goto out;
796n/a asdl_seq_SET(stmts, k++, s);
797n/a }
798n/a else {
799n/a ch = CHILD(ch, 0);
800n/a REQ(ch, simple_stmt);
801n/a for (j = 0; j < num; j++) {
802n/a s = ast_for_stmt(&c, CHILD(ch, j * 2));
803n/a if (!s)
804n/a goto out;
805n/a asdl_seq_SET(stmts, k++, s);
806n/a }
807n/a }
808n/a }
809n/a res = Module(stmts, arena);
810n/a break;
811n/a case eval_input: {
812n/a expr_ty testlist_ast;
813n/a
814n/a /* XXX Why not comp_for here? */
815n/a testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
816n/a if (!testlist_ast)
817n/a goto out;
818n/a res = Expression(testlist_ast, arena);
819n/a break;
820n/a }
821n/a case single_input:
822n/a if (TYPE(CHILD(n, 0)) == NEWLINE) {
823n/a stmts = _Py_asdl_seq_new(1, arena);
824n/a if (!stmts)
825n/a goto out;
826n/a asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
827n/a arena));
828n/a if (!asdl_seq_GET(stmts, 0))
829n/a goto out;
830n/a res = Interactive(stmts, arena);
831n/a }
832n/a else {
833n/a n = CHILD(n, 0);
834n/a num = num_stmts(n);
835n/a stmts = _Py_asdl_seq_new(num, arena);
836n/a if (!stmts)
837n/a goto out;
838n/a if (num == 1) {
839n/a s = ast_for_stmt(&c, n);
840n/a if (!s)
841n/a goto out;
842n/a asdl_seq_SET(stmts, 0, s);
843n/a }
844n/a else {
845n/a /* Only a simple_stmt can contain multiple statements. */
846n/a REQ(n, simple_stmt);
847n/a for (i = 0; i < NCH(n); i += 2) {
848n/a if (TYPE(CHILD(n, i)) == NEWLINE)
849n/a break;
850n/a s = ast_for_stmt(&c, CHILD(n, i));
851n/a if (!s)
852n/a goto out;
853n/a asdl_seq_SET(stmts, i / 2, s);
854n/a }
855n/a }
856n/a
857n/a res = Interactive(stmts, arena);
858n/a }
859n/a break;
860n/a default:
861n/a PyErr_Format(PyExc_SystemError,
862n/a "invalid node %d for PyAST_FromNode", TYPE(n));
863n/a goto out;
864n/a }
865n/a out:
866n/a if (c.c_normalize) {
867n/a Py_DECREF(c.c_normalize);
868n/a PyTuple_SET_ITEM(c.c_normalize_args, 1, NULL);
869n/a Py_DECREF(c.c_normalize_args);
870n/a }
871n/a return res;
872n/a}
873n/a
874n/amod_ty
875n/aPyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
876n/a PyArena *arena)
877n/a{
878n/a mod_ty mod;
879n/a PyObject *filename;
880n/a filename = PyUnicode_DecodeFSDefault(filename_str);
881n/a if (filename == NULL)
882n/a return NULL;
883n/a mod = PyAST_FromNodeObject(n, flags, filename, arena);
884n/a Py_DECREF(filename);
885n/a return mod;
886n/a
887n/a}
888n/a
889n/a/* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
890n/a*/
891n/a
892n/astatic operator_ty
893n/aget_operator(const node *n)
894n/a{
895n/a switch (TYPE(n)) {
896n/a case VBAR:
897n/a return BitOr;
898n/a case CIRCUMFLEX:
899n/a return BitXor;
900n/a case AMPER:
901n/a return BitAnd;
902n/a case LEFTSHIFT:
903n/a return LShift;
904n/a case RIGHTSHIFT:
905n/a return RShift;
906n/a case PLUS:
907n/a return Add;
908n/a case MINUS:
909n/a return Sub;
910n/a case STAR:
911n/a return Mult;
912n/a case AT:
913n/a return MatMult;
914n/a case SLASH:
915n/a return Div;
916n/a case DOUBLESLASH:
917n/a return FloorDiv;
918n/a case PERCENT:
919n/a return Mod;
920n/a default:
921n/a return (operator_ty)0;
922n/a }
923n/a}
924n/a
925n/astatic const char * const FORBIDDEN[] = {
926n/a "None",
927n/a "True",
928n/a "False",
929n/a NULL,
930n/a};
931n/a
932n/astatic int
933n/aforbidden_name(struct compiling *c, identifier name, const node *n,
934n/a int full_checks)
935n/a{
936n/a assert(PyUnicode_Check(name));
937n/a if (_PyUnicode_EqualToASCIIString(name, "__debug__")) {
938n/a ast_error(c, n, "assignment to keyword");
939n/a return 1;
940n/a }
941n/a if (_PyUnicode_EqualToASCIIString(name, "async") ||
942n/a _PyUnicode_EqualToASCIIString(name, "await"))
943n/a {
944n/a PyObject *message = PyUnicode_FromString(
945n/a "'async' and 'await' will become reserved keywords"
946n/a " in Python 3.7");
947n/a int ret;
948n/a if (message == NULL) {
949n/a return 1;
950n/a }
951n/a ret = PyErr_WarnExplicitObject(
952n/a PyExc_DeprecationWarning,
953n/a message,
954n/a c->c_filename,
955n/a LINENO(n),
956n/a NULL,
957n/a NULL);
958n/a Py_DECREF(message);
959n/a if (ret < 0) {
960n/a return 1;
961n/a }
962n/a }
963n/a if (full_checks) {
964n/a const char * const *p;
965n/a for (p = FORBIDDEN; *p; p++) {
966n/a if (_PyUnicode_EqualToASCIIString(name, *p)) {
967n/a ast_error(c, n, "assignment to keyword");
968n/a return 1;
969n/a }
970n/a }
971n/a }
972n/a return 0;
973n/a}
974n/a
975n/a/* Set the context ctx for expr_ty e, recursively traversing e.
976n/a
977n/a Only sets context for expr kinds that "can appear in assignment context"
978n/a (according to ../Parser/Python.asdl). For other expr kinds, it sets
979n/a an appropriate syntax error and returns false.
980n/a*/
981n/a
982n/astatic int
983n/aset_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
984n/a{
985n/a asdl_seq *s = NULL;
986n/a /* If a particular expression type can't be used for assign / delete,
987n/a set expr_name to its name and an error message will be generated.
988n/a */
989n/a const char* expr_name = NULL;
990n/a
991n/a /* The ast defines augmented store and load contexts, but the
992n/a implementation here doesn't actually use them. The code may be
993n/a a little more complex than necessary as a result. It also means
994n/a that expressions in an augmented assignment have a Store context.
995n/a Consider restructuring so that augmented assignment uses
996n/a set_context(), too.
997n/a */
998n/a assert(ctx != AugStore && ctx != AugLoad);
999n/a
1000n/a switch (e->kind) {
1001n/a case Attribute_kind:
1002n/a e->v.Attribute.ctx = ctx;
1003n/a if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
1004n/a return 0;
1005n/a break;
1006n/a case Subscript_kind:
1007n/a e->v.Subscript.ctx = ctx;
1008n/a break;
1009n/a case Starred_kind:
1010n/a e->v.Starred.ctx = ctx;
1011n/a if (!set_context(c, e->v.Starred.value, ctx, n))
1012n/a return 0;
1013n/a break;
1014n/a case Name_kind:
1015n/a if (ctx == Store) {
1016n/a if (forbidden_name(c, e->v.Name.id, n, 0))
1017n/a return 0; /* forbidden_name() calls ast_error() */
1018n/a }
1019n/a e->v.Name.ctx = ctx;
1020n/a break;
1021n/a case List_kind:
1022n/a e->v.List.ctx = ctx;
1023n/a s = e->v.List.elts;
1024n/a break;
1025n/a case Tuple_kind:
1026n/a e->v.Tuple.ctx = ctx;
1027n/a s = e->v.Tuple.elts;
1028n/a break;
1029n/a case Lambda_kind:
1030n/a expr_name = "lambda";
1031n/a break;
1032n/a case Call_kind:
1033n/a expr_name = "function call";
1034n/a break;
1035n/a case BoolOp_kind:
1036n/a case BinOp_kind:
1037n/a case UnaryOp_kind:
1038n/a expr_name = "operator";
1039n/a break;
1040n/a case GeneratorExp_kind:
1041n/a expr_name = "generator expression";
1042n/a break;
1043n/a case Yield_kind:
1044n/a case YieldFrom_kind:
1045n/a expr_name = "yield expression";
1046n/a break;
1047n/a case Await_kind:
1048n/a expr_name = "await expression";
1049n/a break;
1050n/a case ListComp_kind:
1051n/a expr_name = "list comprehension";
1052n/a break;
1053n/a case SetComp_kind:
1054n/a expr_name = "set comprehension";
1055n/a break;
1056n/a case DictComp_kind:
1057n/a expr_name = "dict comprehension";
1058n/a break;
1059n/a case Dict_kind:
1060n/a case Set_kind:
1061n/a case Num_kind:
1062n/a case Str_kind:
1063n/a case Bytes_kind:
1064n/a case JoinedStr_kind:
1065n/a case FormattedValue_kind:
1066n/a expr_name = "literal";
1067n/a break;
1068n/a case NameConstant_kind:
1069n/a expr_name = "keyword";
1070n/a break;
1071n/a case Ellipsis_kind:
1072n/a expr_name = "Ellipsis";
1073n/a break;
1074n/a case Compare_kind:
1075n/a expr_name = "comparison";
1076n/a break;
1077n/a case IfExp_kind:
1078n/a expr_name = "conditional expression";
1079n/a break;
1080n/a default:
1081n/a PyErr_Format(PyExc_SystemError,
1082n/a "unexpected expression in assignment %d (line %d)",
1083n/a e->kind, e->lineno);
1084n/a return 0;
1085n/a }
1086n/a /* Check for error string set by switch */
1087n/a if (expr_name) {
1088n/a char buf[300];
1089n/a PyOS_snprintf(buf, sizeof(buf),
1090n/a "can't %s %s",
1091n/a ctx == Store ? "assign to" : "delete",
1092n/a expr_name);
1093n/a return ast_error(c, n, buf);
1094n/a }
1095n/a
1096n/a /* If the LHS is a list or tuple, we need to set the assignment
1097n/a context for all the contained elements.
1098n/a */
1099n/a if (s) {
1100n/a int i;
1101n/a
1102n/a for (i = 0; i < asdl_seq_LEN(s); i++) {
1103n/a if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
1104n/a return 0;
1105n/a }
1106n/a }
1107n/a return 1;
1108n/a}
1109n/a
1110n/astatic operator_ty
1111n/aast_for_augassign(struct compiling *c, const node *n)
1112n/a{
1113n/a REQ(n, augassign);
1114n/a n = CHILD(n, 0);
1115n/a switch (STR(n)[0]) {
1116n/a case '+':
1117n/a return Add;
1118n/a case '-':
1119n/a return Sub;
1120n/a case '/':
1121n/a if (STR(n)[1] == '/')
1122n/a return FloorDiv;
1123n/a else
1124n/a return Div;
1125n/a case '%':
1126n/a return Mod;
1127n/a case '<':
1128n/a return LShift;
1129n/a case '>':
1130n/a return RShift;
1131n/a case '&':
1132n/a return BitAnd;
1133n/a case '^':
1134n/a return BitXor;
1135n/a case '|':
1136n/a return BitOr;
1137n/a case '*':
1138n/a if (STR(n)[1] == '*')
1139n/a return Pow;
1140n/a else
1141n/a return Mult;
1142n/a case '@':
1143n/a return MatMult;
1144n/a default:
1145n/a PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
1146n/a return (operator_ty)0;
1147n/a }
1148n/a}
1149n/a
1150n/astatic cmpop_ty
1151n/aast_for_comp_op(struct compiling *c, const node *n)
1152n/a{
1153n/a /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
1154n/a |'is' 'not'
1155n/a */
1156n/a REQ(n, comp_op);
1157n/a if (NCH(n) == 1) {
1158n/a n = CHILD(n, 0);
1159n/a switch (TYPE(n)) {
1160n/a case LESS:
1161n/a return Lt;
1162n/a case GREATER:
1163n/a return Gt;
1164n/a case EQEQUAL: /* == */
1165n/a return Eq;
1166n/a case LESSEQUAL:
1167n/a return LtE;
1168n/a case GREATEREQUAL:
1169n/a return GtE;
1170n/a case NOTEQUAL:
1171n/a return NotEq;
1172n/a case NAME:
1173n/a if (strcmp(STR(n), "in") == 0)
1174n/a return In;
1175n/a if (strcmp(STR(n), "is") == 0)
1176n/a return Is;
1177n/a default:
1178n/a PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
1179n/a STR(n));
1180n/a return (cmpop_ty)0;
1181n/a }
1182n/a }
1183n/a else if (NCH(n) == 2) {
1184n/a /* handle "not in" and "is not" */
1185n/a switch (TYPE(CHILD(n, 0))) {
1186n/a case NAME:
1187n/a if (strcmp(STR(CHILD(n, 1)), "in") == 0)
1188n/a return NotIn;
1189n/a if (strcmp(STR(CHILD(n, 0)), "is") == 0)
1190n/a return IsNot;
1191n/a default:
1192n/a PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
1193n/a STR(CHILD(n, 0)), STR(CHILD(n, 1)));
1194n/a return (cmpop_ty)0;
1195n/a }
1196n/a }
1197n/a PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
1198n/a NCH(n));
1199n/a return (cmpop_ty)0;
1200n/a}
1201n/a
1202n/astatic asdl_seq *
1203n/aseq_for_testlist(struct compiling *c, const node *n)
1204n/a{
1205n/a /* testlist: test (',' test)* [',']
1206n/a testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
1207n/a */
1208n/a asdl_seq *seq;
1209n/a expr_ty expression;
1210n/a int i;
1211n/a assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
1212n/a
1213n/a seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1214n/a if (!seq)
1215n/a return NULL;
1216n/a
1217n/a for (i = 0; i < NCH(n); i += 2) {
1218n/a const node *ch = CHILD(n, i);
1219n/a assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr);
1220n/a
1221n/a expression = ast_for_expr(c, ch);
1222n/a if (!expression)
1223n/a return NULL;
1224n/a
1225n/a assert(i / 2 < seq->size);
1226n/a asdl_seq_SET(seq, i / 2, expression);
1227n/a }
1228n/a return seq;
1229n/a}
1230n/a
1231n/astatic arg_ty
1232n/aast_for_arg(struct compiling *c, const node *n)
1233n/a{
1234n/a identifier name;
1235n/a expr_ty annotation = NULL;
1236n/a node *ch;
1237n/a arg_ty ret;
1238n/a
1239n/a assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
1240n/a ch = CHILD(n, 0);
1241n/a name = NEW_IDENTIFIER(ch);
1242n/a if (!name)
1243n/a return NULL;
1244n/a if (forbidden_name(c, name, ch, 0))
1245n/a return NULL;
1246n/a
1247n/a if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
1248n/a annotation = ast_for_expr(c, CHILD(n, 2));
1249n/a if (!annotation)
1250n/a return NULL;
1251n/a }
1252n/a
1253n/a ret = arg(name, annotation, LINENO(n), n->n_col_offset, c->c_arena);
1254n/a if (!ret)
1255n/a return NULL;
1256n/a return ret;
1257n/a}
1258n/a
1259n/a/* returns -1 if failed to handle keyword only arguments
1260n/a returns new position to keep processing if successful
1261n/a (',' tfpdef ['=' test])*
1262n/a ^^^
1263n/a start pointing here
1264n/a */
1265n/astatic int
1266n/ahandle_keywordonly_args(struct compiling *c, const node *n, int start,
1267n/a asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
1268n/a{
1269n/a PyObject *argname;
1270n/a node *ch;
1271n/a expr_ty expression, annotation;
1272n/a arg_ty arg;
1273n/a int i = start;
1274n/a int j = 0; /* index for kwdefaults and kwonlyargs */
1275n/a
1276n/a if (kwonlyargs == NULL) {
1277n/a ast_error(c, CHILD(n, start), "named arguments must follow bare *");
1278n/a return -1;
1279n/a }
1280n/a assert(kwdefaults != NULL);
1281n/a while (i < NCH(n)) {
1282n/a ch = CHILD(n, i);
1283n/a switch (TYPE(ch)) {
1284n/a case vfpdef:
1285n/a case tfpdef:
1286n/a if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1287n/a expression = ast_for_expr(c, CHILD(n, i + 2));
1288n/a if (!expression)
1289n/a goto error;
1290n/a asdl_seq_SET(kwdefaults, j, expression);
1291n/a i += 2; /* '=' and test */
1292n/a }
1293n/a else { /* setting NULL if no default value exists */
1294n/a asdl_seq_SET(kwdefaults, j, NULL);
1295n/a }
1296n/a if (NCH(ch) == 3) {
1297n/a /* ch is NAME ':' test */
1298n/a annotation = ast_for_expr(c, CHILD(ch, 2));
1299n/a if (!annotation)
1300n/a goto error;
1301n/a }
1302n/a else {
1303n/a annotation = NULL;
1304n/a }
1305n/a ch = CHILD(ch, 0);
1306n/a argname = NEW_IDENTIFIER(ch);
1307n/a if (!argname)
1308n/a goto error;
1309n/a if (forbidden_name(c, argname, ch, 0))
1310n/a goto error;
1311n/a arg = arg(argname, annotation, LINENO(ch), ch->n_col_offset,
1312n/a c->c_arena);
1313n/a if (!arg)
1314n/a goto error;
1315n/a asdl_seq_SET(kwonlyargs, j++, arg);
1316n/a i += 2; /* the name and the comma */
1317n/a break;
1318n/a case DOUBLESTAR:
1319n/a return i;
1320n/a default:
1321n/a ast_error(c, ch, "unexpected node");
1322n/a goto error;
1323n/a }
1324n/a }
1325n/a return i;
1326n/a error:
1327n/a return -1;
1328n/a}
1329n/a
1330n/a/* Create AST for argument list. */
1331n/a
1332n/astatic arguments_ty
1333n/aast_for_arguments(struct compiling *c, const node *n)
1334n/a{
1335n/a /* This function handles both typedargslist (function definition)
1336n/a and varargslist (lambda definition).
1337n/a
1338n/a parameters: '(' [typedargslist] ')'
1339n/a typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
1340n/a '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
1341n/a | '**' tfpdef [',']]]
1342n/a | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
1343n/a | '**' tfpdef [','])
1344n/a tfpdef: NAME [':' test]
1345n/a varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
1346n/a '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
1347n/a | '**' vfpdef [',']]]
1348n/a | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
1349n/a | '**' vfpdef [',']
1350n/a )
1351n/a vfpdef: NAME
1352n/a
1353n/a */
1354n/a int i, j, k, nposargs = 0, nkwonlyargs = 0;
1355n/a int nposdefaults = 0, found_default = 0;
1356n/a asdl_seq *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
1357n/a arg_ty vararg = NULL, kwarg = NULL;
1358n/a arg_ty arg;
1359n/a node *ch;
1360n/a
1361n/a if (TYPE(n) == parameters) {
1362n/a if (NCH(n) == 2) /* () as argument list */
1363n/a return arguments(NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1364n/a n = CHILD(n, 1);
1365n/a }
1366n/a assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);
1367n/a
1368n/a /* First count the number of positional args & defaults. The
1369n/a variable i is the loop index for this for loop and the next.
1370n/a The next loop picks up where the first leaves off.
1371n/a */
1372n/a for (i = 0; i < NCH(n); i++) {
1373n/a ch = CHILD(n, i);
1374n/a if (TYPE(ch) == STAR) {
1375n/a /* skip star */
1376n/a i++;
1377n/a if (i < NCH(n) && /* skip argument following star */
1378n/a (TYPE(CHILD(n, i)) == tfpdef ||
1379n/a TYPE(CHILD(n, i)) == vfpdef)) {
1380n/a i++;
1381n/a }
1382n/a break;
1383n/a }
1384n/a if (TYPE(ch) == DOUBLESTAR) break;
1385n/a if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
1386n/a if (TYPE(ch) == EQUAL) nposdefaults++;
1387n/a }
1388n/a /* count the number of keyword only args &
1389n/a defaults for keyword only args */
1390n/a for ( ; i < NCH(n); ++i) {
1391n/a ch = CHILD(n, i);
1392n/a if (TYPE(ch) == DOUBLESTAR) break;
1393n/a if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
1394n/a }
1395n/a posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
1396n/a if (!posargs && nposargs)
1397n/a return NULL;
1398n/a kwonlyargs = (nkwonlyargs ?
1399n/a _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1400n/a if (!kwonlyargs && nkwonlyargs)
1401n/a return NULL;
1402n/a posdefaults = (nposdefaults ?
1403n/a _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
1404n/a if (!posdefaults && nposdefaults)
1405n/a return NULL;
1406n/a /* The length of kwonlyargs and kwdefaults are same
1407n/a since we set NULL as default for keyword only argument w/o default
1408n/a - we have sequence data structure, but no dictionary */
1409n/a kwdefaults = (nkwonlyargs ?
1410n/a _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1411n/a if (!kwdefaults && nkwonlyargs)
1412n/a return NULL;
1413n/a
1414n/a /* tfpdef: NAME [':' test]
1415n/a vfpdef: NAME
1416n/a */
1417n/a i = 0;
1418n/a j = 0; /* index for defaults */
1419n/a k = 0; /* index for args */
1420n/a while (i < NCH(n)) {
1421n/a ch = CHILD(n, i);
1422n/a switch (TYPE(ch)) {
1423n/a case tfpdef:
1424n/a case vfpdef:
1425n/a /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
1426n/a anything other than EQUAL or a comma? */
1427n/a /* XXX Should NCH(n) check be made a separate check? */
1428n/a if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1429n/a expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
1430n/a if (!expression)
1431n/a return NULL;
1432n/a assert(posdefaults != NULL);
1433n/a asdl_seq_SET(posdefaults, j++, expression);
1434n/a i += 2;
1435n/a found_default = 1;
1436n/a }
1437n/a else if (found_default) {
1438n/a ast_error(c, n,
1439n/a "non-default argument follows default argument");
1440n/a return NULL;
1441n/a }
1442n/a arg = ast_for_arg(c, ch);
1443n/a if (!arg)
1444n/a return NULL;
1445n/a asdl_seq_SET(posargs, k++, arg);
1446n/a i += 2; /* the name and the comma */
1447n/a break;
1448n/a case STAR:
1449n/a if (i+1 >= NCH(n) ||
1450n/a (i+2 == NCH(n) && TYPE(CHILD(n, i+1)) == COMMA)) {
1451n/a ast_error(c, CHILD(n, i),
1452n/a "named arguments must follow bare *");
1453n/a return NULL;
1454n/a }
1455n/a ch = CHILD(n, i+1); /* tfpdef or COMMA */
1456n/a if (TYPE(ch) == COMMA) {
1457n/a int res = 0;
1458n/a i += 2; /* now follows keyword only arguments */
1459n/a res = handle_keywordonly_args(c, n, i,
1460n/a kwonlyargs, kwdefaults);
1461n/a if (res == -1) return NULL;
1462n/a i = res; /* res has new position to process */
1463n/a }
1464n/a else {
1465n/a vararg = ast_for_arg(c, ch);
1466n/a if (!vararg)
1467n/a return NULL;
1468n/a
1469n/a i += 3;
1470n/a if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
1471n/a || TYPE(CHILD(n, i)) == vfpdef)) {
1472n/a int res = 0;
1473n/a res = handle_keywordonly_args(c, n, i,
1474n/a kwonlyargs, kwdefaults);
1475n/a if (res == -1) return NULL;
1476n/a i = res; /* res has new position to process */
1477n/a }
1478n/a }
1479n/a break;
1480n/a case DOUBLESTAR:
1481n/a ch = CHILD(n, i+1); /* tfpdef */
1482n/a assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
1483n/a kwarg = ast_for_arg(c, ch);
1484n/a if (!kwarg)
1485n/a return NULL;
1486n/a i += 3;
1487n/a break;
1488n/a default:
1489n/a PyErr_Format(PyExc_SystemError,
1490n/a "unexpected node in varargslist: %d @ %d",
1491n/a TYPE(ch), i);
1492n/a return NULL;
1493n/a }
1494n/a }
1495n/a return arguments(posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
1496n/a}
1497n/a
1498n/astatic expr_ty
1499n/aast_for_dotted_name(struct compiling *c, const node *n)
1500n/a{
1501n/a expr_ty e;
1502n/a identifier id;
1503n/a int lineno, col_offset;
1504n/a int i;
1505n/a
1506n/a REQ(n, dotted_name);
1507n/a
1508n/a lineno = LINENO(n);
1509n/a col_offset = n->n_col_offset;
1510n/a
1511n/a id = NEW_IDENTIFIER(CHILD(n, 0));
1512n/a if (!id)
1513n/a return NULL;
1514n/a e = Name(id, Load, lineno, col_offset, c->c_arena);
1515n/a if (!e)
1516n/a return NULL;
1517n/a
1518n/a for (i = 2; i < NCH(n); i+=2) {
1519n/a id = NEW_IDENTIFIER(CHILD(n, i));
1520n/a if (!id)
1521n/a return NULL;
1522n/a e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
1523n/a if (!e)
1524n/a return NULL;
1525n/a }
1526n/a
1527n/a return e;
1528n/a}
1529n/a
1530n/astatic expr_ty
1531n/aast_for_decorator(struct compiling *c, const node *n)
1532n/a{
1533n/a /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
1534n/a expr_ty d = NULL;
1535n/a expr_ty name_expr;
1536n/a
1537n/a REQ(n, decorator);
1538n/a REQ(CHILD(n, 0), AT);
1539n/a REQ(RCHILD(n, -1), NEWLINE);
1540n/a
1541n/a name_expr = ast_for_dotted_name(c, CHILD(n, 1));
1542n/a if (!name_expr)
1543n/a return NULL;
1544n/a
1545n/a if (NCH(n) == 3) { /* No arguments */
1546n/a d = name_expr;
1547n/a name_expr = NULL;
1548n/a }
1549n/a else if (NCH(n) == 5) { /* Call with no arguments */
1550n/a d = Call(name_expr, NULL, NULL, LINENO(n),
1551n/a n->n_col_offset, c->c_arena);
1552n/a if (!d)
1553n/a return NULL;
1554n/a name_expr = NULL;
1555n/a }
1556n/a else {
1557n/a d = ast_for_call(c, CHILD(n, 3), name_expr);
1558n/a if (!d)
1559n/a return NULL;
1560n/a name_expr = NULL;
1561n/a }
1562n/a
1563n/a return d;
1564n/a}
1565n/a
1566n/astatic asdl_seq*
1567n/aast_for_decorators(struct compiling *c, const node *n)
1568n/a{
1569n/a asdl_seq* decorator_seq;
1570n/a expr_ty d;
1571n/a int i;
1572n/a
1573n/a REQ(n, decorators);
1574n/a decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
1575n/a if (!decorator_seq)
1576n/a return NULL;
1577n/a
1578n/a for (i = 0; i < NCH(n); i++) {
1579n/a d = ast_for_decorator(c, CHILD(n, i));
1580n/a if (!d)
1581n/a return NULL;
1582n/a asdl_seq_SET(decorator_seq, i, d);
1583n/a }
1584n/a return decorator_seq;
1585n/a}
1586n/a
1587n/astatic stmt_ty
1588n/aast_for_funcdef_impl(struct compiling *c, const node *n,
1589n/a asdl_seq *decorator_seq, int is_async)
1590n/a{
1591n/a /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1592n/a identifier name;
1593n/a arguments_ty args;
1594n/a asdl_seq *body;
1595n/a expr_ty returns = NULL;
1596n/a int name_i = 1;
1597n/a
1598n/a REQ(n, funcdef);
1599n/a
1600n/a name = NEW_IDENTIFIER(CHILD(n, name_i));
1601n/a if (!name)
1602n/a return NULL;
1603n/a if (forbidden_name(c, name, CHILD(n, name_i), 0))
1604n/a return NULL;
1605n/a args = ast_for_arguments(c, CHILD(n, name_i + 1));
1606n/a if (!args)
1607n/a return NULL;
1608n/a if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1609n/a returns = ast_for_expr(c, CHILD(n, name_i + 3));
1610n/a if (!returns)
1611n/a return NULL;
1612n/a name_i += 2;
1613n/a }
1614n/a body = ast_for_suite(c, CHILD(n, name_i + 3));
1615n/a if (!body)
1616n/a return NULL;
1617n/a
1618n/a if (is_async)
1619n/a return AsyncFunctionDef(name, args, body, decorator_seq, returns,
1620n/a LINENO(n),
1621n/a n->n_col_offset, c->c_arena);
1622n/a else
1623n/a return FunctionDef(name, args, body, decorator_seq, returns,
1624n/a LINENO(n),
1625n/a n->n_col_offset, c->c_arena);
1626n/a}
1627n/a
1628n/astatic stmt_ty
1629n/aast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1630n/a{
1631n/a /* async_funcdef: ASYNC funcdef */
1632n/a REQ(n, async_funcdef);
1633n/a REQ(CHILD(n, 0), ASYNC);
1634n/a REQ(CHILD(n, 1), funcdef);
1635n/a
1636n/a return ast_for_funcdef_impl(c, CHILD(n, 1), decorator_seq,
1637n/a 1 /* is_async */);
1638n/a}
1639n/a
1640n/astatic stmt_ty
1641n/aast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1642n/a{
1643n/a /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1644n/a return ast_for_funcdef_impl(c, n, decorator_seq,
1645n/a 0 /* is_async */);
1646n/a}
1647n/a
1648n/a
1649n/astatic stmt_ty
1650n/aast_for_async_stmt(struct compiling *c, const node *n)
1651n/a{
1652n/a /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */
1653n/a REQ(n, async_stmt);
1654n/a REQ(CHILD(n, 0), ASYNC);
1655n/a
1656n/a switch (TYPE(CHILD(n, 1))) {
1657n/a case funcdef:
1658n/a return ast_for_funcdef_impl(c, CHILD(n, 1), NULL,
1659n/a 1 /* is_async */);
1660n/a case with_stmt:
1661n/a return ast_for_with_stmt(c, CHILD(n, 1),
1662n/a 1 /* is_async */);
1663n/a
1664n/a case for_stmt:
1665n/a return ast_for_for_stmt(c, CHILD(n, 1),
1666n/a 1 /* is_async */);
1667n/a
1668n/a default:
1669n/a PyErr_Format(PyExc_SystemError,
1670n/a "invalid async stament: %s",
1671n/a STR(CHILD(n, 1)));
1672n/a return NULL;
1673n/a }
1674n/a}
1675n/a
1676n/astatic stmt_ty
1677n/aast_for_decorated(struct compiling *c, const node *n)
1678n/a{
1679n/a /* decorated: decorators (classdef | funcdef | async_funcdef) */
1680n/a stmt_ty thing = NULL;
1681n/a asdl_seq *decorator_seq = NULL;
1682n/a
1683n/a REQ(n, decorated);
1684n/a
1685n/a decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1686n/a if (!decorator_seq)
1687n/a return NULL;
1688n/a
1689n/a assert(TYPE(CHILD(n, 1)) == funcdef ||
1690n/a TYPE(CHILD(n, 1)) == async_funcdef ||
1691n/a TYPE(CHILD(n, 1)) == classdef);
1692n/a
1693n/a if (TYPE(CHILD(n, 1)) == funcdef) {
1694n/a thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1695n/a } else if (TYPE(CHILD(n, 1)) == classdef) {
1696n/a thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1697n/a } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
1698n/a thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
1699n/a }
1700n/a /* we count the decorators in when talking about the class' or
1701n/a * function's line number */
1702n/a if (thing) {
1703n/a thing->lineno = LINENO(n);
1704n/a thing->col_offset = n->n_col_offset;
1705n/a }
1706n/a return thing;
1707n/a}
1708n/a
1709n/astatic expr_ty
1710n/aast_for_lambdef(struct compiling *c, const node *n)
1711n/a{
1712n/a /* lambdef: 'lambda' [varargslist] ':' test
1713n/a lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
1714n/a arguments_ty args;
1715n/a expr_ty expression;
1716n/a
1717n/a if (NCH(n) == 3) {
1718n/a args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1719n/a if (!args)
1720n/a return NULL;
1721n/a expression = ast_for_expr(c, CHILD(n, 2));
1722n/a if (!expression)
1723n/a return NULL;
1724n/a }
1725n/a else {
1726n/a args = ast_for_arguments(c, CHILD(n, 1));
1727n/a if (!args)
1728n/a return NULL;
1729n/a expression = ast_for_expr(c, CHILD(n, 3));
1730n/a if (!expression)
1731n/a return NULL;
1732n/a }
1733n/a
1734n/a return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
1735n/a}
1736n/a
1737n/astatic expr_ty
1738n/aast_for_ifexpr(struct compiling *c, const node *n)
1739n/a{
1740n/a /* test: or_test 'if' or_test 'else' test */
1741n/a expr_ty expression, body, orelse;
1742n/a
1743n/a assert(NCH(n) == 5);
1744n/a body = ast_for_expr(c, CHILD(n, 0));
1745n/a if (!body)
1746n/a return NULL;
1747n/a expression = ast_for_expr(c, CHILD(n, 2));
1748n/a if (!expression)
1749n/a return NULL;
1750n/a orelse = ast_for_expr(c, CHILD(n, 4));
1751n/a if (!orelse)
1752n/a return NULL;
1753n/a return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
1754n/a c->c_arena);
1755n/a}
1756n/a
1757n/a/*
1758n/a Count the number of 'for' loops in a comprehension.
1759n/a
1760n/a Helper for ast_for_comprehension().
1761n/a*/
1762n/a
1763n/astatic int
1764n/acount_comp_fors(struct compiling *c, const node *n)
1765n/a{
1766n/a int n_fors = 0;
1767n/a int is_async;
1768n/a
1769n/a count_comp_for:
1770n/a is_async = 0;
1771n/a n_fors++;
1772n/a REQ(n, comp_for);
1773n/a if (TYPE(CHILD(n, 0)) == ASYNC) {
1774n/a is_async = 1;
1775n/a }
1776n/a if (NCH(n) == (5 + is_async)) {
1777n/a n = CHILD(n, 4 + is_async);
1778n/a }
1779n/a else {
1780n/a return n_fors;
1781n/a }
1782n/a count_comp_iter:
1783n/a REQ(n, comp_iter);
1784n/a n = CHILD(n, 0);
1785n/a if (TYPE(n) == comp_for)
1786n/a goto count_comp_for;
1787n/a else if (TYPE(n) == comp_if) {
1788n/a if (NCH(n) == 3) {
1789n/a n = CHILD(n, 2);
1790n/a goto count_comp_iter;
1791n/a }
1792n/a else
1793n/a return n_fors;
1794n/a }
1795n/a
1796n/a /* Should never be reached */
1797n/a PyErr_SetString(PyExc_SystemError,
1798n/a "logic error in count_comp_fors");
1799n/a return -1;
1800n/a}
1801n/a
1802n/a/* Count the number of 'if' statements in a comprehension.
1803n/a
1804n/a Helper for ast_for_comprehension().
1805n/a*/
1806n/a
1807n/astatic int
1808n/acount_comp_ifs(struct compiling *c, const node *n)
1809n/a{
1810n/a int n_ifs = 0;
1811n/a
1812n/a while (1) {
1813n/a REQ(n, comp_iter);
1814n/a if (TYPE(CHILD(n, 0)) == comp_for)
1815n/a return n_ifs;
1816n/a n = CHILD(n, 0);
1817n/a REQ(n, comp_if);
1818n/a n_ifs++;
1819n/a if (NCH(n) == 2)
1820n/a return n_ifs;
1821n/a n = CHILD(n, 2);
1822n/a }
1823n/a}
1824n/a
1825n/astatic asdl_seq *
1826n/aast_for_comprehension(struct compiling *c, const node *n)
1827n/a{
1828n/a int i, n_fors;
1829n/a asdl_seq *comps;
1830n/a
1831n/a n_fors = count_comp_fors(c, n);
1832n/a if (n_fors == -1)
1833n/a return NULL;
1834n/a
1835n/a comps = _Py_asdl_seq_new(n_fors, c->c_arena);
1836n/a if (!comps)
1837n/a return NULL;
1838n/a
1839n/a for (i = 0; i < n_fors; i++) {
1840n/a comprehension_ty comp;
1841n/a asdl_seq *t;
1842n/a expr_ty expression, first;
1843n/a node *for_ch;
1844n/a int is_async = 0;
1845n/a
1846n/a REQ(n, comp_for);
1847n/a
1848n/a if (TYPE(CHILD(n, 0)) == ASYNC) {
1849n/a is_async = 1;
1850n/a }
1851n/a
1852n/a for_ch = CHILD(n, 1 + is_async);
1853n/a t = ast_for_exprlist(c, for_ch, Store);
1854n/a if (!t)
1855n/a return NULL;
1856n/a expression = ast_for_expr(c, CHILD(n, 3 + is_async));
1857n/a if (!expression)
1858n/a return NULL;
1859n/a
1860n/a /* Check the # of children rather than the length of t, since
1861n/a (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1862n/a first = (expr_ty)asdl_seq_GET(t, 0);
1863n/a if (NCH(for_ch) == 1)
1864n/a comp = comprehension(first, expression, NULL,
1865n/a is_async, c->c_arena);
1866n/a else
1867n/a comp = comprehension(Tuple(t, Store, first->lineno,
1868n/a first->col_offset, c->c_arena),
1869n/a expression, NULL, is_async, c->c_arena);
1870n/a if (!comp)
1871n/a return NULL;
1872n/a
1873n/a if (NCH(n) == (5 + is_async)) {
1874n/a int j, n_ifs;
1875n/a asdl_seq *ifs;
1876n/a
1877n/a n = CHILD(n, 4 + is_async);
1878n/a n_ifs = count_comp_ifs(c, n);
1879n/a if (n_ifs == -1)
1880n/a return NULL;
1881n/a
1882n/a ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
1883n/a if (!ifs)
1884n/a return NULL;
1885n/a
1886n/a for (j = 0; j < n_ifs; j++) {
1887n/a REQ(n, comp_iter);
1888n/a n = CHILD(n, 0);
1889n/a REQ(n, comp_if);
1890n/a
1891n/a expression = ast_for_expr(c, CHILD(n, 1));
1892n/a if (!expression)
1893n/a return NULL;
1894n/a asdl_seq_SET(ifs, j, expression);
1895n/a if (NCH(n) == 3)
1896n/a n = CHILD(n, 2);
1897n/a }
1898n/a /* on exit, must guarantee that n is a comp_for */
1899n/a if (TYPE(n) == comp_iter)
1900n/a n = CHILD(n, 0);
1901n/a comp->ifs = ifs;
1902n/a }
1903n/a asdl_seq_SET(comps, i, comp);
1904n/a }
1905n/a return comps;
1906n/a}
1907n/a
1908n/astatic expr_ty
1909n/aast_for_itercomp(struct compiling *c, const node *n, int type)
1910n/a{
1911n/a /* testlist_comp: (test|star_expr)
1912n/a * ( comp_for | (',' (test|star_expr))* [','] ) */
1913n/a expr_ty elt;
1914n/a asdl_seq *comps;
1915n/a node *ch;
1916n/a
1917n/a assert(NCH(n) > 1);
1918n/a
1919n/a ch = CHILD(n, 0);
1920n/a elt = ast_for_expr(c, ch);
1921n/a if (!elt)
1922n/a return NULL;
1923n/a if (elt->kind == Starred_kind) {
1924n/a ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
1925n/a return NULL;
1926n/a }
1927n/a
1928n/a comps = ast_for_comprehension(c, CHILD(n, 1));
1929n/a if (!comps)
1930n/a return NULL;
1931n/a
1932n/a if (type == COMP_GENEXP)
1933n/a return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1934n/a else if (type == COMP_LISTCOMP)
1935n/a return ListComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1936n/a else if (type == COMP_SETCOMP)
1937n/a return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1938n/a else
1939n/a /* Should never happen */
1940n/a return NULL;
1941n/a}
1942n/a
1943n/a/* Fills in the key, value pair corresponding to the dict element. In case
1944n/a * of an unpacking, key is NULL. *i is advanced by the number of ast
1945n/a * elements. Iff successful, nonzero is returned.
1946n/a */
1947n/astatic int
1948n/aast_for_dictelement(struct compiling *c, const node *n, int *i,
1949n/a expr_ty *key, expr_ty *value)
1950n/a{
1951n/a expr_ty expression;
1952n/a if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
1953n/a assert(NCH(n) - *i >= 2);
1954n/a
1955n/a expression = ast_for_expr(c, CHILD(n, *i + 1));
1956n/a if (!expression)
1957n/a return 0;
1958n/a *key = NULL;
1959n/a *value = expression;
1960n/a
1961n/a *i += 2;
1962n/a }
1963n/a else {
1964n/a assert(NCH(n) - *i >= 3);
1965n/a
1966n/a expression = ast_for_expr(c, CHILD(n, *i));
1967n/a if (!expression)
1968n/a return 0;
1969n/a *key = expression;
1970n/a
1971n/a REQ(CHILD(n, *i + 1), COLON);
1972n/a
1973n/a expression = ast_for_expr(c, CHILD(n, *i + 2));
1974n/a if (!expression)
1975n/a return 0;
1976n/a *value = expression;
1977n/a
1978n/a *i += 3;
1979n/a }
1980n/a return 1;
1981n/a}
1982n/a
1983n/astatic expr_ty
1984n/aast_for_dictcomp(struct compiling *c, const node *n)
1985n/a{
1986n/a expr_ty key, value;
1987n/a asdl_seq *comps;
1988n/a int i = 0;
1989n/a
1990n/a if (!ast_for_dictelement(c, n, &i, &key, &value))
1991n/a return NULL;
1992n/a assert(key);
1993n/a assert(NCH(n) - i >= 1);
1994n/a
1995n/a comps = ast_for_comprehension(c, CHILD(n, i));
1996n/a if (!comps)
1997n/a return NULL;
1998n/a
1999n/a return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena);
2000n/a}
2001n/a
2002n/astatic expr_ty
2003n/aast_for_dictdisplay(struct compiling *c, const node *n)
2004n/a{
2005n/a int i;
2006n/a int j;
2007n/a int size;
2008n/a asdl_seq *keys, *values;
2009n/a
2010n/a size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
2011n/a keys = _Py_asdl_seq_new(size, c->c_arena);
2012n/a if (!keys)
2013n/a return NULL;
2014n/a
2015n/a values = _Py_asdl_seq_new(size, c->c_arena);
2016n/a if (!values)
2017n/a return NULL;
2018n/a
2019n/a j = 0;
2020n/a for (i = 0; i < NCH(n); i++) {
2021n/a expr_ty key, value;
2022n/a
2023n/a if (!ast_for_dictelement(c, n, &i, &key, &value))
2024n/a return NULL;
2025n/a asdl_seq_SET(keys, j, key);
2026n/a asdl_seq_SET(values, j, value);
2027n/a
2028n/a j++;
2029n/a }
2030n/a keys->size = j;
2031n/a values->size = j;
2032n/a return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
2033n/a}
2034n/a
2035n/astatic expr_ty
2036n/aast_for_genexp(struct compiling *c, const node *n)
2037n/a{
2038n/a assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
2039n/a return ast_for_itercomp(c, n, COMP_GENEXP);
2040n/a}
2041n/a
2042n/astatic expr_ty
2043n/aast_for_listcomp(struct compiling *c, const node *n)
2044n/a{
2045n/a assert(TYPE(n) == (testlist_comp));
2046n/a return ast_for_itercomp(c, n, COMP_LISTCOMP);
2047n/a}
2048n/a
2049n/astatic expr_ty
2050n/aast_for_setcomp(struct compiling *c, const node *n)
2051n/a{
2052n/a assert(TYPE(n) == (dictorsetmaker));
2053n/a return ast_for_itercomp(c, n, COMP_SETCOMP);
2054n/a}
2055n/a
2056n/astatic expr_ty
2057n/aast_for_setdisplay(struct compiling *c, const node *n)
2058n/a{
2059n/a int i;
2060n/a int size;
2061n/a asdl_seq *elts;
2062n/a
2063n/a assert(TYPE(n) == (dictorsetmaker));
2064n/a size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
2065n/a elts = _Py_asdl_seq_new(size, c->c_arena);
2066n/a if (!elts)
2067n/a return NULL;
2068n/a for (i = 0; i < NCH(n); i += 2) {
2069n/a expr_ty expression;
2070n/a expression = ast_for_expr(c, CHILD(n, i));
2071n/a if (!expression)
2072n/a return NULL;
2073n/a asdl_seq_SET(elts, i / 2, expression);
2074n/a }
2075n/a return Set(elts, LINENO(n), n->n_col_offset, c->c_arena);
2076n/a}
2077n/a
/* Convert an atom node into an expression node.
 *
 * Dispatches on the type of the atom's first child: NAME, STRING, NUMBER,
 * ELLIPSIS, or an opening '(' / '[' / '{'.  Returns NULL on failure; a first
 * child of any other type raises SystemError.
 */
2078n/astatic expr_ty
2079n/aast_for_atom(struct compiling *c, const node *n)
2080n/a{
2081n/a /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
2082n/a | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
2083n/a | '...' | 'None' | 'True' | 'False'
2084n/a */
2085n/a node *ch = CHILD(n, 0);
2086n/a
2087n/a switch (TYPE(ch)) {
2088n/a case NAME: {
2089n/a PyObject *name;
2090n/a const char *s = STR(ch);
2091n/a size_t len = strlen(s);
 /* "None", "True" and "False" are 4 or 5 characters long, so the strcmp
    calls are skipped for names of any other length. */
2092n/a if (len >= 4 && len <= 5) {
2093n/a if (!strcmp(s, "None"))
2094n/a return NameConstant(Py_None, LINENO(n), n->n_col_offset, c->c_arena);
2095n/a if (!strcmp(s, "True"))
2096n/a return NameConstant(Py_True, LINENO(n), n->n_col_offset, c->c_arena);
2097n/a if (!strcmp(s, "False"))
2098n/a return NameConstant(Py_False, LINENO(n), n->n_col_offset, c->c_arena);
2099n/a }
2100n/a name = new_identifier(s, c);
2101n/a if (!name)
2102n/a return NULL;
2103n/a /* All names start in Load context, but may later be changed. */
2104n/a return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
2105n/a }
2106n/a case STRING: {
2107n/a expr_ty str = parsestrplus(c, n);
2108n/a if (!str) {
 /* Only UnicodeError and ValueError are rewritten below; for any other
    pending exception errtype stays NULL and the error is left as is. */
2109n/a const char *errtype = NULL;
2110n/a if (PyErr_ExceptionMatches(PyExc_UnicodeError))
2111n/a errtype = "unicode error";
2112n/a else if (PyErr_ExceptionMatches(PyExc_ValueError))
2113n/a errtype = "value error";
2114n/a if (errtype) {
2115n/a char buf[128];
2116n/a const char *s = NULL;
2117n/a PyObject *type, *value, *tback, *errstr;
 /* Take the pending exception so that its text can be embedded in the
    message handed to ast_error(). */
2118n/a PyErr_Fetch(&type, &value, &tback);
2119n/a errstr = PyObject_Str(value);
2120n/a if (errstr)
2121n/a s = PyUnicode_AsUTF8(errstr);
2122n/a if (s) {
2123n/a PyOS_snprintf(buf, sizeof(buf), "(%s) %s", errtype, s);
2124n/a } else {
 /* No text could be obtained: drop whatever error that attempt left
    behind and fall back to a generic message. */
2125n/a PyErr_Clear();
2126n/a PyOS_snprintf(buf, sizeof(buf), "(%s) unknown error", errtype);
2127n/a }
2128n/a Py_XDECREF(errstr);
2129n/a ast_error(c, n, buf);
 /* Release the references obtained from PyErr_Fetch(). */
2130n/a Py_DECREF(type);
2131n/a Py_XDECREF(value);
2132n/a Py_XDECREF(tback);
2133n/a }
2134n/a return NULL;
2135n/a }
2136n/a return str;
2137n/a }
2138n/a case NUMBER: {
2139n/a PyObject *pynum = parsenumber(c, STR(ch));
2140n/a if (!pynum)
2141n/a return NULL;
2142n/a
 /* If pynum cannot be registered with the arena, the reference is dropped
    here before bailing out. */
2143n/a if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
2144n/a Py_DECREF(pynum);
2145n/a return NULL;
2146n/a }
2147n/a return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
2148n/a }
2149n/a case ELLIPSIS: /* Ellipsis */
2150n/a return Ellipsis(LINENO(n), n->n_col_offset, c->c_arena);
2151n/a case LPAR: /* some parenthesized expressions */
2152n/a ch = CHILD(n, 1);
2153n/a
 /* "()": the second child is already the closing parenthesis. */
2154n/a if (TYPE(ch) == RPAR)
2155n/a return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
2156n/a
2157n/a if (TYPE(ch) == yield_expr)
2158n/a return ast_for_expr(c, ch);
2159n/a
2160n/a /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2161n/a if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == comp_for))
2162n/a return ast_for_genexp(c, ch);
2163n/a
2164n/a return ast_for_testlist(c, ch);
2165n/a case LSQB: /* list (or list comprehension) */
2166n/a ch = CHILD(n, 1);
2167n/a
 /* "[]": the second child is already the closing bracket. */
2168n/a if (TYPE(ch) == RSQB)
2169n/a return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
2170n/a
2171n/a REQ(ch, testlist_comp);
 /* A single element, or a comma after the first element, means a plain
    list display; anything else is handed to ast_for_listcomp(). */
2172n/a if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
2173n/a asdl_seq *elts = seq_for_testlist(c, ch);
2174n/a if (!elts)
2175n/a return NULL;
2176n/a
2177n/a return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
2178n/a }
2179n/a else
2180n/a return ast_for_listcomp(c, ch);
2181n/a case LBRACE: {
2182n/a /* dictorsetmaker: ( ((test ':' test | '**' test)
2183n/a * (comp_for | (',' (test ':' test | '**' test))* [','])) |
2184n/a * ((test | '*' test)
2185n/a * (comp_for | (',' (test | '*' test))* [','])) ) */
2186n/a expr_ty res;
2187n/a ch = CHILD(n, 1);
2188n/a if (TYPE(ch) == RBRACE) {
2189n/a /* It's an empty dict. */
2190n/a return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2191n/a }
2192n/a else {
 /* is_dict is 1 when the first child is '**'.  It shifts the position at
    which a dict comprehension's comp_for is looked for from 3 to 2. */
2193n/a int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
2194n/a if (NCH(ch) == 1 ||
2195n/a (NCH(ch) > 1 &&
2196n/a TYPE(CHILD(ch, 1)) == COMMA)) {
2197n/a /* It's a set display. */
2198n/a res = ast_for_setdisplay(c, ch);
2199n/a }
2200n/a else if (NCH(ch) > 1 &&
2201n/a TYPE(CHILD(ch, 1)) == comp_for) {
2202n/a /* It's a set comprehension. */
2203n/a res = ast_for_setcomp(c, ch);
2204n/a }
2205n/a else if (NCH(ch) > 3 - is_dict &&
2206n/a TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
2207n/a /* It's a dictionary comprehension. */
2208n/a if (is_dict) {
2209n/a ast_error(c, n, "dict unpacking cannot be used in "
2210n/a "dict comprehension");
2211n/a return NULL;
2212n/a }
2213n/a res = ast_for_dictcomp(c, ch);
2214n/a }
2215n/a else {
2216n/a /* It's a dictionary display. */
2217n/a res = ast_for_dictdisplay(c, ch);
2218n/a }
 /* Overwrite the position recorded by the helper with the position of
    the atom node itself. */
2219n/a if (res) {
2220n/a res->lineno = LINENO(n);
2221n/a res->col_offset = n->n_col_offset;
2222n/a }
2223n/a return res;
2224n/a }
2225n/a }
2226n/a default:
2227n/a PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
2228n/a return NULL;
2229n/a }
2230n/a}
2231n/a
2232n/astatic slice_ty
2233n/aast_for_slice(struct compiling *c, const node *n)
2234n/a{
2235n/a node *ch;
2236n/a expr_ty lower = NULL, upper = NULL, step = NULL;
2237n/a
2238n/a REQ(n, subscript);
2239n/a
2240n/a /*
2241n/a subscript: test | [test] ':' [test] [sliceop]
2242n/a sliceop: ':' [test]
2243n/a */
2244n/a ch = CHILD(n, 0);
2245n/a if (NCH(n) == 1 && TYPE(ch) == test) {
2246n/a /* 'step' variable hold no significance in terms of being used over
2247n/a other vars */
2248n/a step = ast_for_expr(c, ch);
2249n/a if (!step)
2250n/a return NULL;
2251n/a
2252n/a return Index(step, c->c_arena);
2253n/a }
2254n/a
2255n/a if (TYPE(ch) == test) {
2256n/a lower = ast_for_expr(c, ch);
2257n/a if (!lower)
2258n/a return NULL;
2259n/a }
2260n/a
2261n/a /* If there's an upper bound it's in the second or third position. */
2262n/a if (TYPE(ch) == COLON) {
2263n/a if (NCH(n) > 1) {
2264n/a node *n2 = CHILD(n, 1);
2265n/a
2266n/a if (TYPE(n2) == test) {
2267n/a upper = ast_for_expr(c, n2);
2268n/a if (!upper)
2269n/a return NULL;
2270n/a }
2271n/a }
2272n/a } else if (NCH(n) > 2) {
2273n/a node *n2 = CHILD(n, 2);
2274n/a
2275n/a if (TYPE(n2) == test) {
2276n/a upper = ast_for_expr(c, n2);
2277n/a if (!upper)
2278n/a return NULL;
2279n/a }
2280n/a }
2281n/a
2282n/a ch = CHILD(n, NCH(n) - 1);
2283n/a if (TYPE(ch) == sliceop) {
2284n/a if (NCH(ch) != 1) {
2285n/a ch = CHILD(ch, 1);
2286n/a if (TYPE(ch) == test) {
2287n/a step = ast_for_expr(c, ch);
2288n/a if (!step)
2289n/a return NULL;
2290n/a }
2291n/a }
2292n/a }
2293n/a
2294n/a return Slice(lower, upper, step, c->c_arena);
2295n/a}
2296n/a
2297n/astatic expr_ty
2298n/aast_for_binop(struct compiling *c, const node *n)
2299n/a{
2300n/a /* Must account for a sequence of expressions.
2301n/a How should A op B op C by represented?
2302n/a BinOp(BinOp(A, op, B), op, C).
2303n/a */
2304n/a
2305n/a int i, nops;
2306n/a expr_ty expr1, expr2, result;
2307n/a operator_ty newoperator;
2308n/a
2309n/a expr1 = ast_for_expr(c, CHILD(n, 0));
2310n/a if (!expr1)
2311n/a return NULL;
2312n/a
2313n/a expr2 = ast_for_expr(c, CHILD(n, 2));
2314n/a if (!expr2)
2315n/a return NULL;
2316n/a
2317n/a newoperator = get_operator(CHILD(n, 1));
2318n/a if (!newoperator)
2319n/a return NULL;
2320n/a
2321n/a result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2322n/a c->c_arena);
2323n/a if (!result)
2324n/a return NULL;
2325n/a
2326n/a nops = (NCH(n) - 1) / 2;
2327n/a for (i = 1; i < nops; i++) {
2328n/a expr_ty tmp_result, tmp;
2329n/a const node* next_oper = CHILD(n, i * 2 + 1);
2330n/a
2331n/a newoperator = get_operator(next_oper);
2332n/a if (!newoperator)
2333n/a return NULL;
2334n/a
2335n/a tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
2336n/a if (!tmp)
2337n/a return NULL;
2338n/a
2339n/a tmp_result = BinOp(result, newoperator, tmp,
2340n/a LINENO(next_oper), next_oper->n_col_offset,
2341n/a c->c_arena);
2342n/a if (!tmp_result)
2343n/a return NULL;
2344n/a result = tmp_result;
2345n/a }
2346n/a return result;
2347n/a}
2348n/a
2349n/astatic expr_ty
2350n/aast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
2351n/a{
2352n/a /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2353n/a subscriptlist: subscript (',' subscript)* [',']
2354n/a subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
2355n/a */
2356n/a REQ(n, trailer);
2357n/a if (TYPE(CHILD(n, 0)) == LPAR) {
2358n/a if (NCH(n) == 2)
2359n/a return Call(left_expr, NULL, NULL, LINENO(n),
2360n/a n->n_col_offset, c->c_arena);
2361n/a else
2362n/a return ast_for_call(c, CHILD(n, 1), left_expr);
2363n/a }
2364n/a else if (TYPE(CHILD(n, 0)) == DOT) {
2365n/a PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
2366n/a if (!attr_id)
2367n/a return NULL;
2368n/a return Attribute(left_expr, attr_id, Load,
2369n/a LINENO(n), n->n_col_offset, c->c_arena);
2370n/a }
2371n/a else {
2372n/a REQ(CHILD(n, 0), LSQB);
2373n/a REQ(CHILD(n, 2), RSQB);
2374n/a n = CHILD(n, 1);
2375n/a if (NCH(n) == 1) {
2376n/a slice_ty slc = ast_for_slice(c, CHILD(n, 0));
2377n/a if (!slc)
2378n/a return NULL;
2379n/a return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
2380n/a c->c_arena);
2381n/a }
2382n/a else {
2383n/a /* The grammar is ambiguous here. The ambiguity is resolved
2384n/a by treating the sequence as a tuple literal if there are
2385n/a no slice features.
2386n/a */
2387n/a int j;
2388n/a slice_ty slc;
2389n/a expr_ty e;
2390n/a int simple = 1;
2391n/a asdl_seq *slices, *elts;
2392n/a slices = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2393n/a if (!slices)
2394n/a return NULL;
2395n/a for (j = 0; j < NCH(n); j += 2) {
2396n/a slc = ast_for_slice(c, CHILD(n, j));
2397n/a if (!slc)
2398n/a return NULL;
2399n/a if (slc->kind != Index_kind)
2400n/a simple = 0;
2401n/a asdl_seq_SET(slices, j / 2, slc);
2402n/a }
2403n/a if (!simple) {
2404n/a return Subscript(left_expr, ExtSlice(slices, c->c_arena),
2405n/a Load, LINENO(n), n->n_col_offset, c->c_arena);
2406n/a }
2407n/a /* extract Index values and put them in a Tuple */
2408n/a elts = _Py_asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
2409n/a if (!elts)
2410n/a return NULL;
2411n/a for (j = 0; j < asdl_seq_LEN(slices); ++j) {
2412n/a slc = (slice_ty)asdl_seq_GET(slices, j);
2413n/a assert(slc->kind == Index_kind && slc->v.Index.value);
2414n/a asdl_seq_SET(elts, j, slc->v.Index.value);
2415n/a }
2416n/a e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
2417n/a if (!e)
2418n/a return NULL;
2419n/a return Subscript(left_expr, Index(e, c->c_arena),
2420n/a Load, LINENO(n), n->n_col_offset, c->c_arena);
2421n/a }
2422n/a }
2423n/a}
2424n/a
2425n/astatic expr_ty
2426n/aast_for_factor(struct compiling *c, const node *n)
2427n/a{
2428n/a expr_ty expression;
2429n/a
2430n/a expression = ast_for_expr(c, CHILD(n, 1));
2431n/a if (!expression)
2432n/a return NULL;
2433n/a
2434n/a switch (TYPE(CHILD(n, 0))) {
2435n/a case PLUS:
2436n/a return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
2437n/a c->c_arena);
2438n/a case MINUS:
2439n/a return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
2440n/a c->c_arena);
2441n/a case TILDE:
2442n/a return UnaryOp(Invert, expression, LINENO(n),
2443n/a n->n_col_offset, c->c_arena);
2444n/a }
2445n/a PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
2446n/a TYPE(CHILD(n, 0)));
2447n/a return NULL;
2448n/a}
2449n/a
2450n/astatic expr_ty
2451n/aast_for_atom_expr(struct compiling *c, const node *n)
2452n/a{
2453n/a int i, nch, start = 0;
2454n/a expr_ty e, tmp;
2455n/a
2456n/a REQ(n, atom_expr);
2457n/a nch = NCH(n);
2458n/a
2459n/a if (TYPE(CHILD(n, 0)) == AWAIT) {
2460n/a start = 1;
2461n/a assert(nch > 1);
2462n/a }
2463n/a
2464n/a e = ast_for_atom(c, CHILD(n, start));
2465n/a if (!e)
2466n/a return NULL;
2467n/a if (nch == 1)
2468n/a return e;
2469n/a if (start && nch == 2) {
2470n/a return Await(e, LINENO(n), n->n_col_offset, c->c_arena);
2471n/a }
2472n/a
2473n/a for (i = start + 1; i < nch; i++) {
2474n/a node *ch = CHILD(n, i);
2475n/a if (TYPE(ch) != trailer)
2476n/a break;
2477n/a tmp = ast_for_trailer(c, ch, e);
2478n/a if (!tmp)
2479n/a return NULL;
2480n/a tmp->lineno = e->lineno;
2481n/a tmp->col_offset = e->col_offset;
2482n/a e = tmp;
2483n/a }
2484n/a
2485n/a if (start) {
2486n/a /* there was an AWAIT */
2487n/a return Await(e, LINENO(n), n->n_col_offset, c->c_arena);
2488n/a }
2489n/a else {
2490n/a return e;
2491n/a }
2492n/a}
2493n/a
2494n/astatic expr_ty
2495n/aast_for_power(struct compiling *c, const node *n)
2496n/a{
2497n/a /* power: atom trailer* ('**' factor)*
2498n/a */
2499n/a expr_ty e;
2500n/a REQ(n, power);
2501n/a e = ast_for_atom_expr(c, CHILD(n, 0));
2502n/a if (!e)
2503n/a return NULL;
2504n/a if (NCH(n) == 1)
2505n/a return e;
2506n/a if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
2507n/a expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
2508n/a if (!f)
2509n/a return NULL;
2510n/a e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
2511n/a }
2512n/a return e;
2513n/a}
2514n/a
2515n/astatic expr_ty
2516n/aast_for_starred(struct compiling *c, const node *n)
2517n/a{
2518n/a expr_ty tmp;
2519n/a REQ(n, star_expr);
2520n/a
2521n/a tmp = ast_for_expr(c, CHILD(n, 1));
2522n/a if (!tmp)
2523n/a return NULL;
2524n/a
2525n/a /* The Load context is changed later. */
2526n/a return Starred(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2527n/a}
2528n/a
2529n/a
2530n/a/* Do not name a variable 'expr'! Will cause a compile error.
2531n/a*/
2532n/a
/* Convert an expression node of any of the types handled by the switch
 * below into an expression node of the AST.
 *
 * For most node types, a node with exactly one child is unwrapped in place:
 * n is replaced by that child and the switch is re-entered through the
 * "loop" label instead of recursing.  Returns NULL on failure; a node type
 * the switch does not handle raises SystemError.
 */
2533n/astatic expr_ty
2534n/aast_for_expr(struct compiling *c, const node *n)
2535n/a{
2536n/a /* handle the full range of simple expressions
2537n/a test: or_test ['if' or_test 'else' test] | lambdef
2538n/a test_nocond: or_test | lambdef_nocond
2539n/a or_test: and_test ('or' and_test)*
2540n/a and_test: not_test ('and' not_test)*
2541n/a not_test: 'not' not_test | comparison
2542n/a comparison: expr (comp_op expr)*
2543n/a expr: xor_expr ('|' xor_expr)*
2544n/a xor_expr: and_expr ('^' and_expr)*
2545n/a and_expr: shift_expr ('&' shift_expr)*
2546n/a shift_expr: arith_expr (('<<'|'>>') arith_expr)*
2547n/a arith_expr: term (('+'|'-') term)*
2548n/a term: factor (('*'|'@'|'/'|'%'|'//') factor)*
2549n/a factor: ('+'|'-'|'~') factor | power
2550n/a power: atom_expr ['**' factor]
2551n/a atom_expr: [AWAIT] atom trailer*
2552n/a yield_expr: 'yield' [yield_arg]
2553n/a */
2554n/a
2555n/a asdl_seq *seq;
2556n/a int i;
2557n/a
 /* Re-entry point used after n has been replaced by its only child. */
2558n/a loop:
2559n/a switch (TYPE(n)) {
2560n/a case test:
2561n/a case test_nocond:
2562n/a if (TYPE(CHILD(n, 0)) == lambdef ||
2563n/a TYPE(CHILD(n, 0)) == lambdef_nocond)
2564n/a return ast_for_lambdef(c, CHILD(n, 0));
2565n/a else if (NCH(n) > 1)
2566n/a return ast_for_ifexpr(c, n);
2567n/a /* Fallthrough */
2568n/a case or_test:
2569n/a case and_test:
2570n/a if (NCH(n) == 1) {
2571n/a n = CHILD(n, 0);
2572n/a goto loop;
2573n/a }
 /* Operands sit at even child positions.  Child 1 is the first operator
    keyword and decides between And and Or. */
2574n/a seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2575n/a if (!seq)
2576n/a return NULL;
2577n/a for (i = 0; i < NCH(n); i += 2) {
2578n/a expr_ty e = ast_for_expr(c, CHILD(n, i));
2579n/a if (!e)
2580n/a return NULL;
2581n/a asdl_seq_SET(seq, i / 2, e);
2582n/a }
2583n/a if (!strcmp(STR(CHILD(n, 1)), "and"))
2584n/a return BoolOp(And, seq, LINENO(n), n->n_col_offset,
2585n/a c->c_arena);
2586n/a assert(!strcmp(STR(CHILD(n, 1)), "or"));
2587n/a return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
2588n/a case not_test:
2589n/a if (NCH(n) == 1) {
2590n/a n = CHILD(n, 0);
2591n/a goto loop;
2592n/a }
2593n/a else {
2594n/a expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2595n/a if (!expression)
2596n/a return NULL;
2597n/a
2598n/a return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
2599n/a c->c_arena);
2600n/a }
2601n/a case comparison:
2602n/a if (NCH(n) == 1) {
2603n/a n = CHILD(n, 0);
2604n/a goto loop;
2605n/a }
2606n/a else {
2607n/a expr_ty expression;
2608n/a asdl_int_seq *ops;
2609n/a asdl_seq *cmps;
 /* Operators are at odd child positions, each followed by its right-hand
    operand.  ops and cmps are filled in parallel; the left-most operand
    (child 0) is converted last and passed to Compare() separately. */
2610n/a ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena);
2611n/a if (!ops)
2612n/a return NULL;
2613n/a cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
2614n/a if (!cmps) {
2615n/a return NULL;
2616n/a }
2617n/a for (i = 1; i < NCH(n); i += 2) {
2618n/a cmpop_ty newoperator;
2619n/a
2620n/a newoperator = ast_for_comp_op(c, CHILD(n, i));
2621n/a if (!newoperator) {
2622n/a return NULL;
2623n/a }
2624n/a
2625n/a expression = ast_for_expr(c, CHILD(n, i + 1));
2626n/a if (!expression) {
2627n/a return NULL;
2628n/a }
2629n/a
2630n/a asdl_seq_SET(ops, i / 2, newoperator);
2631n/a asdl_seq_SET(cmps, i / 2, expression);
2632n/a }
2633n/a expression = ast_for_expr(c, CHILD(n, 0));
2634n/a if (!expression) {
2635n/a return NULL;
2636n/a }
2637n/a
2638n/a return Compare(expression, ops, cmps, LINENO(n),
2639n/a n->n_col_offset, c->c_arena);
2640n/a }
2641n/a break;
2642n/a
2643n/a case star_expr:
2644n/a return ast_for_starred(c, n);
2645n/a /* The next five cases all handle BinOps. The main body of code
2646n/a is the same in each case, but the switch turned inside out to
2647n/a reuse the code for each type of operator.
2648n/a */
2649n/a case expr:
2650n/a case xor_expr:
2651n/a case and_expr:
2652n/a case shift_expr:
2653n/a case arith_expr:
2654n/a case term:
2655n/a if (NCH(n) == 1) {
2656n/a n = CHILD(n, 0);
2657n/a goto loop;
2658n/a }
2659n/a return ast_for_binop(c, n);
2660n/a case yield_expr: {
2661n/a node *an = NULL;
2662n/a node *en = NULL;
2663n/a int is_from = 0;
2664n/a expr_ty exp = NULL;
 /* an is the optional second child and en its last child.  When an has
    two children the node is treated as "yield from" and en is converted
    as a single expression; otherwise en is converted as a testlist.
    Without an, exp stays NULL and a bare Yield is built. */
2665n/a if (NCH(n) > 1)
2666n/a an = CHILD(n, 1); /* yield_arg */
2667n/a if (an) {
2668n/a en = CHILD(an, NCH(an) - 1);
2669n/a if (NCH(an) == 2) {
2670n/a is_from = 1;
2671n/a exp = ast_for_expr(c, en);
2672n/a }
2673n/a else
2674n/a exp = ast_for_testlist(c, en);
2675n/a if (!exp)
2676n/a return NULL;
2677n/a }
2678n/a if (is_from)
2679n/a return YieldFrom(exp, LINENO(n), n->n_col_offset, c->c_arena);
2680n/a return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
2681n/a }
2682n/a case factor:
2683n/a if (NCH(n) == 1) {
2684n/a n = CHILD(n, 0);
2685n/a goto loop;
2686n/a }
2687n/a return ast_for_factor(c, n);
2688n/a case power:
2689n/a return ast_for_power(c, n);
2690n/a default:
2691n/a PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
2692n/a return NULL;
2693n/a }
2694n/a /* should never get here unless if error is set */
2695n/a return NULL;
2696n/a}
2697n/a
2698n/astatic expr_ty
2699n/aast_for_call(struct compiling *c, const node *n, expr_ty func)
2700n/a{
2701n/a /*
2702n/a arglist: argument (',' argument)* [',']
2703n/a argument: ( test [comp_for] | '*' test | test '=' test | '**' test )
2704n/a */
2705n/a
2706n/a int i, nargs, nkeywords, ngens;
2707n/a int ndoublestars;
2708n/a asdl_seq *args;
2709n/a asdl_seq *keywords;
2710n/a
2711n/a REQ(n, arglist);
2712n/a
2713n/a nargs = 0;
2714n/a nkeywords = 0;
2715n/a ngens = 0;
2716n/a for (i = 0; i < NCH(n); i++) {
2717n/a node *ch = CHILD(n, i);
2718n/a if (TYPE(ch) == argument) {
2719n/a if (NCH(ch) == 1)
2720n/a nargs++;
2721n/a else if (TYPE(CHILD(ch, 1)) == comp_for)
2722n/a ngens++;
2723n/a else if (TYPE(CHILD(ch, 0)) == STAR)
2724n/a nargs++;
2725n/a else
2726n/a /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
2727n/a nkeywords++;
2728n/a }
2729n/a }
2730n/a if (ngens > 1 || (ngens && (nargs || nkeywords))) {
2731n/a ast_error(c, n, "Generator expression must be parenthesized "
2732n/a "if not sole argument");
2733n/a return NULL;
2734n/a }
2735n/a
2736n/a args = _Py_asdl_seq_new(nargs + ngens, c->c_arena);
2737n/a if (!args)
2738n/a return NULL;
2739n/a keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
2740n/a if (!keywords)
2741n/a return NULL;
2742n/a
2743n/a nargs = 0; /* positional arguments + iterable argument unpackings */
2744n/a nkeywords = 0; /* keyword arguments + keyword argument unpackings */
2745n/a ndoublestars = 0; /* just keyword argument unpackings */
2746n/a for (i = 0; i < NCH(n); i++) {
2747n/a node *ch = CHILD(n, i);
2748n/a if (TYPE(ch) == argument) {
2749n/a expr_ty e;
2750n/a node *chch = CHILD(ch, 0);
2751n/a if (NCH(ch) == 1) {
2752n/a /* a positional argument */
2753n/a if (nkeywords) {
2754n/a if (ndoublestars) {
2755n/a ast_error(c, chch,
2756n/a "positional argument follows "
2757n/a "keyword argument unpacking");
2758n/a }
2759n/a else {
2760n/a ast_error(c, chch,
2761n/a "positional argument follows "
2762n/a "keyword argument");
2763n/a }
2764n/a return NULL;
2765n/a }
2766n/a e = ast_for_expr(c, chch);
2767n/a if (!e)
2768n/a return NULL;
2769n/a asdl_seq_SET(args, nargs++, e);
2770n/a }
2771n/a else if (TYPE(chch) == STAR) {
2772n/a /* an iterable argument unpacking */
2773n/a expr_ty starred;
2774n/a if (ndoublestars) {
2775n/a ast_error(c, chch,
2776n/a "iterable argument unpacking follows "
2777n/a "keyword argument unpacking");
2778n/a return NULL;
2779n/a }
2780n/a e = ast_for_expr(c, CHILD(ch, 1));
2781n/a if (!e)
2782n/a return NULL;
2783n/a starred = Starred(e, Load, LINENO(chch),
2784n/a chch->n_col_offset,
2785n/a c->c_arena);
2786n/a if (!starred)
2787n/a return NULL;
2788n/a asdl_seq_SET(args, nargs++, starred);
2789n/a
2790n/a }
2791n/a else if (TYPE(chch) == DOUBLESTAR) {
2792n/a /* a keyword argument unpacking */
2793n/a keyword_ty kw;
2794n/a i++;
2795n/a e = ast_for_expr(c, CHILD(ch, 1));
2796n/a if (!e)
2797n/a return NULL;
2798n/a kw = keyword(NULL, e, c->c_arena);
2799n/a asdl_seq_SET(keywords, nkeywords++, kw);
2800n/a ndoublestars++;
2801n/a }
2802n/a else if (TYPE(CHILD(ch, 1)) == comp_for) {
2803n/a /* the lone generator expression */
2804n/a e = ast_for_genexp(c, ch);
2805n/a if (!e)
2806n/a return NULL;
2807n/a asdl_seq_SET(args, nargs++, e);
2808n/a }
2809n/a else {
2810n/a /* a keyword argument */
2811n/a keyword_ty kw;
2812n/a identifier key, tmp;
2813n/a int k;
2814n/a
2815n/a /* chch is test, but must be an identifier? */
2816n/a e = ast_for_expr(c, chch);
2817n/a if (!e)
2818n/a return NULL;
2819n/a /* f(lambda x: x[0] = 3) ends up getting parsed with
2820n/a * LHS test = lambda x: x[0], and RHS test = 3.
2821n/a * SF bug 132313 points out that complaining about a keyword
2822n/a * then is very confusing.
2823n/a */
2824n/a if (e->kind == Lambda_kind) {
2825n/a ast_error(c, chch,
2826n/a "lambda cannot contain assignment");
2827n/a return NULL;
2828n/a }
2829n/a else if (e->kind != Name_kind) {
2830n/a ast_error(c, chch,
2831n/a "keyword can't be an expression");
2832n/a return NULL;
2833n/a }
2834n/a else if (forbidden_name(c, e->v.Name.id, ch, 1)) {
2835n/a return NULL;
2836n/a }
2837n/a key = e->v.Name.id;
2838n/a for (k = 0; k < nkeywords; k++) {
2839n/a tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;
2840n/a if (tmp && !PyUnicode_Compare(tmp, key)) {
2841n/a ast_error(c, chch,
2842n/a "keyword argument repeated");
2843n/a return NULL;
2844n/a }
2845n/a }
2846n/a e = ast_for_expr(c, CHILD(ch, 2));
2847n/a if (!e)
2848n/a return NULL;
2849n/a kw = keyword(key, e, c->c_arena);
2850n/a if (!kw)
2851n/a return NULL;
2852n/a asdl_seq_SET(keywords, nkeywords++, kw);
2853n/a }
2854n/a }
2855n/a }
2856n/a
2857n/a return Call(func, args, keywords, func->lineno, func->col_offset, c->c_arena);
2858n/a}
2859n/a
2860n/astatic expr_ty
2861n/aast_for_testlist(struct compiling *c, const node* n)
2862n/a{
2863n/a /* testlist_comp: test (comp_for | (',' test)* [',']) */
2864n/a /* testlist: test (',' test)* [','] */
2865n/a assert(NCH(n) > 0);
2866n/a if (TYPE(n) == testlist_comp) {
2867n/a if (NCH(n) > 1)
2868n/a assert(TYPE(CHILD(n, 1)) != comp_for);
2869n/a }
2870n/a else {
2871n/a assert(TYPE(n) == testlist ||
2872n/a TYPE(n) == testlist_star_expr);
2873n/a }
2874n/a if (NCH(n) == 1)
2875n/a return ast_for_expr(c, CHILD(n, 0));
2876n/a else {
2877n/a asdl_seq *tmp = seq_for_testlist(c, n);
2878n/a if (!tmp)
2879n/a return NULL;
2880n/a return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2881n/a }
2882n/a}
2883n/a
/* Convert an expr_stmt node into a statement node.
 *
 * Four shapes are told apart by the node's children:
 *   - a single child: a bare expression statement (Expr);
 *   - second child is augassign: AugAssign;
 *   - second child is annassign: AnnAssign;
 *   - otherwise the second child must be '=': Assign, possibly chained.
 * Returns NULL on failure.
 */
2884n/astatic stmt_ty
2885n/aast_for_expr_stmt(struct compiling *c, const node *n)
2886n/a{
2887n/a REQ(n, expr_stmt);
2888n/a /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
2889n/a ('=' (yield_expr|testlist_star_expr))*)
2890n/a annassign: ':' test ['=' test]
2891n/a testlist_star_expr: (test|star_expr) (',' test|star_expr)* [',']
2892n/a augassign: '+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^='
2893n/a | '<<=' | '>>=' | '**=' | '//='
2894n/a test: ... here starts the operator precedence dance
2895n/a */
2896n/a
2897n/a if (NCH(n) == 1) {
2898n/a expr_ty e = ast_for_testlist(c, CHILD(n, 0));
2899n/a if (!e)
2900n/a return NULL;
2901n/a
2902n/a return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
2903n/a }
2904n/a else if (TYPE(CHILD(n, 1)) == augassign) {
2905n/a expr_ty expr1, expr2;
2906n/a operator_ty newoperator;
2907n/a node *ch = CHILD(n, 0);
2908n/a
 /* expr1 is the target (child 0), expr2 the value (child 2). */
2909n/a expr1 = ast_for_testlist(c, ch);
2910n/a if (!expr1)
2911n/a return NULL;
2912n/a if(!set_context(c, expr1, Store, ch))
2913n/a return NULL;
2914n/a /* set_context checks that most expressions are not the left side.
2915n/a Augmented assignments can only have a name, a subscript, or an
2916n/a attribute on the left, though, so we have to explicitly check for
2917n/a those. */
2918n/a switch (expr1->kind) {
2919n/a case Name_kind:
2920n/a case Attribute_kind:
2921n/a case Subscript_kind:
2922n/a break;
2923n/a default:
2924n/a ast_error(c, ch, "illegal expression for augmented assignment");
2925n/a return NULL;
2926n/a }
2927n/a
2928n/a ch = CHILD(n, 2);
2929n/a if (TYPE(ch) == testlist)
2930n/a expr2 = ast_for_testlist(c, ch);
2931n/a else
2932n/a expr2 = ast_for_expr(c, ch);
2933n/a if (!expr2)
2934n/a return NULL;
2935n/a
2936n/a newoperator = ast_for_augassign(c, CHILD(n, 1));
2937n/a if (!newoperator)
2938n/a return NULL;
2939n/a
2940n/a return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2941n/a }
2942n/a else if (TYPE(CHILD(n, 1)) == annassign) {
 /* expr1 is the target, expr2 the annotation and expr3 the optional
    value.  simple is passed through to AnnAssign(). */
2943n/a expr_ty expr1, expr2, expr3;
2944n/a node *ch = CHILD(n, 0);
2945n/a node *deep, *ann = CHILD(n, 1);
2946n/a int simple = 1;
2947n/a
2948n/a /* we keep track of parens to qualify (x) as expression not name */
 /* Descend through single-child nodes; a '(' as the first child of the
    node reached that way clears simple. */
2949n/a deep = ch;
2950n/a while (NCH(deep) == 1) {
2951n/a deep = CHILD(deep, 0);
2952n/a }
2953n/a if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
2954n/a simple = 0;
2955n/a }
2956n/a expr1 = ast_for_testlist(c, ch);
2957n/a if (!expr1) {
2958n/a return NULL;
2959n/a }
 /* Only Name, Attribute and Subscript targets are accepted; their
    context is switched to Store directly on the node. */
2960n/a switch (expr1->kind) {
2961n/a case Name_kind:
2962n/a if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
2963n/a return NULL;
2964n/a }
2965n/a expr1->v.Name.ctx = Store;
2966n/a break;
2967n/a case Attribute_kind:
2968n/a if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
2969n/a return NULL;
2970n/a }
2971n/a expr1->v.Attribute.ctx = Store;
2972n/a break;
2973n/a case Subscript_kind:
2974n/a expr1->v.Subscript.ctx = Store;
2975n/a break;
2976n/a case List_kind:
2977n/a ast_error(c, ch,
2978n/a "only single target (not list) can be annotated");
2979n/a return NULL;
2980n/a case Tuple_kind:
2981n/a ast_error(c, ch,
2982n/a "only single target (not tuple) can be annotated");
2983n/a return NULL;
2984n/a default:
2985n/a ast_error(c, ch,
2986n/a "illegal target for annotation");
2987n/a return NULL;
2988n/a }
2989n/a
 /* Only a Name target can leave simple set to 1. */
2990n/a if (expr1->kind != Name_kind) {
2991n/a simple = 0;
2992n/a }
2993n/a ch = CHILD(ann, 1);
2994n/a expr2 = ast_for_expr(c, ch);
2995n/a if (!expr2) {
2996n/a return NULL;
2997n/a }
 /* An annassign with two children has no '=' value part; otherwise the
    value is its fourth child. */
2998n/a if (NCH(ann) == 2) {
2999n/a return AnnAssign(expr1, expr2, NULL, simple,
3000n/a LINENO(n), n->n_col_offset, c->c_arena);
3001n/a }
3002n/a else {
3003n/a ch = CHILD(ann, 3);
3004n/a expr3 = ast_for_expr(c, ch);
3005n/a if (!expr3) {
3006n/a return NULL;
3007n/a }
3008n/a return AnnAssign(expr1, expr2, expr3, simple,
3009n/a LINENO(n), n->n_col_offset, c->c_arena);
3010n/a }
3011n/a }
3012n/a else {
3013n/a int i;
3014n/a asdl_seq *targets;
3015n/a node *value;
3016n/a expr_ty expression;
3017n/a
3018n/a /* a normal assignment */
3019n/a REQ(CHILD(n, 1), EQUAL);
 /* Targets sit at even child positions before the last child; the last
    child is the assigned value. */
3020n/a targets = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3021n/a if (!targets)
3022n/a return NULL;
3023n/a for (i = 0; i < NCH(n) - 2; i += 2) {
3024n/a expr_ty e;
3025n/a node *ch = CHILD(n, i);
3026n/a if (TYPE(ch) == yield_expr) {
3027n/a ast_error(c, ch, "assignment to yield expression not possible");
3028n/a return NULL;
3029n/a }
3030n/a e = ast_for_testlist(c, ch);
3031n/a if (!e)
3032n/a return NULL;
3033n/a
3034n/a /* set context to assign */
3035n/a if (!set_context(c, e, Store, CHILD(n, i)))
3036n/a return NULL;
3037n/a
3038n/a asdl_seq_SET(targets, i / 2, e);
3039n/a }
3040n/a value = CHILD(n, NCH(n) - 1);
3041n/a if (TYPE(value) == testlist_star_expr)
3042n/a expression = ast_for_testlist(c, value);
3043n/a else
3044n/a expression = ast_for_expr(c, value);
3045n/a if (!expression)
3046n/a return NULL;
3047n/a return Assign(targets, expression, LINENO(n), n->n_col_offset, c->c_arena);
3048n/a }
3049n/a}
3050n/a
3051n/a
3052n/astatic asdl_seq *
3053n/aast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
3054n/a{
3055n/a asdl_seq *seq;
3056n/a int i;
3057n/a expr_ty e;
3058n/a
3059n/a REQ(n, exprlist);
3060n/a
3061n/a seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3062n/a if (!seq)
3063n/a return NULL;
3064n/a for (i = 0; i < NCH(n); i += 2) {
3065n/a e = ast_for_expr(c, CHILD(n, i));
3066n/a if (!e)
3067n/a return NULL;
3068n/a asdl_seq_SET(seq, i / 2, e);
3069n/a if (context && !set_context(c, e, context, CHILD(n, i)))
3070n/a return NULL;
3071n/a }
3072n/a return seq;
3073n/a}
3074n/a
3075n/astatic stmt_ty
3076n/aast_for_del_stmt(struct compiling *c, const node *n)
3077n/a{
3078n/a asdl_seq *expr_list;
3079n/a
3080n/a /* del_stmt: 'del' exprlist */
3081n/a REQ(n, del_stmt);
3082n/a
3083n/a expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
3084n/a if (!expr_list)
3085n/a return NULL;
3086n/a return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
3087n/a}
3088n/a
3089n/astatic stmt_ty
3090n/aast_for_flow_stmt(struct compiling *c, const node *n)
3091n/a{
3092n/a /*
3093n/a flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
3094n/a | yield_stmt
3095n/a break_stmt: 'break'
3096n/a continue_stmt: 'continue'
3097n/a return_stmt: 'return' [testlist]
3098n/a yield_stmt: yield_expr
3099n/a yield_expr: 'yield' testlist | 'yield' 'from' test
3100n/a raise_stmt: 'raise' [test [',' test [',' test]]]
3101n/a */
3102n/a node *ch;
3103n/a
3104n/a REQ(n, flow_stmt);
3105n/a ch = CHILD(n, 0);
3106n/a switch (TYPE(ch)) {
3107n/a case break_stmt:
3108n/a return Break(LINENO(n), n->n_col_offset, c->c_arena);
3109n/a case continue_stmt:
3110n/a return Continue(LINENO(n), n->n_col_offset, c->c_arena);
3111n/a case yield_stmt: { /* will reduce to yield_expr */
3112n/a expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
3113n/a if (!exp)
3114n/a return NULL;
3115n/a return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
3116n/a }
3117n/a case return_stmt:
3118n/a if (NCH(ch) == 1)
3119n/a return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
3120n/a else {
3121n/a expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
3122n/a if (!expression)
3123n/a return NULL;
3124n/a return Return(expression, LINENO(n), n->n_col_offset, c->c_arena);
3125n/a }
3126n/a case raise_stmt:
3127n/a if (NCH(ch) == 1)
3128n/a return Raise(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
3129n/a else if (NCH(ch) >= 2) {
3130n/a expr_ty cause = NULL;
3131n/a expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
3132n/a if (!expression)
3133n/a return NULL;
3134n/a if (NCH(ch) == 4) {
3135n/a cause = ast_for_expr(c, CHILD(ch, 3));
3136n/a if (!cause)
3137n/a return NULL;
3138n/a }
3139n/a return Raise(expression, cause, LINENO(n), n->n_col_offset, c->c_arena);
3140n/a }
3141n/a default:
3142n/a PyErr_Format(PyExc_SystemError,
3143n/a "unexpected flow_stmt: %d", TYPE(ch));
3144n/a return NULL;
3145n/a }
3146n/a}
3147n/a
3148n/astatic alias_ty
3149n/aalias_for_import_name(struct compiling *c, const node *n, int store)
3150n/a{
3151n/a /*
3152n/a import_as_name: NAME ['as' NAME]
3153n/a dotted_as_name: dotted_name ['as' NAME]
3154n/a dotted_name: NAME ('.' NAME)*
3155n/a */
3156n/a identifier str, name;
3157n/a
3158n/a loop:
3159n/a switch (TYPE(n)) {
3160n/a case import_as_name: {
3161n/a node *name_node = CHILD(n, 0);
3162n/a str = NULL;
3163n/a name = NEW_IDENTIFIER(name_node);
3164n/a if (!name)
3165n/a return NULL;
3166n/a if (NCH(n) == 3) {
3167n/a node *str_node = CHILD(n, 2);
3168n/a str = NEW_IDENTIFIER(str_node);
3169n/a if (!str)
3170n/a return NULL;
3171n/a if (store && forbidden_name(c, str, str_node, 0))
3172n/a return NULL;
3173n/a }
3174n/a else {
3175n/a if (forbidden_name(c, name, name_node, 0))
3176n/a return NULL;
3177n/a }
3178n/a return alias(name, str, c->c_arena);
3179n/a }
3180n/a case dotted_as_name:
3181n/a if (NCH(n) == 1) {
3182n/a n = CHILD(n, 0);
3183n/a goto loop;
3184n/a }
3185n/a else {
3186n/a node *asname_node = CHILD(n, 2);
3187n/a alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
3188n/a if (!a)
3189n/a return NULL;
3190n/a assert(!a->asname);
3191n/a a->asname = NEW_IDENTIFIER(asname_node);
3192n/a if (!a->asname)
3193n/a return NULL;
3194n/a if (forbidden_name(c, a->asname, asname_node, 0))
3195n/a return NULL;
3196n/a return a;
3197n/a }
3198n/a break;
3199n/a case dotted_name:
3200n/a if (NCH(n) == 1) {
3201n/a node *name_node = CHILD(n, 0);
3202n/a name = NEW_IDENTIFIER(name_node);
3203n/a if (!name)
3204n/a return NULL;
3205n/a if (store && forbidden_name(c, name, name_node, 0))
3206n/a return NULL;
3207n/a return alias(name, NULL, c->c_arena);
3208n/a }
3209n/a else {
3210n/a /* Create a string of the form "a.b.c" */
3211n/a int i;
3212n/a size_t len;
3213n/a char *s;
3214n/a PyObject *uni;
3215n/a
3216n/a len = 0;
3217n/a for (i = 0; i < NCH(n); i += 2)
3218n/a /* length of string plus one for the dot */
3219n/a len += strlen(STR(CHILD(n, i))) + 1;
3220n/a len--; /* the last name doesn't have a dot */
3221n/a str = PyBytes_FromStringAndSize(NULL, len);
3222n/a if (!str)
3223n/a return NULL;
3224n/a s = PyBytes_AS_STRING(str);
3225n/a if (!s)
3226n/a return NULL;
3227n/a for (i = 0; i < NCH(n); i += 2) {
3228n/a char *sch = STR(CHILD(n, i));
3229n/a strcpy(s, STR(CHILD(n, i)));
3230n/a s += strlen(sch);
3231n/a *s++ = '.';
3232n/a }
3233n/a --s;
3234n/a *s = '\0';
3235n/a uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
3236n/a PyBytes_GET_SIZE(str),
3237n/a NULL);
3238n/a Py_DECREF(str);
3239n/a if (!uni)
3240n/a return NULL;
3241n/a str = uni;
3242n/a PyUnicode_InternInPlace(&str);
3243n/a if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3244n/a Py_DECREF(str);
3245n/a return NULL;
3246n/a }
3247n/a return alias(str, NULL, c->c_arena);
3248n/a }
3249n/a break;
3250n/a case STAR:
3251n/a str = PyUnicode_InternFromString("*");
3252n/a if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3253n/a Py_DECREF(str);
3254n/a return NULL;
3255n/a }
3256n/a return alias(str, NULL, c->c_arena);
3257n/a default:
3258n/a PyErr_Format(PyExc_SystemError,
3259n/a "unexpected import name: %d", TYPE(n));
3260n/a return NULL;
3261n/a }
3262n/a
3263n/a PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
3264n/a return NULL;
3265n/a}
3266n/a
3267n/astatic stmt_ty
3268n/aast_for_import_stmt(struct compiling *c, const node *n)
3269n/a{
3270n/a /*
3271n/a import_stmt: import_name | import_from
3272n/a import_name: 'import' dotted_as_names
3273n/a import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
3274n/a 'import' ('*' | '(' import_as_names ')' | import_as_names)
3275n/a */
3276n/a int lineno;
3277n/a int col_offset;
3278n/a int i;
3279n/a asdl_seq *aliases;
3280n/a
3281n/a REQ(n, import_stmt);
3282n/a lineno = LINENO(n);
3283n/a col_offset = n->n_col_offset;
3284n/a n = CHILD(n, 0);
3285n/a if (TYPE(n) == import_name) {
3286n/a n = CHILD(n, 1);
3287n/a REQ(n, dotted_as_names);
3288n/a aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3289n/a if (!aliases)
3290n/a return NULL;
3291n/a for (i = 0; i < NCH(n); i += 2) {
3292n/a alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3293n/a if (!import_alias)
3294n/a return NULL;
3295n/a asdl_seq_SET(aliases, i / 2, import_alias);
3296n/a }
3297n/a return Import(aliases, lineno, col_offset, c->c_arena);
3298n/a }
3299n/a else if (TYPE(n) == import_from) {
3300n/a int n_children;
3301n/a int idx, ndots = 0;
3302n/a alias_ty mod = NULL;
3303n/a identifier modname = NULL;
3304n/a
3305n/a /* Count the number of dots (for relative imports) and check for the
3306n/a optional module name */
3307n/a for (idx = 1; idx < NCH(n); idx++) {
3308n/a if (TYPE(CHILD(n, idx)) == dotted_name) {
3309n/a mod = alias_for_import_name(c, CHILD(n, idx), 0);
3310n/a if (!mod)
3311n/a return NULL;
3312n/a idx++;
3313n/a break;
3314n/a } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
3315n/a /* three consecutive dots are tokenized as one ELLIPSIS */
3316n/a ndots += 3;
3317n/a continue;
3318n/a } else if (TYPE(CHILD(n, idx)) != DOT) {
3319n/a break;
3320n/a }
3321n/a ndots++;
3322n/a }
3323n/a idx++; /* skip over the 'import' keyword */
3324n/a switch (TYPE(CHILD(n, idx))) {
3325n/a case STAR:
3326n/a /* from ... import * */
3327n/a n = CHILD(n, idx);
3328n/a n_children = 1;
3329n/a break;
3330n/a case LPAR:
3331n/a /* from ... import (x, y, z) */
3332n/a n = CHILD(n, idx + 1);
3333n/a n_children = NCH(n);
3334n/a break;
3335n/a case import_as_names:
3336n/a /* from ... import x, y, z */
3337n/a n = CHILD(n, idx);
3338n/a n_children = NCH(n);
3339n/a if (n_children % 2 == 0) {
3340n/a ast_error(c, n, "trailing comma not allowed without"
3341n/a " surrounding parentheses");
3342n/a return NULL;
3343n/a }
3344n/a break;
3345n/a default:
3346n/a ast_error(c, n, "Unexpected node-type in from-import");
3347n/a return NULL;
3348n/a }
3349n/a
3350n/a aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
3351n/a if (!aliases)
3352n/a return NULL;
3353n/a
3354n/a /* handle "from ... import *" special b/c there's no children */
3355n/a if (TYPE(n) == STAR) {
3356n/a alias_ty import_alias = alias_for_import_name(c, n, 1);
3357n/a if (!import_alias)
3358n/a return NULL;
3359n/a asdl_seq_SET(aliases, 0, import_alias);
3360n/a }
3361n/a else {
3362n/a for (i = 0; i < NCH(n); i += 2) {
3363n/a alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3364n/a if (!import_alias)
3365n/a return NULL;
3366n/a asdl_seq_SET(aliases, i / 2, import_alias);
3367n/a }
3368n/a }
3369n/a if (mod != NULL)
3370n/a modname = mod->name;
3371n/a return ImportFrom(modname, aliases, ndots, lineno, col_offset,
3372n/a c->c_arena);
3373n/a }
3374n/a PyErr_Format(PyExc_SystemError,
3375n/a "unknown import statement: starts with command '%s'",
3376n/a STR(CHILD(n, 0)));
3377n/a return NULL;
3378n/a}
3379n/a
3380n/astatic stmt_ty
3381n/aast_for_global_stmt(struct compiling *c, const node *n)
3382n/a{
3383n/a /* global_stmt: 'global' NAME (',' NAME)* */
3384n/a identifier name;
3385n/a asdl_seq *s;
3386n/a int i;
3387n/a
3388n/a REQ(n, global_stmt);
3389n/a s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3390n/a if (!s)
3391n/a return NULL;
3392n/a for (i = 1; i < NCH(n); i += 2) {
3393n/a name = NEW_IDENTIFIER(CHILD(n, i));
3394n/a if (!name)
3395n/a return NULL;
3396n/a asdl_seq_SET(s, i / 2, name);
3397n/a }
3398n/a return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
3399n/a}
3400n/a
3401n/astatic stmt_ty
3402n/aast_for_nonlocal_stmt(struct compiling *c, const node *n)
3403n/a{
3404n/a /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
3405n/a identifier name;
3406n/a asdl_seq *s;
3407n/a int i;
3408n/a
3409n/a REQ(n, nonlocal_stmt);
3410n/a s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3411n/a if (!s)
3412n/a return NULL;
3413n/a for (i = 1; i < NCH(n); i += 2) {
3414n/a name = NEW_IDENTIFIER(CHILD(n, i));
3415n/a if (!name)
3416n/a return NULL;
3417n/a asdl_seq_SET(s, i / 2, name);
3418n/a }
3419n/a return Nonlocal(s, LINENO(n), n->n_col_offset, c->c_arena);
3420n/a}
3421n/a
3422n/astatic stmt_ty
3423n/aast_for_assert_stmt(struct compiling *c, const node *n)
3424n/a{
3425n/a /* assert_stmt: 'assert' test [',' test] */
3426n/a REQ(n, assert_stmt);
3427n/a if (NCH(n) == 2) {
3428n/a expr_ty expression = ast_for_expr(c, CHILD(n, 1));
3429n/a if (!expression)
3430n/a return NULL;
3431n/a return Assert(expression, NULL, LINENO(n), n->n_col_offset, c->c_arena);
3432n/a }
3433n/a else if (NCH(n) == 4) {
3434n/a expr_ty expr1, expr2;
3435n/a
3436n/a expr1 = ast_for_expr(c, CHILD(n, 1));
3437n/a if (!expr1)
3438n/a return NULL;
3439n/a expr2 = ast_for_expr(c, CHILD(n, 3));
3440n/a if (!expr2)
3441n/a return NULL;
3442n/a
3443n/a return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
3444n/a }
3445n/a PyErr_Format(PyExc_SystemError,
3446n/a "improper number of parts to 'assert' statement: %d",
3447n/a NCH(n));
3448n/a return NULL;
3449n/a}
3450n/a
3451n/astatic asdl_seq *
3452n/aast_for_suite(struct compiling *c, const node *n)
3453n/a{
3454n/a /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
3455n/a asdl_seq *seq;
3456n/a stmt_ty s;
3457n/a int i, total, num, end, pos = 0;
3458n/a node *ch;
3459n/a
3460n/a REQ(n, suite);
3461n/a
3462n/a total = num_stmts(n);
3463n/a seq = _Py_asdl_seq_new(total, c->c_arena);
3464n/a if (!seq)
3465n/a return NULL;
3466n/a if (TYPE(CHILD(n, 0)) == simple_stmt) {
3467n/a n = CHILD(n, 0);
3468n/a /* simple_stmt always ends with a NEWLINE,
3469n/a and may have a trailing SEMI
3470n/a */
3471n/a end = NCH(n) - 1;
3472n/a if (TYPE(CHILD(n, end - 1)) == SEMI)
3473n/a end--;
3474n/a /* loop by 2 to skip semi-colons */
3475n/a for (i = 0; i < end; i += 2) {
3476n/a ch = CHILD(n, i);
3477n/a s = ast_for_stmt(c, ch);
3478n/a if (!s)
3479n/a return NULL;
3480n/a asdl_seq_SET(seq, pos++, s);
3481n/a }
3482n/a }
3483n/a else {
3484n/a for (i = 2; i < (NCH(n) - 1); i++) {
3485n/a ch = CHILD(n, i);
3486n/a REQ(ch, stmt);
3487n/a num = num_stmts(ch);
3488n/a if (num == 1) {
3489n/a /* small_stmt or compound_stmt with only one child */
3490n/a s = ast_for_stmt(c, ch);
3491n/a if (!s)
3492n/a return NULL;
3493n/a asdl_seq_SET(seq, pos++, s);
3494n/a }
3495n/a else {
3496n/a int j;
3497n/a ch = CHILD(ch, 0);
3498n/a REQ(ch, simple_stmt);
3499n/a for (j = 0; j < NCH(ch); j += 2) {
3500n/a /* statement terminates with a semi-colon ';' */
3501n/a if (NCH(CHILD(ch, j)) == 0) {
3502n/a assert((j + 1) == NCH(ch));
3503n/a break;
3504n/a }
3505n/a s = ast_for_stmt(c, CHILD(ch, j));
3506n/a if (!s)
3507n/a return NULL;
3508n/a asdl_seq_SET(seq, pos++, s);
3509n/a }
3510n/a }
3511n/a }
3512n/a }
3513n/a assert(pos == seq->size);
3514n/a return seq;
3515n/a}
3516n/a
3517n/astatic stmt_ty
3518n/aast_for_if_stmt(struct compiling *c, const node *n)
3519n/a{
3520n/a /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
3521n/a ['else' ':' suite]
3522n/a */
3523n/a char *s;
3524n/a
3525n/a REQ(n, if_stmt);
3526n/a
3527n/a if (NCH(n) == 4) {
3528n/a expr_ty expression;
3529n/a asdl_seq *suite_seq;
3530n/a
3531n/a expression = ast_for_expr(c, CHILD(n, 1));
3532n/a if (!expression)
3533n/a return NULL;
3534n/a suite_seq = ast_for_suite(c, CHILD(n, 3));
3535n/a if (!suite_seq)
3536n/a return NULL;
3537n/a
3538n/a return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
3539n/a c->c_arena);
3540n/a }
3541n/a
3542n/a s = STR(CHILD(n, 4));
3543n/a /* s[2], the third character in the string, will be
3544n/a 's' for el_s_e, or
3545n/a 'i' for el_i_f
3546n/a */
3547n/a if (s[2] == 's') {
3548n/a expr_ty expression;
3549n/a asdl_seq *seq1, *seq2;
3550n/a
3551n/a expression = ast_for_expr(c, CHILD(n, 1));
3552n/a if (!expression)
3553n/a return NULL;
3554n/a seq1 = ast_for_suite(c, CHILD(n, 3));
3555n/a if (!seq1)
3556n/a return NULL;
3557n/a seq2 = ast_for_suite(c, CHILD(n, 6));
3558n/a if (!seq2)
3559n/a return NULL;
3560n/a
3561n/a return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
3562n/a c->c_arena);
3563n/a }
3564n/a else if (s[2] == 'i') {
3565n/a int i, n_elif, has_else = 0;
3566n/a expr_ty expression;
3567n/a asdl_seq *suite_seq;
3568n/a asdl_seq *orelse = NULL;
3569n/a n_elif = NCH(n) - 4;
3570n/a /* must reference the child n_elif+1 since 'else' token is third,
3571n/a not fourth, child from the end. */
3572n/a if (TYPE(CHILD(n, (n_elif + 1))) == NAME
3573n/a && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
3574n/a has_else = 1;
3575n/a n_elif -= 3;
3576n/a }
3577n/a n_elif /= 4;
3578n/a
3579n/a if (has_else) {
3580n/a asdl_seq *suite_seq2;
3581n/a
3582n/a orelse = _Py_asdl_seq_new(1, c->c_arena);
3583n/a if (!orelse)
3584n/a return NULL;
3585n/a expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
3586n/a if (!expression)
3587n/a return NULL;
3588n/a suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
3589n/a if (!suite_seq)
3590n/a return NULL;
3591n/a suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
3592n/a if (!suite_seq2)
3593n/a return NULL;
3594n/a
3595n/a asdl_seq_SET(orelse, 0,
3596n/a If(expression, suite_seq, suite_seq2,
3597n/a LINENO(CHILD(n, NCH(n) - 6)),
3598n/a CHILD(n, NCH(n) - 6)->n_col_offset,
3599n/a c->c_arena));
3600n/a /* the just-created orelse handled the last elif */
3601n/a n_elif--;
3602n/a }
3603n/a
3604n/a for (i = 0; i < n_elif; i++) {
3605n/a int off = 5 + (n_elif - i - 1) * 4;
3606n/a asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
3607n/a if (!newobj)
3608n/a return NULL;
3609n/a expression = ast_for_expr(c, CHILD(n, off));
3610n/a if (!expression)
3611n/a return NULL;
3612n/a suite_seq = ast_for_suite(c, CHILD(n, off + 2));
3613n/a if (!suite_seq)
3614n/a return NULL;
3615n/a
3616n/a asdl_seq_SET(newobj, 0,
3617n/a If(expression, suite_seq, orelse,
3618n/a LINENO(CHILD(n, off)),
3619n/a CHILD(n, off)->n_col_offset, c->c_arena));
3620n/a orelse = newobj;
3621n/a }
3622n/a expression = ast_for_expr(c, CHILD(n, 1));
3623n/a if (!expression)
3624n/a return NULL;
3625n/a suite_seq = ast_for_suite(c, CHILD(n, 3));
3626n/a if (!suite_seq)
3627n/a return NULL;
3628n/a return If(expression, suite_seq, orelse,
3629n/a LINENO(n), n->n_col_offset, c->c_arena);
3630n/a }
3631n/a
3632n/a PyErr_Format(PyExc_SystemError,
3633n/a "unexpected token in 'if' statement: %s", s);
3634n/a return NULL;
3635n/a}
3636n/a
3637n/astatic stmt_ty
3638n/aast_for_while_stmt(struct compiling *c, const node *n)
3639n/a{
3640n/a /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
3641n/a REQ(n, while_stmt);
3642n/a
3643n/a if (NCH(n) == 4) {
3644n/a expr_ty expression;
3645n/a asdl_seq *suite_seq;
3646n/a
3647n/a expression = ast_for_expr(c, CHILD(n, 1));
3648n/a if (!expression)
3649n/a return NULL;
3650n/a suite_seq = ast_for_suite(c, CHILD(n, 3));
3651n/a if (!suite_seq)
3652n/a return NULL;
3653n/a return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena);
3654n/a }
3655n/a else if (NCH(n) == 7) {
3656n/a expr_ty expression;
3657n/a asdl_seq *seq1, *seq2;
3658n/a
3659n/a expression = ast_for_expr(c, CHILD(n, 1));
3660n/a if (!expression)
3661n/a return NULL;
3662n/a seq1 = ast_for_suite(c, CHILD(n, 3));
3663n/a if (!seq1)
3664n/a return NULL;
3665n/a seq2 = ast_for_suite(c, CHILD(n, 6));
3666n/a if (!seq2)
3667n/a return NULL;
3668n/a
3669n/a return While(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena);
3670n/a }
3671n/a
3672n/a PyErr_Format(PyExc_SystemError,
3673n/a "wrong number of tokens for 'while' statement: %d",
3674n/a NCH(n));
3675n/a return NULL;
3676n/a}
3677n/a
3678n/astatic stmt_ty
3679n/aast_for_for_stmt(struct compiling *c, const node *n, int is_async)
3680n/a{
3681n/a asdl_seq *_target, *seq = NULL, *suite_seq;
3682n/a expr_ty expression;
3683n/a expr_ty target, first;
3684n/a const node *node_target;
3685n/a /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
3686n/a REQ(n, for_stmt);
3687n/a
3688n/a if (NCH(n) == 9) {
3689n/a seq = ast_for_suite(c, CHILD(n, 8));
3690n/a if (!seq)
3691n/a return NULL;
3692n/a }
3693n/a
3694n/a node_target = CHILD(n, 1);
3695n/a _target = ast_for_exprlist(c, node_target, Store);
3696n/a if (!_target)
3697n/a return NULL;
3698n/a /* Check the # of children rather than the length of _target, since
3699n/a for x, in ... has 1 element in _target, but still requires a Tuple. */
3700n/a first = (expr_ty)asdl_seq_GET(_target, 0);
3701n/a if (NCH(node_target) == 1)
3702n/a target = first;
3703n/a else
3704n/a target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena);
3705n/a
3706n/a expression = ast_for_testlist(c, CHILD(n, 3));
3707n/a if (!expression)
3708n/a return NULL;
3709n/a suite_seq = ast_for_suite(c, CHILD(n, 5));
3710n/a if (!suite_seq)
3711n/a return NULL;
3712n/a
3713n/a if (is_async)
3714n/a return AsyncFor(target, expression, suite_seq, seq,
3715n/a LINENO(n), n->n_col_offset,
3716n/a c->c_arena);
3717n/a else
3718n/a return For(target, expression, suite_seq, seq,
3719n/a LINENO(n), n->n_col_offset,
3720n/a c->c_arena);
3721n/a}
3722n/a
3723n/astatic excepthandler_ty
3724n/aast_for_except_clause(struct compiling *c, const node *exc, node *body)
3725n/a{
3726n/a /* except_clause: 'except' [test ['as' test]] */
3727n/a REQ(exc, except_clause);
3728n/a REQ(body, suite);
3729n/a
3730n/a if (NCH(exc) == 1) {
3731n/a asdl_seq *suite_seq = ast_for_suite(c, body);
3732n/a if (!suite_seq)
3733n/a return NULL;
3734n/a
3735n/a return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
3736n/a exc->n_col_offset, c->c_arena);
3737n/a }
3738n/a else if (NCH(exc) == 2) {
3739n/a expr_ty expression;
3740n/a asdl_seq *suite_seq;
3741n/a
3742n/a expression = ast_for_expr(c, CHILD(exc, 1));
3743n/a if (!expression)
3744n/a return NULL;
3745n/a suite_seq = ast_for_suite(c, body);
3746n/a if (!suite_seq)
3747n/a return NULL;
3748n/a
3749n/a return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
3750n/a exc->n_col_offset, c->c_arena);
3751n/a }
3752n/a else if (NCH(exc) == 4) {
3753n/a asdl_seq *suite_seq;
3754n/a expr_ty expression;
3755n/a identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
3756n/a if (!e)
3757n/a return NULL;
3758n/a if (forbidden_name(c, e, CHILD(exc, 3), 0))
3759n/a return NULL;
3760n/a expression = ast_for_expr(c, CHILD(exc, 1));
3761n/a if (!expression)
3762n/a return NULL;
3763n/a suite_seq = ast_for_suite(c, body);
3764n/a if (!suite_seq)
3765n/a return NULL;
3766n/a
3767n/a return ExceptHandler(expression, e, suite_seq, LINENO(exc),
3768n/a exc->n_col_offset, c->c_arena);
3769n/a }
3770n/a
3771n/a PyErr_Format(PyExc_SystemError,
3772n/a "wrong number of children for 'except' clause: %d",
3773n/a NCH(exc));
3774n/a return NULL;
3775n/a}
3776n/a
3777n/astatic stmt_ty
3778n/aast_for_try_stmt(struct compiling *c, const node *n)
3779n/a{
3780n/a const int nch = NCH(n);
3781n/a int n_except = (nch - 3)/3;
3782n/a asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
3783n/a
3784n/a REQ(n, try_stmt);
3785n/a
3786n/a body = ast_for_suite(c, CHILD(n, 2));
3787n/a if (body == NULL)
3788n/a return NULL;
3789n/a
3790n/a if (TYPE(CHILD(n, nch - 3)) == NAME) {
3791n/a if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
3792n/a if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
3793n/a /* we can assume it's an "else",
3794n/a because nch >= 9 for try-else-finally and
3795n/a it would otherwise have a type of except_clause */
3796n/a orelse = ast_for_suite(c, CHILD(n, nch - 4));
3797n/a if (orelse == NULL)
3798n/a return NULL;
3799n/a n_except--;
3800n/a }
3801n/a
3802n/a finally = ast_for_suite(c, CHILD(n, nch - 1));
3803n/a if (finally == NULL)
3804n/a return NULL;
3805n/a n_except--;
3806n/a }
3807n/a else {
3808n/a /* we can assume it's an "else",
3809n/a otherwise it would have a type of except_clause */
3810n/a orelse = ast_for_suite(c, CHILD(n, nch - 1));
3811n/a if (orelse == NULL)
3812n/a return NULL;
3813n/a n_except--;
3814n/a }
3815n/a }
3816n/a else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
3817n/a ast_error(c, n, "malformed 'try' statement");
3818n/a return NULL;
3819n/a }
3820n/a
3821n/a if (n_except > 0) {
3822n/a int i;
3823n/a /* process except statements to create a try ... except */
3824n/a handlers = _Py_asdl_seq_new(n_except, c->c_arena);
3825n/a if (handlers == NULL)
3826n/a return NULL;
3827n/a
3828n/a for (i = 0; i < n_except; i++) {
3829n/a excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
3830n/a CHILD(n, 5 + i * 3));
3831n/a if (!e)
3832n/a return NULL;
3833n/a asdl_seq_SET(handlers, i, e);
3834n/a }
3835n/a }
3836n/a
3837n/a assert(finally != NULL || asdl_seq_LEN(handlers));
3838n/a return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset, c->c_arena);
3839n/a}
3840n/a
3841n/a/* with_item: test ['as' expr] */
3842n/astatic withitem_ty
3843n/aast_for_with_item(struct compiling *c, const node *n)
3844n/a{
3845n/a expr_ty context_expr, optional_vars = NULL;
3846n/a
3847n/a REQ(n, with_item);
3848n/a context_expr = ast_for_expr(c, CHILD(n, 0));
3849n/a if (!context_expr)
3850n/a return NULL;
3851n/a if (NCH(n) == 3) {
3852n/a optional_vars = ast_for_expr(c, CHILD(n, 2));
3853n/a
3854n/a if (!optional_vars) {
3855n/a return NULL;
3856n/a }
3857n/a if (!set_context(c, optional_vars, Store, n)) {
3858n/a return NULL;
3859n/a }
3860n/a }
3861n/a
3862n/a return withitem(context_expr, optional_vars, c->c_arena);
3863n/a}
3864n/a
3865n/a/* with_stmt: 'with' with_item (',' with_item)* ':' suite */
3866n/astatic stmt_ty
3867n/aast_for_with_stmt(struct compiling *c, const node *n, int is_async)
3868n/a{
3869n/a int i, n_items;
3870n/a asdl_seq *items, *body;
3871n/a
3872n/a REQ(n, with_stmt);
3873n/a
3874n/a n_items = (NCH(n) - 2) / 2;
3875n/a items = _Py_asdl_seq_new(n_items, c->c_arena);
3876n/a if (!items)
3877n/a return NULL;
3878n/a for (i = 1; i < NCH(n) - 2; i += 2) {
3879n/a withitem_ty item = ast_for_with_item(c, CHILD(n, i));
3880n/a if (!item)
3881n/a return NULL;
3882n/a asdl_seq_SET(items, (i - 1) / 2, item);
3883n/a }
3884n/a
3885n/a body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
3886n/a if (!body)
3887n/a return NULL;
3888n/a
3889n/a if (is_async)
3890n/a return AsyncWith(items, body, LINENO(n), n->n_col_offset, c->c_arena);
3891n/a else
3892n/a return With(items, body, LINENO(n), n->n_col_offset, c->c_arena);
3893n/a}
3894n/a
3895n/astatic stmt_ty
3896n/aast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
3897n/a{
3898n/a /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
3899n/a PyObject *classname;
3900n/a asdl_seq *s;
3901n/a expr_ty call;
3902n/a
3903n/a REQ(n, classdef);
3904n/a
3905n/a if (NCH(n) == 4) { /* class NAME ':' suite */
3906n/a s = ast_for_suite(c, CHILD(n, 3));
3907n/a if (!s)
3908n/a return NULL;
3909n/a classname = NEW_IDENTIFIER(CHILD(n, 1));
3910n/a if (!classname)
3911n/a return NULL;
3912n/a if (forbidden_name(c, classname, CHILD(n, 3), 0))
3913n/a return NULL;
3914n/a return ClassDef(classname, NULL, NULL, s, decorator_seq, LINENO(n),
3915n/a n->n_col_offset, c->c_arena);
3916n/a }
3917n/a
3918n/a if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
3919n/a s = ast_for_suite(c, CHILD(n,5));
3920n/a if (!s)
3921n/a return NULL;
3922n/a classname = NEW_IDENTIFIER(CHILD(n, 1));
3923n/a if (!classname)
3924n/a return NULL;
3925n/a if (forbidden_name(c, classname, CHILD(n, 3), 0))
3926n/a return NULL;
3927n/a return ClassDef(classname, NULL, NULL, s, decorator_seq, LINENO(n),
3928n/a n->n_col_offset, c->c_arena);
3929n/a }
3930n/a
3931n/a /* class NAME '(' arglist ')' ':' suite */
3932n/a /* build up a fake Call node so we can extract its pieces */
3933n/a {
3934n/a PyObject *dummy_name;
3935n/a expr_ty dummy;
3936n/a dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
3937n/a if (!dummy_name)
3938n/a return NULL;
3939n/a dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset, c->c_arena);
3940n/a call = ast_for_call(c, CHILD(n, 3), dummy);
3941n/a if (!call)
3942n/a return NULL;
3943n/a }
3944n/a s = ast_for_suite(c, CHILD(n, 6));
3945n/a if (!s)
3946n/a return NULL;
3947n/a classname = NEW_IDENTIFIER(CHILD(n, 1));
3948n/a if (!classname)
3949n/a return NULL;
3950n/a if (forbidden_name(c, classname, CHILD(n, 1), 0))
3951n/a return NULL;
3952n/a
3953n/a return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
3954n/a decorator_seq, LINENO(n), n->n_col_offset, c->c_arena);
3955n/a}
3956n/a
3957n/astatic stmt_ty
3958n/aast_for_stmt(struct compiling *c, const node *n)
3959n/a{
3960n/a if (TYPE(n) == stmt) {
3961n/a assert(NCH(n) == 1);
3962n/a n = CHILD(n, 0);
3963n/a }
3964n/a if (TYPE(n) == simple_stmt) {
3965n/a assert(num_stmts(n) == 1);
3966n/a n = CHILD(n, 0);
3967n/a }
3968n/a if (TYPE(n) == small_stmt) {
3969n/a n = CHILD(n, 0);
3970n/a /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
3971n/a | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
3972n/a */
3973n/a switch (TYPE(n)) {
3974n/a case expr_stmt:
3975n/a return ast_for_expr_stmt(c, n);
3976n/a case del_stmt:
3977n/a return ast_for_del_stmt(c, n);
3978n/a case pass_stmt:
3979n/a return Pass(LINENO(n), n->n_col_offset, c->c_arena);
3980n/a case flow_stmt:
3981n/a return ast_for_flow_stmt(c, n);
3982n/a case import_stmt:
3983n/a return ast_for_import_stmt(c, n);
3984n/a case global_stmt:
3985n/a return ast_for_global_stmt(c, n);
3986n/a case nonlocal_stmt:
3987n/a return ast_for_nonlocal_stmt(c, n);
3988n/a case assert_stmt:
3989n/a return ast_for_assert_stmt(c, n);
3990n/a default:
3991n/a PyErr_Format(PyExc_SystemError,
3992n/a "unhandled small_stmt: TYPE=%d NCH=%d\n",
3993n/a TYPE(n), NCH(n));
3994n/a return NULL;
3995n/a }
3996n/a }
3997n/a else {
3998n/a /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
3999n/a | funcdef | classdef | decorated | async_stmt
4000n/a */
4001n/a node *ch = CHILD(n, 0);
4002n/a REQ(n, compound_stmt);
4003n/a switch (TYPE(ch)) {
4004n/a case if_stmt:
4005n/a return ast_for_if_stmt(c, ch);
4006n/a case while_stmt:
4007n/a return ast_for_while_stmt(c, ch);
4008n/a case for_stmt:
4009n/a return ast_for_for_stmt(c, ch, 0);
4010n/a case try_stmt:
4011n/a return ast_for_try_stmt(c, ch);
4012n/a case with_stmt:
4013n/a return ast_for_with_stmt(c, ch, 0);
4014n/a case funcdef:
4015n/a return ast_for_funcdef(c, ch, NULL);
4016n/a case classdef:
4017n/a return ast_for_classdef(c, ch, NULL);
4018n/a case decorated:
4019n/a return ast_for_decorated(c, ch);
4020n/a case async_stmt:
4021n/a return ast_for_async_stmt(c, ch);
4022n/a default:
4023n/a PyErr_Format(PyExc_SystemError,
4024n/a "unhandled small_stmt: TYPE=%d NCH=%d\n",
4025n/a TYPE(n), NCH(n));
4026n/a return NULL;
4027n/a }
4028n/a }
4029n/a}
4030n/a
4031n/astatic PyObject *
4032n/aparsenumber_raw(struct compiling *c, const char *s)
4033n/a{
4034n/a const char *end;
4035n/a long x;
4036n/a double dx;
4037n/a Py_complex compl;
4038n/a int imflag;
4039n/a
4040n/a assert(s != NULL);
4041n/a errno = 0;
4042n/a end = s + strlen(s) - 1;
4043n/a imflag = *end == 'j' || *end == 'J';
4044n/a if (s[0] == '0') {
4045n/a x = (long) PyOS_strtoul(s, (char **)&end, 0);
4046n/a if (x < 0 && errno == 0) {
4047n/a return PyLong_FromString(s, (char **)0, 0);
4048n/a }
4049n/a }
4050n/a else
4051n/a x = PyOS_strtol(s, (char **)&end, 0);
4052n/a if (*end == '\0') {
4053n/a if (errno != 0)
4054n/a return PyLong_FromString(s, (char **)0, 0);
4055n/a return PyLong_FromLong(x);
4056n/a }
4057n/a /* XXX Huge floats may silently fail */
4058n/a if (imflag) {
4059n/a compl.real = 0.;
4060n/a compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
4061n/a if (compl.imag == -1.0 && PyErr_Occurred())
4062n/a return NULL;
4063n/a return PyComplex_FromCComplex(compl);
4064n/a }
4065n/a else
4066n/a {
4067n/a dx = PyOS_string_to_double(s, NULL, NULL);
4068n/a if (dx == -1.0 && PyErr_Occurred())
4069n/a return NULL;
4070n/a return PyFloat_FromDouble(dx);
4071n/a }
4072n/a}
4073n/a
4074n/astatic PyObject *
4075n/aparsenumber(struct compiling *c, const char *s)
4076n/a{
4077n/a char *dup, *end;
4078n/a PyObject *res = NULL;
4079n/a
4080n/a assert(s != NULL);
4081n/a
4082n/a if (strchr(s, '_') == NULL) {
4083n/a return parsenumber_raw(c, s);
4084n/a }
4085n/a /* Create a duplicate without underscores. */
4086n/a dup = PyMem_Malloc(strlen(s) + 1);
4087n/a end = dup;
4088n/a for (; *s; s++) {
4089n/a if (*s != '_') {
4090n/a *end++ = *s;
4091n/a }
4092n/a }
4093n/a *end = '\0';
4094n/a res = parsenumber_raw(c, dup);
4095n/a PyMem_Free(dup);
4096n/a return res;
4097n/a}
4098n/a
4099n/astatic PyObject *
4100n/adecode_utf8(struct compiling *c, const char **sPtr, const char *end)
4101n/a{
4102n/a const char *s, *t;
4103n/a t = s = *sPtr;
4104n/a /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4105n/a while (s < end && (*s & 0x80)) s++;
4106n/a *sPtr = s;
4107n/a return PyUnicode_DecodeUTF8(t, s - t, NULL);
4108n/a}
4109n/a
4110n/astatic int
4111n/awarn_invalid_escape_sequence(struct compiling *c, const node *n,
4112n/a char first_invalid_escape_char)
4113n/a{
4114n/a PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4115n/a first_invalid_escape_char);
4116n/a if (msg == NULL) {
4117n/a return -1;
4118n/a }
4119n/a if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4120n/a c->c_filename, LINENO(n),
4121n/a NULL, NULL) < 0 &&
4122n/a PyErr_ExceptionMatches(PyExc_DeprecationWarning))
4123n/a {
4124n/a const char *s;
4125n/a
4126n/a /* Replace the DeprecationWarning exception with a SyntaxError
4127n/a to get a more accurate error report */
4128n/a PyErr_Clear();
4129n/a
4130n/a s = PyUnicode_AsUTF8(msg);
4131n/a if (s != NULL) {
4132n/a ast_error(c, n, s);
4133n/a }
4134n/a Py_DECREF(msg);
4135n/a return -1;
4136n/a }
4137n/a Py_DECREF(msg);
4138n/a return 0;
4139n/a}
4140n/a
4141n/astatic PyObject *
4142n/adecode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4143n/a size_t len)
4144n/a{
4145n/a PyObject *v, *u;
4146n/a char *buf;
4147n/a char *p;
4148n/a const char *end;
4149n/a
4150n/a /* check for integer overflow */
4151n/a if (len > SIZE_MAX / 6)
4152n/a return NULL;
4153n/a /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
4154n/a "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
4155n/a u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
4156n/a if (u == NULL)
4157n/a return NULL;
4158n/a p = buf = PyBytes_AsString(u);
4159n/a end = s + len;
4160n/a while (s < end) {
4161n/a if (*s == '\\') {
4162n/a *p++ = *s++;
4163n/a if (*s & 0x80) {
4164n/a strcpy(p, "u005c");
4165n/a p += 5;
4166n/a }
4167n/a }
4168n/a if (*s & 0x80) { /* XXX inefficient */
4169n/a PyObject *w;
4170n/a int kind;
4171n/a void *data;
4172n/a Py_ssize_t len, i;
4173n/a w = decode_utf8(c, &s, end);
4174n/a if (w == NULL) {
4175n/a Py_DECREF(u);
4176n/a return NULL;
4177n/a }
4178n/a kind = PyUnicode_KIND(w);
4179n/a data = PyUnicode_DATA(w);
4180n/a len = PyUnicode_GET_LENGTH(w);
4181n/a for (i = 0; i < len; i++) {
4182n/a Py_UCS4 chr = PyUnicode_READ(kind, data, i);
4183n/a sprintf(p, "\\U%08x", chr);
4184n/a p += 10;
4185n/a }
4186n/a /* Should be impossible to overflow */
4187n/a assert(p - buf <= Py_SIZE(u));
4188n/a Py_DECREF(w);
4189n/a } else {
4190n/a *p++ = *s++;
4191n/a }
4192n/a }
4193n/a len = p - buf;
4194n/a s = buf;
4195n/a
4196n/a const char *first_invalid_escape;
4197n/a v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4198n/a
4199n/a if (v != NULL && first_invalid_escape != NULL) {
4200n/a if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4201n/a /* We have not decref u before because first_invalid_escape points
4202n/a inside u. */
4203n/a Py_XDECREF(u);
4204n/a Py_DECREF(v);
4205n/a return NULL;
4206n/a }
4207n/a }
4208n/a Py_XDECREF(u);
4209n/a return v;
4210n/a}
4211n/a
4212n/astatic PyObject *
4213n/adecode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4214n/a size_t len)
4215n/a{
4216n/a const char *first_invalid_escape;
4217n/a PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4218n/a &first_invalid_escape);
4219n/a if (result == NULL)
4220n/a return NULL;
4221n/a
4222n/a if (first_invalid_escape != NULL) {
4223n/a if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4224n/a Py_DECREF(result);
4225n/a return NULL;
4226n/a }
4227n/a }
4228n/a return result;
4229n/a}
4230n/a
4231n/a/* Compile this expression in to an expr_ty. Add parens around the
4232n/a expression, in order to allow leading spaces in the expression. */
4233n/astatic expr_ty
4234n/afstring_compile_expr(const char *expr_start, const char *expr_end,
4235n/a struct compiling *c, const node *n)
4236n/a
4237n/a{
4238n/a int all_whitespace = 1;
4239n/a int kind;
4240n/a void *data;
4241n/a PyCompilerFlags cf;
4242n/a mod_ty mod;
4243n/a char *str;
4244n/a PyObject *o;
4245n/a Py_ssize_t len;
4246n/a Py_ssize_t i;
4247n/a
4248n/a assert(expr_end >= expr_start);
4249n/a assert(*(expr_start-1) == '{');
4250n/a assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
4251n/a
4252n/a /* We know there are no escapes here, because backslashes are not allowed,
4253n/a and we know it's utf-8 encoded (per PEP 263). But, in order to check
4254n/a that each char is not whitespace, we need to decode it to unicode.
4255n/a Which is unfortunate, but such is life. */
4256n/a
4257n/a /* If the substring is all whitespace, it's an error. We need to catch
4258n/a this here, and not when we call PyParser_ASTFromString, because turning
4259n/a the expression '' in to '()' would go from being invalid to valid. */
4260n/a /* Note that this code says an empty string is all whitespace. That's
4261n/a important. There's a test for it: f'{}'. */
4262n/a o = PyUnicode_DecodeUTF8(expr_start, expr_end-expr_start, NULL);
4263n/a if (o == NULL)
4264n/a return NULL;
4265n/a len = PyUnicode_GET_LENGTH(o);
4266n/a kind = PyUnicode_KIND(o);
4267n/a data = PyUnicode_DATA(o);
4268n/a for (i = 0; i < len; i++) {
4269n/a if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
4270n/a all_whitespace = 0;
4271n/a break;
4272n/a }
4273n/a }
4274n/a Py_DECREF(o);
4275n/a if (all_whitespace) {
4276n/a ast_error(c, n, "f-string: empty expression not allowed");
4277n/a return NULL;
4278n/a }
4279n/a
4280n/a /* Reuse len to be the length of the utf-8 input string. */
4281n/a len = expr_end - expr_start;
4282n/a /* Allocate 3 extra bytes: open paren, close paren, null byte. */
4283n/a str = PyMem_RawMalloc(len + 3);
4284n/a if (str == NULL)
4285n/a return NULL;
4286n/a
4287n/a str[0] = '(';
4288n/a memcpy(str+1, expr_start, len);
4289n/a str[len+1] = ')';
4290n/a str[len+2] = 0;
4291n/a
4292n/a cf.cf_flags = PyCF_ONLY_AST;
4293n/a mod = PyParser_ASTFromString(str, "<fstring>",
4294n/a Py_eval_input, &cf, c->c_arena);
4295n/a PyMem_RawFree(str);
4296n/a if (!mod)
4297n/a return NULL;
4298n/a return mod->v.Expression.body;
4299n/a}
4300n/a
4301n/a/* Return -1 on error.
4302n/a
4303n/a Return 0 if we reached the end of the literal.
4304n/a
4305n/a Return 1 if we haven't reached the end of the literal, but we want
4306n/a the caller to process the literal up to this point. Used for
4307n/a doubled braces.
4308n/a*/
4309n/astatic int
4310n/afstring_find_literal(const char **str, const char *end, int raw,
4311n/a PyObject **literal, int recurse_lvl,
4312n/a struct compiling *c, const node *n)
4313n/a{
4314n/a /* Get any literal string. It ends when we hit an un-doubled left
4315n/a brace (which isn't part of a unicode name escape such as
4316n/a "\N{EULER CONSTANT}"), or the end of the string. */
4317n/a
4318n/a const char *literal_start = *str;
4319n/a const char *literal_end;
4320n/a int in_named_escape = 0;
4321n/a int result = 0;
4322n/a
4323n/a assert(*literal == NULL);
4324n/a for (; *str < end; (*str)++) {
4325n/a char ch = **str;
4326n/a if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
4327n/a *(*str-2) == '\\' && *(*str-1) == 'N') {
4328n/a in_named_escape = 1;
4329n/a } else if (in_named_escape && ch == '}') {
4330n/a in_named_escape = 0;
4331n/a } else if (ch == '{' || ch == '}') {
4332n/a /* Check for doubled braces, but only at the top level. If
4333n/a we checked at every level, then f'{0:{3}}' would fail
4334n/a with the two closing braces. */
4335n/a if (recurse_lvl == 0) {
4336n/a if (*str+1 < end && *(*str+1) == ch) {
4337n/a /* We're going to tell the caller that the literal ends
4338n/a here, but that they should continue scanning. But also
4339n/a skip over the second brace when we resume scanning. */
4340n/a literal_end = *str+1;
4341n/a *str += 2;
4342n/a result = 1;
4343n/a goto done;
4344n/a }
4345n/a
4346n/a /* Where a single '{' is the start of a new expression, a
4347n/a single '}' is not allowed. */
4348n/a if (ch == '}') {
4349n/a ast_error(c, n, "f-string: single '}' is not allowed");
4350n/a return -1;
4351n/a }
4352n/a }
4353n/a /* We're either at a '{', which means we're starting another
4354n/a expression; or a '}', which means we're at the end of this
4355n/a f-string (for a nested format_spec). */
4356n/a break;
4357n/a }
4358n/a }
4359n/a literal_end = *str;
4360n/a assert(*str <= end);
4361n/a assert(*str == end || **str == '{' || **str == '}');
4362n/adone:
4363n/a if (literal_start != literal_end) {
4364n/a if (raw)
4365n/a *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
4366n/a literal_end-literal_start,
4367n/a NULL, NULL);
4368n/a else
4369n/a *literal = decode_unicode_with_escapes(c, n, literal_start,
4370n/a literal_end-literal_start);
4371n/a if (!*literal)
4372n/a return -1;
4373n/a }
4374n/a return result;
4375n/a}
4376n/a
4377n/a/* Forward declaration because parsing is recursive. */
4378n/astatic expr_ty
4379n/afstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
4380n/a struct compiling *c, const node *n);
4381n/a
4382n/a/* Parse the f-string at *str, ending at end. We know *str starts an
4383n/a expression (so it must be a '{'). Returns the FormattedValue node,
4384n/a which includes the expression, conversion character, and
4385n/a format_spec expression.
4386n/a
4387n/a Note that I don't do a perfect job here: I don't make sure that a
4388n/a closing brace doesn't match an opening paren, for example. It
4389n/a doesn't need to error on all invalid expressions, just correctly
4390n/a find the end of all valid ones. Any errors inside the expression
4391n/a will be caught when we parse it later. */
4392n/astatic int
4393n/afstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
4394n/a expr_ty *expression, struct compiling *c, const node *n)
4395n/a{
4396n/a /* Return -1 on error, else 0. */
4397n/a
4398n/a const char *expr_start;
4399n/a const char *expr_end;
4400n/a expr_ty simple_expression;
4401n/a expr_ty format_spec = NULL; /* Optional format specifier. */
4402n/a int conversion = -1; /* The conversion char. -1 if not specified. */
4403n/a
4404n/a /* 0 if we're not in a string, else the quote char we're trying to
4405n/a match (single or double quote). */
4406n/a char quote_char = 0;
4407n/a
4408n/a /* If we're inside a string, 1=normal, 3=triple-quoted. */
4409n/a int string_type = 0;
4410n/a
4411n/a /* Keep track of nesting level for braces/parens/brackets in
4412n/a expressions. */
4413n/a Py_ssize_t nested_depth = 0;
4414n/a
4415n/a /* Can only nest one level deep. */
4416n/a if (recurse_lvl >= 2) {
4417n/a ast_error(c, n, "f-string: expressions nested too deeply");
4418n/a return -1;
4419n/a }
4420n/a
4421n/a /* The first char must be a left brace, or we wouldn't have gotten
4422n/a here. Skip over it. */
4423n/a assert(**str == '{');
4424n/a *str += 1;
4425n/a
4426n/a expr_start = *str;
4427n/a for (; *str < end; (*str)++) {
4428n/a char ch;
4429n/a
4430n/a /* Loop invariants. */
4431n/a assert(nested_depth >= 0);
4432n/a assert(*str >= expr_start && *str < end);
4433n/a if (quote_char)
4434n/a assert(string_type == 1 || string_type == 3);
4435n/a else
4436n/a assert(string_type == 0);
4437n/a
4438n/a ch = **str;
4439n/a /* Nowhere inside an expression is a backslash allowed. */
4440n/a if (ch == '\\') {
4441n/a /* Error: can't include a backslash character, inside
4442n/a parens or strings or not. */
4443n/a ast_error(c, n, "f-string expression part "
4444n/a "cannot include a backslash");
4445n/a return -1;
4446n/a }
4447n/a if (quote_char) {
4448n/a /* We're inside a string. See if we're at the end. */
4449n/a /* This code needs to implement the same non-error logic
4450n/a as tok_get from tokenizer.c, at the letter_quote
4451n/a label. To actually share that code would be a
4452n/a nightmare. But, it's unlikely to change and is small,
4453n/a so duplicate it here. Note we don't need to catch all
4454n/a of the errors, since they'll be caught when parsing the
4455n/a expression. We just need to match the non-error
4456n/a cases. Thus we can ignore \n in single-quoted strings,
4457n/a for example. Or non-terminated strings. */
4458n/a if (ch == quote_char) {
4459n/a /* Does this match the string_type (single or triple
4460n/a quoted)? */
4461n/a if (string_type == 3) {
4462n/a if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
4463n/a /* We're at the end of a triple quoted string. */
4464n/a *str += 2;
4465n/a string_type = 0;
4466n/a quote_char = 0;
4467n/a continue;
4468n/a }
4469n/a } else {
4470n/a /* We're at the end of a normal string. */
4471n/a quote_char = 0;
4472n/a string_type = 0;
4473n/a continue;
4474n/a }
4475n/a }
4476n/a } else if (ch == '\'' || ch == '"') {
4477n/a /* Is this a triple quoted string? */
4478n/a if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
4479n/a string_type = 3;
4480n/a *str += 2;
4481n/a } else {
4482n/a /* Start of a normal string. */
4483n/a string_type = 1;
4484n/a }
4485n/a /* Start looking for the end of the string. */
4486n/a quote_char = ch;
4487n/a } else if (ch == '[' || ch == '{' || ch == '(') {
4488n/a nested_depth++;
4489n/a } else if (nested_depth != 0 &&
4490n/a (ch == ']' || ch == '}' || ch == ')')) {
4491n/a nested_depth--;
4492n/a } else if (ch == '#') {
4493n/a /* Error: can't include a comment character, inside parens
4494n/a or not. */
4495n/a ast_error(c, n, "f-string expression part cannot include '#'");
4496n/a return -1;
4497n/a } else if (nested_depth == 0 &&
4498n/a (ch == '!' || ch == ':' || ch == '}')) {
4499n/a /* First, test for the special case of "!=". Since '=' is
4500n/a not an allowed conversion character, nothing is lost in
4501n/a this test. */
4502n/a if (ch == '!' && *str+1 < end && *(*str+1) == '=') {
4503n/a /* This isn't a conversion character, just continue. */
4504n/a continue;
4505n/a }
4506n/a /* Normal way out of this loop. */
4507n/a break;
4508n/a } else {
4509n/a /* Just consume this char and loop around. */
4510n/a }
4511n/a }
4512n/a expr_end = *str;
4513n/a /* If we leave this loop in a string or with mismatched parens, we
4514n/a don't care. We'll get a syntax error when compiling the
4515n/a expression. But, we can produce a better error message, so
4516n/a let's just do that.*/
4517n/a if (quote_char) {
4518n/a ast_error(c, n, "f-string: unterminated string");
4519n/a return -1;
4520n/a }
4521n/a if (nested_depth) {
4522n/a ast_error(c, n, "f-string: mismatched '(', '{', or '['");
4523n/a return -1;
4524n/a }
4525n/a
4526n/a if (*str >= end)
4527n/a goto unexpected_end_of_string;
4528n/a
4529n/a /* Compile the expression as soon as possible, so we show errors
4530n/a related to the expression before errors related to the
4531n/a conversion or format_spec. */
4532n/a simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
4533n/a if (!simple_expression)
4534n/a return -1;
4535n/a
4536n/a /* Check for a conversion char, if present. */
4537n/a if (**str == '!') {
4538n/a *str += 1;
4539n/a if (*str >= end)
4540n/a goto unexpected_end_of_string;
4541n/a
4542n/a conversion = **str;
4543n/a *str += 1;
4544n/a
4545n/a /* Validate the conversion. */
4546n/a if (!(conversion == 's' || conversion == 'r'
4547n/a || conversion == 'a')) {
4548n/a ast_error(c, n, "f-string: invalid conversion character: "
4549n/a "expected 's', 'r', or 'a'");
4550n/a return -1;
4551n/a }
4552n/a }
4553n/a
4554n/a /* Check for the format spec, if present. */
4555n/a if (*str >= end)
4556n/a goto unexpected_end_of_string;
4557n/a if (**str == ':') {
4558n/a *str += 1;
4559n/a if (*str >= end)
4560n/a goto unexpected_end_of_string;
4561n/a
4562n/a /* Parse the format spec. */
4563n/a format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
4564n/a if (!format_spec)
4565n/a return -1;
4566n/a }
4567n/a
4568n/a if (*str >= end || **str != '}')
4569n/a goto unexpected_end_of_string;
4570n/a
4571n/a /* We're at a right brace. Consume it. */
4572n/a assert(*str < end);
4573n/a assert(**str == '}');
4574n/a *str += 1;
4575n/a
4576n/a /* And now create the FormattedValue node that represents this
4577n/a entire expression with the conversion and format spec. */
4578n/a *expression = FormattedValue(simple_expression, conversion,
4579n/a format_spec, LINENO(n), n->n_col_offset,
4580n/a c->c_arena);
4581n/a if (!*expression)
4582n/a return -1;
4583n/a
4584n/a return 0;
4585n/a
4586n/aunexpected_end_of_string:
4587n/a ast_error(c, n, "f-string: expecting '}'");
4588n/a return -1;
4589n/a}
4590n/a
4591n/a/* Return -1 on error.
4592n/a
4593n/a Return 0 if we have a literal (possible zero length) and an
4594n/a expression (zero length if at the end of the string.
4595n/a
4596n/a Return 1 if we have a literal, but no expression, and we want the
4597n/a caller to call us again. This is used to deal with doubled
4598n/a braces.
4599n/a
4600n/a When called multiple times on the string 'a{{b{0}c', this function
4601n/a will return:
4602n/a
4603n/a 1. the literal 'a{' with no expression, and a return value
4604n/a of 1. Despite the fact that there's no expression, the return
4605n/a value of 1 means we're not finished yet.
4606n/a
4607n/a 2. the literal 'b' and the expression '0', with a return value of
4608n/a 0. The fact that there's an expression means we're not finished.
4609n/a
4610n/a 3. literal 'c' with no expression and a return value of 0. The
4611n/a combination of the return value of 0 with no expression means
4612n/a we're finished.
4613n/a*/
4614n/astatic int
4615n/afstring_find_literal_and_expr(const char **str, const char *end, int raw,
4616n/a int recurse_lvl, PyObject **literal,
4617n/a expr_ty *expression,
4618n/a struct compiling *c, const node *n)
4619n/a{
4620n/a int result;
4621n/a
4622n/a assert(*literal == NULL && *expression == NULL);
4623n/a
4624n/a /* Get any literal string. */
4625n/a result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
4626n/a if (result < 0)
4627n/a goto error;
4628n/a
4629n/a assert(result == 0 || result == 1);
4630n/a
4631n/a if (result == 1)
4632n/a /* We have a literal, but don't look at the expression. */
4633n/a return 1;
4634n/a
4635n/a if (*str >= end || **str == '}')
4636n/a /* We're at the end of the string or the end of a nested
4637n/a f-string: no expression. The top-level error case where we
4638n/a expect to be at the end of the string but we're at a '}' is
4639n/a handled later. */
4640n/a return 0;
4641n/a
4642n/a /* We must now be the start of an expression, on a '{'. */
4643n/a assert(**str == '{');
4644n/a
4645n/a if (fstring_find_expr(str, end, raw, recurse_lvl, expression, c, n) < 0)
4646n/a goto error;
4647n/a
4648n/a return 0;
4649n/a
4650n/aerror:
4651n/a Py_CLEAR(*literal);
4652n/a return -1;
4653n/a}
4654n/a
4655n/a#define EXPRLIST_N_CACHED 64
4656n/a
4657n/atypedef struct {
4658n/a /* Incrementally build an array of expr_ty, so be used in an
4659n/a asdl_seq. Cache some small but reasonably sized number of
4660n/a expr_ty's, and then after that start dynamically allocating,
4661n/a doubling the number allocated each time. Note that the f-string
4662n/a f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
4663n/a Str for the literal 'a'. So you add expr_ty's about twice as
4664n/a fast as you add exressions in an f-string. */
4665n/a
4666n/a Py_ssize_t allocated; /* Number we've allocated. */
4667n/a Py_ssize_t size; /* Number we've used. */
4668n/a expr_ty *p; /* Pointer to the memory we're actually
4669n/a using. Will point to 'data' until we
4670n/a start dynamically allocating. */
4671n/a expr_ty data[EXPRLIST_N_CACHED];
4672n/a} ExprList;
4673n/a
4674n/a#ifdef NDEBUG
4675n/a#define ExprList_check_invariants(l)
4676n/a#else
4677n/astatic void
4678n/aExprList_check_invariants(ExprList *l)
4679n/a{
4680n/a /* Check our invariants. Make sure this object is "live", and
4681n/a hasn't been deallocated. */
4682n/a assert(l->size >= 0);
4683n/a assert(l->p != NULL);
4684n/a if (l->size <= EXPRLIST_N_CACHED)
4685n/a assert(l->data == l->p);
4686n/a}
4687n/a#endif
4688n/a
4689n/astatic void
4690n/aExprList_Init(ExprList *l)
4691n/a{
4692n/a l->allocated = EXPRLIST_N_CACHED;
4693n/a l->size = 0;
4694n/a
4695n/a /* Until we start allocating dynamically, p points to data. */
4696n/a l->p = l->data;
4697n/a
4698n/a ExprList_check_invariants(l);
4699n/a}
4700n/a
4701n/astatic int
4702n/aExprList_Append(ExprList *l, expr_ty exp)
4703n/a{
4704n/a ExprList_check_invariants(l);
4705n/a if (l->size >= l->allocated) {
4706n/a /* We need to alloc (or realloc) the memory. */
4707n/a Py_ssize_t new_size = l->allocated * 2;
4708n/a
4709n/a /* See if we've ever allocated anything dynamically. */
4710n/a if (l->p == l->data) {
4711n/a Py_ssize_t i;
4712n/a /* We're still using the cached data. Switch to
4713n/a alloc-ing. */
4714n/a l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size);
4715n/a if (!l->p)
4716n/a return -1;
4717n/a /* Copy the cached data into the new buffer. */
4718n/a for (i = 0; i < l->size; i++)
4719n/a l->p[i] = l->data[i];
4720n/a } else {
4721n/a /* Just realloc. */
4722n/a expr_ty *tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) * new_size);
4723n/a if (!tmp) {
4724n/a PyMem_RawFree(l->p);
4725n/a l->p = NULL;
4726n/a return -1;
4727n/a }
4728n/a l->p = tmp;
4729n/a }
4730n/a
4731n/a l->allocated = new_size;
4732n/a assert(l->allocated == 2 * l->size);
4733n/a }
4734n/a
4735n/a l->p[l->size++] = exp;
4736n/a
4737n/a ExprList_check_invariants(l);
4738n/a return 0;
4739n/a}
4740n/a
4741n/astatic void
4742n/aExprList_Dealloc(ExprList *l)
4743n/a{
4744n/a ExprList_check_invariants(l);
4745n/a
4746n/a /* If there's been an error, or we've never dynamically allocated,
4747n/a do nothing. */
4748n/a if (!l->p || l->p == l->data) {
4749n/a /* Do nothing. */
4750n/a } else {
4751n/a /* We have dynamically allocated. Free the memory. */
4752n/a PyMem_RawFree(l->p);
4753n/a }
4754n/a l->p = NULL;
4755n/a l->size = -1;
4756n/a}
4757n/a
4758n/astatic asdl_seq *
4759n/aExprList_Finish(ExprList *l, PyArena *arena)
4760n/a{
4761n/a asdl_seq *seq;
4762n/a
4763n/a ExprList_check_invariants(l);
4764n/a
4765n/a /* Allocate the asdl_seq and copy the expressions in to it. */
4766n/a seq = _Py_asdl_seq_new(l->size, arena);
4767n/a if (seq) {
4768n/a Py_ssize_t i;
4769n/a for (i = 0; i < l->size; i++)
4770n/a asdl_seq_SET(seq, i, l->p[i]);
4771n/a }
4772n/a ExprList_Dealloc(l);
4773n/a return seq;
4774n/a}
4775n/a
4776n/a/* The FstringParser is designed to add a mix of strings and
4777n/a f-strings, and concat them together as needed. Ultimately, it
4778n/a generates an expr_ty. */
4779n/atypedef struct {
4780n/a PyObject *last_str;
4781n/a ExprList expr_list;
4782n/a int fmode;
4783n/a} FstringParser;
4784n/a
4785n/a#ifdef NDEBUG
4786n/a#define FstringParser_check_invariants(state)
4787n/a#else
4788n/astatic void
4789n/aFstringParser_check_invariants(FstringParser *state)
4790n/a{
4791n/a if (state->last_str)
4792n/a assert(PyUnicode_CheckExact(state->last_str));
4793n/a ExprList_check_invariants(&state->expr_list);
4794n/a}
4795n/a#endif
4796n/a
4797n/astatic void
4798n/aFstringParser_Init(FstringParser *state)
4799n/a{
4800n/a state->last_str = NULL;
4801n/a state->fmode = 0;
4802n/a ExprList_Init(&state->expr_list);
4803n/a FstringParser_check_invariants(state);
4804n/a}
4805n/a
4806n/astatic void
4807n/aFstringParser_Dealloc(FstringParser *state)
4808n/a{
4809n/a FstringParser_check_invariants(state);
4810n/a
4811n/a Py_XDECREF(state->last_str);
4812n/a ExprList_Dealloc(&state->expr_list);
4813n/a}
4814n/a
4815n/a/* Make a Str node, but decref the PyUnicode object being added. */
4816n/astatic expr_ty
4817n/amake_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
4818n/a{
4819n/a PyObject *s = *str;
4820n/a *str = NULL;
4821n/a assert(PyUnicode_CheckExact(s));
4822n/a if (PyArena_AddPyObject(c->c_arena, s) < 0) {
4823n/a Py_DECREF(s);
4824n/a return NULL;
4825n/a }
4826n/a return Str(s, LINENO(n), n->n_col_offset, c->c_arena);
4827n/a}
4828n/a
4829n/a/* Add a non-f-string (that is, a regular literal string). str is
4830n/a decref'd. */
4831n/astatic int
4832n/aFstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
4833n/a{
4834n/a FstringParser_check_invariants(state);
4835n/a
4836n/a assert(PyUnicode_CheckExact(str));
4837n/a
4838n/a if (PyUnicode_GET_LENGTH(str) == 0) {
4839n/a Py_DECREF(str);
4840n/a return 0;
4841n/a }
4842n/a
4843n/a if (!state->last_str) {
4844n/a /* We didn't have a string before, so just remember this one. */
4845n/a state->last_str = str;
4846n/a } else {
4847n/a /* Concatenate this with the previous string. */
4848n/a PyUnicode_AppendAndDel(&state->last_str, str);
4849n/a if (!state->last_str)
4850n/a return -1;
4851n/a }
4852n/a FstringParser_check_invariants(state);
4853n/a return 0;
4854n/a}
4855n/a
4856n/a/* Parse an f-string. The f-string is in *str to end, with no
4857n/a 'f' or quotes. */
4858n/astatic int
4859n/aFstringParser_ConcatFstring(FstringParser *state, const char **str,
4860n/a const char *end, int raw, int recurse_lvl,
4861n/a struct compiling *c, const node *n)
4862n/a{
4863n/a FstringParser_check_invariants(state);
4864n/a state->fmode = 1;
4865n/a
4866n/a /* Parse the f-string. */
4867n/a while (1) {
4868n/a PyObject *literal = NULL;
4869n/a expr_ty expression = NULL;
4870n/a
4871n/a /* If there's a zero length literal in front of the
4872n/a expression, literal will be NULL. If we're at the end of
4873n/a the f-string, expression will be NULL (unless result == 1,
4874n/a see below). */
4875n/a int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
4876n/a &literal, &expression,
4877n/a c, n);
4878n/a if (result < 0)
4879n/a return -1;
4880n/a
4881n/a /* Add the literal, if any. */
4882n/a if (!literal) {
4883n/a /* Do nothing. Just leave last_str alone (and possibly
4884n/a NULL). */
4885n/a } else if (!state->last_str) {
4886n/a state->last_str = literal;
4887n/a literal = NULL;
4888n/a } else {
4889n/a /* We have a literal, concatenate it. */
4890n/a assert(PyUnicode_GET_LENGTH(literal) != 0);
4891n/a if (FstringParser_ConcatAndDel(state, literal) < 0)
4892n/a return -1;
4893n/a literal = NULL;
4894n/a }
4895n/a assert(!state->last_str ||
4896n/a PyUnicode_GET_LENGTH(state->last_str) != 0);
4897n/a
4898n/a /* We've dealt with the literal now. It can't be leaked on further
4899n/a errors. */
4900n/a assert(literal == NULL);
4901n/a
4902n/a /* See if we should just loop around to get the next literal
4903n/a and expression, while ignoring the expression this
4904n/a time. This is used for un-doubling braces, as an
4905n/a optimization. */
4906n/a if (result == 1)
4907n/a continue;
4908n/a
4909n/a if (!expression)
4910n/a /* We're done with this f-string. */
4911n/a break;
4912n/a
4913n/a /* We know we have an expression. Convert any existing string
4914n/a to a Str node. */
4915n/a if (!state->last_str) {
4916n/a /* Do nothing. No previous literal. */
4917n/a } else {
4918n/a /* Convert the existing last_str literal to a Str node. */
4919n/a expr_ty str = make_str_node_and_del(&state->last_str, c, n);
4920n/a if (!str || ExprList_Append(&state->expr_list, str) < 0)
4921n/a return -1;
4922n/a }
4923n/a
4924n/a if (ExprList_Append(&state->expr_list, expression) < 0)
4925n/a return -1;
4926n/a }
4927n/a
4928n/a /* If recurse_lvl is zero, then we must be at the end of the
4929n/a string. Otherwise, we must be at a right brace. */
4930n/a
4931n/a if (recurse_lvl == 0 && *str < end-1) {
4932n/a ast_error(c, n, "f-string: unexpected end of string");
4933n/a return -1;
4934n/a }
4935n/a if (recurse_lvl != 0 && **str != '}') {
4936n/a ast_error(c, n, "f-string: expecting '}'");
4937n/a return -1;
4938n/a }
4939n/a
4940n/a FstringParser_check_invariants(state);
4941n/a return 0;
4942n/a}
4943n/a
4944n/a/* Convert the partial state reflected in last_str and expr_list to an
4945n/a expr_ty. The expr_ty can be a Str, or a JoinedStr. */
4946n/astatic expr_ty
4947n/aFstringParser_Finish(FstringParser *state, struct compiling *c,
4948n/a const node *n)
4949n/a{
4950n/a asdl_seq *seq;
4951n/a
4952n/a FstringParser_check_invariants(state);
4953n/a
4954n/a /* If we're just a constant string with no expressions, return
4955n/a that. */
4956n/a if (!state->fmode) {
4957n/a assert(!state->expr_list.size);
4958n/a if (!state->last_str) {
4959n/a /* Create a zero length string. */
4960n/a state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
4961n/a if (!state->last_str)
4962n/a goto error;
4963n/a }
4964n/a return make_str_node_and_del(&state->last_str, c, n);
4965n/a }
4966n/a
4967n/a /* Create a Str node out of last_str, if needed. It will be the
4968n/a last node in our expression list. */
4969n/a if (state->last_str) {
4970n/a expr_ty str = make_str_node_and_del(&state->last_str, c, n);
4971n/a if (!str || ExprList_Append(&state->expr_list, str) < 0)
4972n/a goto error;
4973n/a }
4974n/a /* This has already been freed. */
4975n/a assert(state->last_str == NULL);
4976n/a
4977n/a seq = ExprList_Finish(&state->expr_list, c->c_arena);
4978n/a if (!seq)
4979n/a goto error;
4980n/a
4981n/a return JoinedStr(seq, LINENO(n), n->n_col_offset, c->c_arena);
4982n/a
4983n/aerror:
4984n/a FstringParser_Dealloc(state);
4985n/a return NULL;
4986n/a}
4987n/a
4988n/a/* Given an f-string (with no 'f' or quotes) that's in *str and ends
4989n/a at end, parse it into an expr_ty. Return NULL on error. Adjust
4990n/a str to point past the parsed portion. */
4991n/astatic expr_ty
4992n/afstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
4993n/a struct compiling *c, const node *n)
4994n/a{
4995n/a FstringParser state;
4996n/a
4997n/a FstringParser_Init(&state);
4998n/a if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
4999n/a c, n) < 0) {
5000n/a FstringParser_Dealloc(&state);
5001n/a return NULL;
5002n/a }
5003n/a
5004n/a return FstringParser_Finish(&state, c, n);
5005n/a}
5006n/a
5007n/a/* n is a Python string literal, including the bracketing quote
5008n/a characters, and r, b, u, &/or f prefixes (if any), and embedded
5009n/a escape sequences (if any). parsestr parses it, and sets *result to
5010n/a decoded Python string object. If the string is an f-string, set
5011n/a *fstr and *fstrlen to the unparsed string object. Return 0 if no
5012n/a errors occurred.
5013n/a*/
5014n/astatic int
5015n/aparsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
5016n/a PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
5017n/a{
5018n/a size_t len;
5019n/a const char *s = STR(n);
5020n/a int quote = Py_CHARMASK(*s);
5021n/a int fmode = 0;
5022n/a *bytesmode = 0;
5023n/a *rawmode = 0;
5024n/a *result = NULL;
5025n/a *fstr = NULL;
5026n/a if (Py_ISALPHA(quote)) {
5027n/a while (!*bytesmode || !*rawmode) {
5028n/a if (quote == 'b' || quote == 'B') {
5029n/a quote = *++s;
5030n/a *bytesmode = 1;
5031n/a }
5032n/a else if (quote == 'u' || quote == 'U') {
5033n/a quote = *++s;
5034n/a }
5035n/a else if (quote == 'r' || quote == 'R') {
5036n/a quote = *++s;
5037n/a *rawmode = 1;
5038n/a }
5039n/a else if (quote == 'f' || quote == 'F') {
5040n/a quote = *++s;
5041n/a fmode = 1;
5042n/a }
5043n/a else {
5044n/a break;
5045n/a }
5046n/a }
5047n/a }
5048n/a if (fmode && *bytesmode) {
5049n/a PyErr_BadInternalCall();
5050n/a return -1;
5051n/a }
5052n/a if (quote != '\'' && quote != '\"') {
5053n/a PyErr_BadInternalCall();
5054n/a return -1;
5055n/a }
5056n/a /* Skip the leading quote char. */
5057n/a s++;
5058n/a len = strlen(s);
5059n/a if (len > INT_MAX) {
5060n/a PyErr_SetString(PyExc_OverflowError,
5061n/a "string to parse is too long");
5062n/a return -1;
5063n/a }
5064n/a if (s[--len] != quote) {
5065n/a /* Last quote char must match the first. */
5066n/a PyErr_BadInternalCall();
5067n/a return -1;
5068n/a }
5069n/a if (len >= 4 && s[0] == quote && s[1] == quote) {
5070n/a /* A triple quoted string. We've already skipped one quote at
5071n/a the start and one at the end of the string. Now skip the
5072n/a two at the start. */
5073n/a s += 2;
5074n/a len -= 2;
5075n/a /* And check that the last two match. */
5076n/a if (s[--len] != quote || s[--len] != quote) {
5077n/a PyErr_BadInternalCall();
5078n/a return -1;
5079n/a }
5080n/a }
5081n/a
5082n/a if (fmode) {
5083n/a /* Just return the bytes. The caller will parse the resulting
5084n/a string. */
5085n/a *fstr = s;
5086n/a *fstrlen = len;
5087n/a return 0;
5088n/a }
5089n/a
5090n/a /* Not an f-string. */
5091n/a /* Avoid invoking escape decoding routines if possible. */
5092n/a *rawmode = *rawmode || strchr(s, '\\') == NULL;
5093n/a if (*bytesmode) {
5094n/a /* Disallow non-ASCII characters. */
5095n/a const char *ch;
5096n/a for (ch = s; *ch; ch++) {
5097n/a if (Py_CHARMASK(*ch) >= 0x80) {
5098n/a ast_error(c, n, "bytes can only contain ASCII "
5099n/a "literal characters.");
5100n/a return -1;
5101n/a }
5102n/a }
5103n/a if (*rawmode)
5104n/a *result = PyBytes_FromStringAndSize(s, len);
5105n/a else
5106n/a *result = decode_bytes_with_escapes(c, n, s, len);
5107n/a } else {
5108n/a if (*rawmode)
5109n/a *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5110n/a else
5111n/a *result = decode_unicode_with_escapes(c, n, s, len);
5112n/a }
5113n/a return *result == NULL ? -1 : 0;
5114n/a}
5115n/a
5116n/a/* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5117n/a each STRING atom, and process it as needed. For bytes, just
5118n/a concatenate them together, and the result will be a Bytes node. For
5119n/a normal strings and f-strings, concatenate them together. The result
5120n/a will be a Str node if there were no f-strings; a FormattedValue
5121n/a node if there's just an f-string (with no leading or trailing
5122n/a literals), or a JoinedStr node if there are multiple f-strings or
5123n/a any literals involved. */
5124n/astatic expr_ty
5125n/aparsestrplus(struct compiling *c, const node *n)
5126n/a{
5127n/a int bytesmode = 0;
5128n/a PyObject *bytes_str = NULL;
5129n/a int i;
5130n/a
5131n/a FstringParser state;
5132n/a FstringParser_Init(&state);
5133n/a
5134n/a for (i = 0; i < NCH(n); i++) {
5135n/a int this_bytesmode;
5136n/a int this_rawmode;
5137n/a PyObject *s;
5138n/a const char *fstr;
5139n/a Py_ssize_t fstrlen = -1; /* Silence a compiler warning. */
5140n/a
5141n/a REQ(CHILD(n, i), STRING);
5142n/a if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
5143n/a &fstr, &fstrlen) != 0)
5144n/a goto error;
5145n/a
5146n/a /* Check that we're not mixing bytes with unicode. */
5147n/a if (i != 0 && bytesmode != this_bytesmode) {
5148n/a ast_error(c, n, "cannot mix bytes and nonbytes literals");
5149n/a /* s is NULL if the current string part is an f-string. */
5150n/a Py_XDECREF(s);
5151n/a goto error;
5152n/a }
5153n/a bytesmode = this_bytesmode;
5154n/a
5155n/a if (fstr != NULL) {
5156n/a int result;
5157n/a assert(s == NULL && !bytesmode);
5158n/a /* This is an f-string. Parse and concatenate it. */
5159n/a result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
5160n/a this_rawmode, 0, c, n);
5161n/a if (result < 0)
5162n/a goto error;
5163n/a } else {
5164n/a /* A string or byte string. */
5165n/a assert(s != NULL && fstr == NULL);
5166n/a
5167n/a assert(bytesmode ? PyBytes_CheckExact(s) :
5168n/a PyUnicode_CheckExact(s));
5169n/a
5170n/a if (bytesmode) {
5171n/a /* For bytes, concat as we go. */
5172n/a if (i == 0) {
5173n/a /* First time, just remember this value. */
5174n/a bytes_str = s;
5175n/a } else {
5176n/a PyBytes_ConcatAndDel(&bytes_str, s);
5177n/a if (!bytes_str)
5178n/a goto error;
5179n/a }
5180n/a } else {
5181n/a /* This is a regular string. Concatenate it. */
5182n/a if (FstringParser_ConcatAndDel(&state, s) < 0)
5183n/a goto error;
5184n/a }
5185n/a }
5186n/a }
5187n/a if (bytesmode) {
5188n/a /* Just return the bytes object and we're done. */
5189n/a if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
5190n/a goto error;
5191n/a return Bytes(bytes_str, LINENO(n), n->n_col_offset, c->c_arena);
5192n/a }
5193n/a
5194n/a /* We're not a bytes string, bytes_str should never have been set. */
5195n/a assert(bytes_str == NULL);
5196n/a
5197n/a return FstringParser_Finish(&state, c, n);
5198n/a
5199n/aerror:
5200n/a Py_XDECREF(bytes_str);
5201n/a FstringParser_Dealloc(&state);
5202n/a return NULL;
5203n/a}