
Python code coverage for Python/peephole.c

/* Peephole optimizations for bytecode compiler. */

#include "Python.h"

#include "Python-ast.h"
#include "node.h"
#include "ast.h"
#include "code.h"
#include "symtable.h"
#include "opcode.h"
#include "wordcode_helpers.h"

#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \
    || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
#define GETJUMPTGT(arr, i) (get_arg(arr, i) / sizeof(_Py_CODEUNIT) + \
        (ABSOLUTE_JUMP(_Py_OPCODE(arr[i])) ? 0 : i+1))
#define ISBASICBLOCK(blocks, start, end) \
    (blocks[start]==blocks[end])

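/* The CONST_STACK_* macros below maintain a lightweight stack of the most
   recently seen LOAD_CONST operands (borrowed references into the consts
   list).  The constant-folding passes further down peek at the top N
   entries so they can fold expressions without re-decoding earlier
   bytecode. */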
#define CONST_STACK_CREATE() { \
    const_stack_size = 256; \
    const_stack = PyMem_New(PyObject *, const_stack_size); \
    if (!const_stack) { \
        PyErr_NoMemory(); \
        goto exitError; \
    } \
    }

#define CONST_STACK_DELETE() do { \
    if (const_stack) \
        PyMem_Free(const_stack); \
    } while(0)

#define CONST_STACK_LEN() ((unsigned)(const_stack_top + 1))

#define CONST_STACK_PUSH_OP(i) do { \
    PyObject *_x; \
    assert(_Py_OPCODE(codestr[i]) == LOAD_CONST); \
    assert(PyList_GET_SIZE(consts) > (Py_ssize_t)get_arg(codestr, i)); \
    _x = PyList_GET_ITEM(consts, get_arg(codestr, i)); \
    if (++const_stack_top >= const_stack_size) { \
        const_stack_size *= 2; \
        PyMem_Resize(const_stack, PyObject *, const_stack_size); \
        if (!const_stack) { \
            PyErr_NoMemory(); \
            goto exitError; \
        } \
    } \
    const_stack[const_stack_top] = _x; \
    in_consts = 1; \
    } while(0)

#define CONST_STACK_RESET() do { \
    const_stack_top = -1; \
    } while(0)

#define CONST_STACK_LASTN(i) \
    &const_stack[CONST_STACK_LEN() - i]

#define CONST_STACK_POP(i) do { \
    assert(CONST_STACK_LEN() >= i); \
    const_stack_top -= i; \
    } while(0)

/* Scans back over N consecutive LOAD_CONST instructions, skipping NOPs,
   and returns the index of the Nth-from-last LOAD_CONST's EXTENDED_ARG
   prefix (or of the LOAD_CONST itself if it has no prefix).
   Callers are responsible for checking CONST_STACK_LEN beforehand.
*/
static Py_ssize_t
lastn_const_start(const _Py_CODEUNIT *codestr, Py_ssize_t i, Py_ssize_t n)
{
    assert(n > 0);
    for (;;) {
        i--;
        assert(i >= 0);
        if (_Py_OPCODE(codestr[i]) == LOAD_CONST) {
            if (!--n) {
                while (i > 0 && _Py_OPCODE(codestr[i-1]) == EXTENDED_ARG) {
                    i--;
                }
                return i;
            }
        }
        else {
            assert(_Py_OPCODE(codestr[i]) == NOP ||
                   _Py_OPCODE(codestr[i]) == EXTENDED_ARG);
        }
    }
}

/* Scans through EXTENDED_ARGs, seeking the index of the effective opcode */
static Py_ssize_t
find_op(const _Py_CODEUNIT *codestr, Py_ssize_t i)
{
    while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) {
        i++;
    }
    return i;
}

/* Given the index of the effective opcode,
   scan back to construct the oparg with EXTENDED_ARG */
static unsigned int
get_arg(const _Py_CODEUNIT *codestr, Py_ssize_t i)
{
    _Py_CODEUNIT word;
    unsigned int oparg = _Py_OPARG(codestr[i]);
    if (i >= 1 && _Py_OPCODE(word = codestr[i-1]) == EXTENDED_ARG) {
        oparg |= _Py_OPARG(word) << 8;
        if (i >= 2 && _Py_OPCODE(word = codestr[i-2]) == EXTENDED_ARG) {
            oparg |= _Py_OPARG(word) << 16;
            if (i >= 3 && _Py_OPCODE(word = codestr[i-3]) == EXTENDED_ARG) {
                oparg |= _Py_OPARG(word) << 24;
            }
        }
    }
    return oparg;
}
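
/* Worked example (hypothetical operands): the instruction pair
       EXTENDED_ARG 1
       LOAD_CONST   44
   decodes to oparg (1 << 8) | 44 == 300; each EXTENDED_ARG prefix
   supplies the next-higher byte of the final argument. */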

/* Fill the region with NOPs. */
static void
fill_nops(_Py_CODEUNIT *codestr, Py_ssize_t start, Py_ssize_t end)
{
    memset(codestr + start, NOP, (end - start) * sizeof(_Py_CODEUNIT));
}

/* Given the index of the effective opcode,
   attempt to replace the argument, taking into account EXTENDED_ARG.
   Returns -1 on failure, or the new op index on success */
static Py_ssize_t
set_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned int oparg)
{
    unsigned int curarg = get_arg(codestr, i);
    int curilen, newilen;
    if (curarg == oparg)
        return i;
    curilen = instrsize(curarg);
    newilen = instrsize(oparg);
    if (curilen < newilen) {
        return -1;
    }

    write_op_arg(codestr + i + 1 - curilen, _Py_OPCODE(codestr[i]), oparg, newilen);
    fill_nops(codestr, i + 1 - curilen + newilen, i + 1);
    return i-curilen+newilen;
}

/* Attempt to write op/arg at end of specified region of memory.
   Preceding memory in the region is overwritten with NOPs.
   Returns -1 on failure, op index on success */
static Py_ssize_t
copy_op_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned char op,
            unsigned int oparg, Py_ssize_t maxi)
{
    int ilen = instrsize(oparg);
    if (i + ilen > maxi) {
        return -1;
    }
    write_op_arg(codestr + maxi - ilen, op, oparg, ilen);
    fill_nops(codestr, i, maxi - ilen);
    return maxi - 1;
}
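
/* Example (illustrative): with oparg 300, instrsize() reports two code
   units, so copy_op_arg(codestr, i, LOAD_CONST, 300, maxi) writes
   EXTENDED_ARG 1; LOAD_CONST 44 ending at index maxi and NOP-fills the
   region from i up to the new instruction. */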

/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
   with    LOAD_CONST (c1, c2, ... cn).
   The consts table must still be in list form so that the
   new constant (c1, c2, ... cn) can be appended.
   Called with codestr pointing to the first LOAD_CONST.
   Bails out with no change if one or more of the LOAD_CONSTs is missing.
   Also works for BUILD_LIST and BUILD_SET when followed by an "in" or
   "not in" test; for BUILD_SET it assembles a frozenset rather than a tuple.
*/
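/* Example (illustrative): the literal (1, 2, 3) compiles to
       LOAD_CONST 1; LOAD_CONST 2; LOAD_CONST 3; BUILD_TUPLE 3
   which this pass rewrites to a single LOAD_CONST (1, 2, 3), appending
   the new tuple to the consts list. */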
static Py_ssize_t
fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
                        Py_ssize_t opcode_end, unsigned char opcode,
                        PyObject *consts, PyObject **objs, int n)
{
    PyObject *newconst, *constant;
    Py_ssize_t i, len_consts;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));

    /* Buildup new tuple of constants */
    newconst = PyTuple_New(n);
    if (newconst == NULL) {
        return -1;
    }
    for (i=0 ; i<n ; i++) {
        constant = objs[i];
        Py_INCREF(constant);
        PyTuple_SET_ITEM(newconst, i, constant);
    }

    /* If it's a BUILD_SET, use the PyTuple we just built to create a
       PyFrozenSet, and use that as the constant instead: */
    if (opcode == BUILD_SET) {
        Py_SETREF(newconst, PyFrozenSet_New(newconst));
        if (newconst == NULL) {
            return -1;
        }
    }

    /* Append folded constant onto consts */
    len_consts = PyList_GET_SIZE(consts);
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        return -1;
    }
    Py_DECREF(newconst);

    return copy_op_arg(codestr, c_start, LOAD_CONST, len_consts, opcode_end);
}

/* Replace LOAD_CONST c1, LOAD_CONST c2, BINOP
   with    LOAD_CONST binop(c1,c2)
   The consts table must still be in list form so that the
   new constant can be appended.
   Called with codestr pointing to the BINOP.
   Abandons the transformation if the folding fails (e.g. 1+'a').
   If the new constant is a sequence, only folds when the size
   is below a threshold value.  That keeps pyc files from
   becoming large in the presence of code like:  (None,)*1000.
*/
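/* Example (illustrative): 24 * 60 compiles to
       LOAD_CONST 24; LOAD_CONST 60; BINARY_MULTIPLY
   and folds to LOAD_CONST 1440.  By contrast, (None,)*1000 would yield
   a 1000-element sequence and is rejected by the size check below. */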
static Py_ssize_t
fold_binops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
                         Py_ssize_t opcode_end, unsigned char opcode,
                         PyObject *consts, PyObject **objs)
{
    PyObject *newconst, *v, *w;
    Py_ssize_t len_consts, size;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    len_consts = PyList_GET_SIZE(consts);

    /* Create new constant */
    v = objs[0];
    w = objs[1];
    switch (opcode) {
        case BINARY_POWER:
            newconst = PyNumber_Power(v, w, Py_None);
            break;
        case BINARY_MULTIPLY:
            newconst = PyNumber_Multiply(v, w);
            break;
        case BINARY_TRUE_DIVIDE:
            newconst = PyNumber_TrueDivide(v, w);
            break;
        case BINARY_FLOOR_DIVIDE:
            newconst = PyNumber_FloorDivide(v, w);
            break;
        case BINARY_MODULO:
            newconst = PyNumber_Remainder(v, w);
            break;
        case BINARY_ADD:
            newconst = PyNumber_Add(v, w);
            break;
        case BINARY_SUBTRACT:
            newconst = PyNumber_Subtract(v, w);
            break;
        case BINARY_SUBSCR:
            newconst = PyObject_GetItem(v, w);
            break;
        case BINARY_LSHIFT:
            newconst = PyNumber_Lshift(v, w);
            break;
        case BINARY_RSHIFT:
            newconst = PyNumber_Rshift(v, w);
            break;
        case BINARY_AND:
            newconst = PyNumber_And(v, w);
            break;
        case BINARY_XOR:
            newconst = PyNumber_Xor(v, w);
            break;
        case BINARY_OR:
            newconst = PyNumber_Or(v, w);
            break;
        default:
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                         "unexpected binary operation %d on a constant",
                         opcode);
            return -1;
    }
    if (newconst == NULL) {
        if (!PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
            PyErr_Clear();
        }
        return -1;
    }
    size = PyObject_Size(newconst);
    if (size == -1) {
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
            return -1;
        }
        PyErr_Clear();
    } else if (size > 20) {
        Py_DECREF(newconst);
        return -1;
    }

    /* Append folded constant into consts table */
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        return -1;
    }
    Py_DECREF(newconst);

    return copy_op_arg(codestr, c_start, LOAD_CONST, len_consts, opcode_end);
}

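/* Replace LOAD_CONST c1, UNARY_OP
   with    LOAD_CONST unary_op(c1),
   following the same protocol as the binary case above. */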
static Py_ssize_t
fold_unaryops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
                           Py_ssize_t opcode_end, unsigned char opcode,
                           PyObject *consts, PyObject *v)
{
    PyObject *newconst;
    Py_ssize_t len_consts;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    len_consts = PyList_GET_SIZE(consts);

    /* Create new constant */
    switch (opcode) {
        case UNARY_NEGATIVE:
            newconst = PyNumber_Negative(v);
            break;
        case UNARY_INVERT:
            newconst = PyNumber_Invert(v);
            break;
        case UNARY_POSITIVE:
            newconst = PyNumber_Positive(v);
            break;
        default:
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                         "unexpected unary operation %d on a constant",
                         opcode);
            return -1;
    }
    if (newconst == NULL) {
        if (!PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
            PyErr_Clear();
        }
        return -1;
    }

    /* Append folded constant into consts table */
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        PyErr_Clear();
        return -1;
    }
    Py_DECREF(newconst);

    return copy_op_arg(codestr, c_start, LOAD_CONST, len_consts, opcode_end);
}

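/* Build an array mapping each instruction index to a basic-block number:
   the count of jump targets at or before that index.  Two indices lie in
   the same basic block iff their block numbers are equal, which is
   exactly what the ISBASICBLOCK macro tests. */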
static unsigned int *
markblocks(_Py_CODEUNIT *code, Py_ssize_t len)
{
    unsigned int *blocks = PyMem_New(unsigned int, len);
    int i, j, opcode, blockcnt = 0;

    if (blocks == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memset(blocks, 0, len*sizeof(int));

    /* Mark labels in the first pass */
    for (i = 0; i < len; i++) {
        opcode = _Py_OPCODE(code[i]);
        switch (opcode) {
            case FOR_ITER:
            case JUMP_FORWARD:
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
            case SETUP_ASYNC_WITH:
                j = GETJUMPTGT(code, i);
                assert(j < len);
                blocks[j] = 1;
                break;
        }
    }
    /* Build block numbers in the second pass */
    for (i = 0; i < len; i++) {
        blockcnt += blocks[i]; /* increment blockcnt over labels */
        blocks[i] = blockcnt;
    }
    return blocks;
}

/* Perform basic peephole optimizations to components of a code object.
   The consts object should still be in list form to allow new constants
   to be appended.

   To keep the optimizer simple, it bails when the lineno table has complex
   encoding for gaps >= 255.

   Optimizations are restricted to simple transformations occurring within a
   single basic block.  All transformations keep the code size the same or
   smaller.  For those that reduce size, the gaps are initially filled with
   NOPs.  Later those NOPs are removed and the jump addresses retargeted in
   a single pass. */

PyObject *
PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                PyObject *lnotab_obj)
{
    Py_ssize_t h, i, nexti, op_start, codelen, tgt;
    unsigned int j, nops;
    unsigned char opcode, nextop;
    _Py_CODEUNIT *codestr = NULL;
    unsigned char *lnotab;
    unsigned int cum_orig_offset, last_offset;
    Py_ssize_t tabsiz;
    PyObject **const_stack = NULL;
    Py_ssize_t const_stack_top = -1;
    Py_ssize_t const_stack_size = 0;
    int in_consts = 0;  /* whether we are in a LOAD_CONST sequence */
    unsigned int *blocks = NULL;

    /* Bail out if an exception is set */
    if (PyErr_Occurred())
        goto exitError;

    /* Bypass optimization when the lnotab table is too complex */
    assert(PyBytes_Check(lnotab_obj));
    lnotab = (unsigned char*)PyBytes_AS_STRING(lnotab_obj);
    tabsiz = PyBytes_GET_SIZE(lnotab_obj);
    assert(tabsiz == 0 || Py_REFCNT(lnotab_obj) == 1);
    if (memchr(lnotab, 255, tabsiz) != NULL) {
        /* 255 values are used for multibyte bytecode instructions */
        goto exitUnchanged;
    }
    /* Note: -128 and 127 special values for line number delta are ok,
       the peephole optimizer doesn't modify line numbers. */

    assert(PyBytes_Check(code));
    codelen = PyBytes_GET_SIZE(code);
    assert(codelen % sizeof(_Py_CODEUNIT) == 0);

    /* Make a modifiable copy of the code string */
    codestr = (_Py_CODEUNIT *)PyMem_Malloc(codelen);
    if (codestr == NULL) {
        PyErr_NoMemory();
        goto exitError;
    }
    memcpy(codestr, PyBytes_AS_STRING(code), codelen);
    codelen /= sizeof(_Py_CODEUNIT);

    blocks = markblocks(codestr, codelen);
    if (blocks == NULL)
        goto exitError;
    assert(PyList_Check(consts));

    CONST_STACK_CREATE();

    for (i=find_op(codestr, 0) ; i<codelen ; i=nexti) {
        opcode = _Py_OPCODE(codestr[i]);
        op_start = i;
        while (op_start >= 1 && _Py_OPCODE(codestr[op_start-1]) == EXTENDED_ARG) {
            op_start--;
        }

        nexti = i + 1;
        while (nexti < codelen && _Py_OPCODE(codestr[nexti]) == EXTENDED_ARG)
            nexti++;
        nextop = nexti < codelen ? _Py_OPCODE(codestr[nexti]) : 0;

        if (!in_consts) {
            CONST_STACK_RESET();
        }
        in_consts = 0;

        switch (opcode) {
            /* Replace UNARY_NOT POP_JUMP_IF_FALSE
                 with    POP_JUMP_IF_TRUE */
            case UNARY_NOT:
                if (nextop != POP_JUMP_IF_FALSE
                    || !ISBASICBLOCK(blocks, op_start, i + 1))
                    break;
                fill_nops(codestr, op_start, i + 1);
                codestr[nexti] = PACKOPARG(POP_JUMP_IF_TRUE, _Py_OPARG(codestr[nexti]));
                break;

            /* not a is b -->  a is not b
               not a in b -->  a not in b
               not a is not b -->  a is b
               not a not in b -->  a in b
            */
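            /* Note: the COMPARE_OP opargs PyCmp_IN (6), PyCmp_NOT_IN (7),
               PyCmp_IS (8) and PyCmp_IS_NOT (9) pair each test with its
               negation at the adjacent value, so XOR-ing the oparg with 1
               flips the sense of the test. */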
            case COMPARE_OP:
                j = get_arg(codestr, i);
                if (j < 6 || j > 9 ||
                    nextop != UNARY_NOT ||
                    !ISBASICBLOCK(blocks, op_start, i + 1))
                    break;
                codestr[i] = PACKOPARG(opcode, j^1);
                fill_nops(codestr, i + 1, nexti + 1);
                break;

            /* Skip over LOAD_CONST trueconst
               POP_JUMP_IF_FALSE xx.  This improves
               "while 1" performance. */
            case LOAD_CONST:
                CONST_STACK_PUSH_OP(i);
                if (nextop != POP_JUMP_IF_FALSE ||
                    !ISBASICBLOCK(blocks, op_start, i + 1) ||
                    !PyObject_IsTrue(PyList_GET_ITEM(consts, get_arg(codestr, i))))
                    break;
                fill_nops(codestr, op_start, nexti + 1);
                CONST_STACK_POP(1);
                break;

            /* Try to fold tuples of constants (includes a case for lists
               and sets which are only used for "in" and "not in" tests).
               Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
               Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
               Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
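            /* Example (illustrative): the swap  a, b = b, a  builds and
               immediately unpacks a pair, so its BUILD_TUPLE 2 /
               UNPACK_SEQUENCE 2 sequence collapses to a single ROT_TWO. */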
            case BUILD_TUPLE:
            case BUILD_LIST:
            case BUILD_SET:
                j = get_arg(codestr, i);
                if (j > 0 && CONST_STACK_LEN() >= j) {
                    h = lastn_const_start(codestr, op_start, j);
                    if ((opcode == BUILD_TUPLE &&
                          ISBASICBLOCK(blocks, h, op_start)) ||
                        ((opcode == BUILD_LIST || opcode == BUILD_SET) &&
                          ((nextop==COMPARE_OP &&
                           (_Py_OPARG(codestr[nexti]) == PyCmp_IN ||
                            _Py_OPARG(codestr[nexti]) == PyCmp_NOT_IN)) ||
                          nextop == GET_ITER) && ISBASICBLOCK(blocks, h, i + 1))) {
                        h = fold_tuple_on_constants(codestr, h, i + 1, opcode,
                                                    consts, CONST_STACK_LASTN(j), j);
                        if (h >= 0) {
                            CONST_STACK_POP(j);
                            CONST_STACK_PUSH_OP(h);
                        }
                        break;
                    }
                }
                if (nextop != UNPACK_SEQUENCE ||
                    !ISBASICBLOCK(blocks, op_start, i + 1) ||
                    j != get_arg(codestr, nexti) ||
                    opcode == BUILD_SET)
                    break;
                if (j < 2) {
                    fill_nops(codestr, op_start, nexti + 1);
                } else if (j == 2) {
                    codestr[op_start] = PACKOPARG(ROT_TWO, 0);
                    fill_nops(codestr, op_start + 1, nexti + 1);
                    CONST_STACK_RESET();
                } else if (j == 3) {
                    codestr[op_start] = PACKOPARG(ROT_THREE, 0);
                    codestr[op_start + 1] = PACKOPARG(ROT_TWO, 0);
                    fill_nops(codestr, op_start + 2, nexti + 1);
                    CONST_STACK_RESET();
                }
                break;

            /* Fold binary ops on constants.
               LOAD_CONST c1 LOAD_CONST c2 BINOP -->  LOAD_CONST binop(c1,c2) */
            case BINARY_POWER:
            case BINARY_MULTIPLY:
            case BINARY_TRUE_DIVIDE:
            case BINARY_FLOOR_DIVIDE:
            case BINARY_MODULO:
            case BINARY_ADD:
            case BINARY_SUBTRACT:
            case BINARY_SUBSCR:
            case BINARY_LSHIFT:
            case BINARY_RSHIFT:
            case BINARY_AND:
            case BINARY_XOR:
            case BINARY_OR:
                if (CONST_STACK_LEN() < 2)
                    break;
                h = lastn_const_start(codestr, op_start, 2);
                if (ISBASICBLOCK(blocks, h, op_start)) {
                    h = fold_binops_on_constants(codestr, h, i + 1, opcode,
                                                 consts, CONST_STACK_LASTN(2));
                    if (h >= 0) {
                        CONST_STACK_POP(2);
                        CONST_STACK_PUSH_OP(h);
                    }
                }
                break;

            /* Fold unary ops on constants.
               LOAD_CONST c1  UNARY_OP --> LOAD_CONST unary_op(c) */
            case UNARY_NEGATIVE:
            case UNARY_INVERT:
            case UNARY_POSITIVE:
                if (CONST_STACK_LEN() < 1)
                    break;
                h = lastn_const_start(codestr, op_start, 1);
                if (ISBASICBLOCK(blocks, h, op_start)) {
                    h = fold_unaryops_on_constants(codestr, h, i + 1, opcode,
                                                   consts, *CONST_STACK_LASTN(1));
                    if (h >= 0) {
                        CONST_STACK_POP(1);
                        CONST_STACK_PUSH_OP(h);
                    }
                }
                break;

            /* Simplify conditional jump to conditional jump where the
               result of the first test implies the success of a similar
               test or the failure of the opposite test.
               Arises in code like:
               "if a and b:"
               "if a or b:"
               "a and b or c"
               "(a and b) and c"
               x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_FALSE_OR_POP z
                  -->  x:JUMP_IF_FALSE_OR_POP z
               x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_TRUE_OR_POP z
                  -->  x:POP_JUMP_IF_FALSE y+1
               where y+1 is the instruction following the second test.
            */
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                h = get_arg(codestr, i) / sizeof(_Py_CODEUNIT);
                tgt = find_op(codestr, h);

                j = _Py_OPCODE(codestr[tgt]);
                if (CONDITIONAL_JUMP(j)) {
                    /* NOTE: all possible jumps here are absolute. */
                    if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) {
                        /* The second jump will be taken iff the first is.
                           The current opcode inherits its target's
                           stack effect */
                        h = set_arg(codestr, i, get_arg(codestr, tgt));
                    } else {
                        /* The second jump is not taken if the first is (so
                           jump past it), and all conditional jumps pop their
                           argument when they're not taken (so change the
                           first jump to pop its argument when it's taken). */
                        h = set_arg(codestr, i, (tgt + 1) * sizeof(_Py_CODEUNIT));
                        j = opcode == JUMP_IF_TRUE_OR_POP ?
                            POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE;
                    }

                    if (h >= 0) {
                        nexti = h;
                        codestr[nexti] = PACKOPARG(j, _Py_OPARG(codestr[nexti]));
                        break;
                    }
                }
                /* Intentional fallthrough */

            /* Replace jumps to unconditional jumps */
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case FOR_ITER:
            case JUMP_FORWARD:
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
            case SETUP_ASYNC_WITH:
                h = GETJUMPTGT(codestr, i);
                tgt = find_op(codestr, h);
                /* Replace JUMP_* to a RETURN into just a RETURN */
                if (UNCONDITIONAL_JUMP(opcode) &&
                    _Py_OPCODE(codestr[tgt]) == RETURN_VALUE) {
                    codestr[op_start] = PACKOPARG(RETURN_VALUE, 0);
                    fill_nops(codestr, op_start + 1, i + 1);
                } else if (UNCONDITIONAL_JUMP(_Py_OPCODE(codestr[tgt]))) {
                    j = GETJUMPTGT(codestr, tgt);
                    if (opcode == JUMP_FORWARD) { /* JMP_ABS can go backwards */
                        opcode = JUMP_ABSOLUTE;
                    } else if (!ABSOLUTE_JUMP(opcode)) {
                        if ((Py_ssize_t)j < i + 1) {
                            break;           /* No backward relative jumps */
                        }
                        j -= i + 1;          /* Calc relative jump addr */
                    }
                    j *= sizeof(_Py_CODEUNIT);
                    copy_op_arg(codestr, op_start, opcode, j, i + 1);
                }
                break;

            /* Remove unreachable ops after RETURN */
            case RETURN_VALUE:
                h = i + 1;
                while (h < codelen && ISBASICBLOCK(blocks, i, h)) {
                    h++;
                }
                if (h > i + 1) {
                    fill_nops(codestr, i + 1, h);
                    nexti = find_op(codestr, h);
                }
                break;
        }
    }

    /* Fixup lnotab */
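    /* blocks[] has served its purpose; reuse it as a map from original
       instruction index to post-NOP-removal index, then walk the lnotab's
       (offset_delta, line_delta) byte pairs and rewrite each offset delta
       in terms of the new offsets. */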
    for (i = 0, nops = 0; i < codelen; i++) {
        assert(i - nops <= INT_MAX);
        /* original code offset => new code offset */
        blocks[i] = i - nops;
        if (_Py_OPCODE(codestr[i]) == NOP)
            nops++;
    }
    cum_orig_offset = 0;
    last_offset = 0;
    for (i=0 ; i < tabsiz ; i+=2) {
        unsigned int offset_delta, new_offset;
        cum_orig_offset += lnotab[i];
        assert(cum_orig_offset % sizeof(_Py_CODEUNIT) == 0);
        new_offset = blocks[cum_orig_offset / sizeof(_Py_CODEUNIT)] *
                sizeof(_Py_CODEUNIT);
        offset_delta = new_offset - last_offset;
        assert(offset_delta <= 255);
        lnotab[i] = (unsigned char)offset_delta;
        last_offset = new_offset;
    }

    /* Remove NOPs and fixup jump targets */
    for (op_start = i = h = 0; i < codelen; i++, op_start = i) {
        j = _Py_OPARG(codestr[i]);
        while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) {
            i++;
            j = j<<8 | _Py_OPARG(codestr[i]);
        }
        opcode = _Py_OPCODE(codestr[i]);
        switch (opcode) {
            case NOP:continue;

            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                j = blocks[j / sizeof(_Py_CODEUNIT)] * sizeof(_Py_CODEUNIT);
                break;

            case FOR_ITER:
            case JUMP_FORWARD:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
            case SETUP_ASYNC_WITH:
                j = blocks[j / sizeof(_Py_CODEUNIT) + i + 1] - blocks[i] - 1;
                j *= sizeof(_Py_CODEUNIT);
                break;
        }
        nexti = i - op_start + 1;
        if (instrsize(j) > nexti)
            goto exitUnchanged;
        /* If instrsize(j) < nexti, we'll emit EXTENDED_ARG 0 */
        write_op_arg(codestr + h, opcode, j, nexti);
        h += nexti;
    }
    assert(h + (Py_ssize_t)nops == codelen);

    CONST_STACK_DELETE();
    PyMem_Free(blocks);
    code = PyBytes_FromStringAndSize((char *)codestr, h * sizeof(_Py_CODEUNIT));
    PyMem_Free(codestr);
    return code;

 exitError:
    code = NULL;

 exitUnchanged:
    Py_XINCREF(code);
    CONST_STACK_DELETE();
    PyMem_Free(blocks);
    PyMem_Free(codestr);
    return code;
}