» Core Development > Code coverage > Modules/_sre.c

Python code coverage for Modules/_sre.c

# | count | content
1n/a/*
2n/a * Secret Labs' Regular Expression Engine
3n/a *
4n/a * regular expression matching engine
5n/a *
6n/a * partial history:
7n/a * 1999-10-24 fl created (based on existing template matcher code)
8n/a * 2000-03-06 fl first alpha, sort of
9n/a * 2000-08-01 fl fixes for 1.6b1
10n/a * 2000-08-07 fl use PyOS_CheckStack() if available
11n/a * 2000-09-20 fl added expand method
12n/a * 2001-03-20 fl lots of fixes for 2.1b2
13n/a * 2001-04-15 fl export copyright as Python attribute, not global
14n/a * 2001-04-28 fl added __copy__ methods (work in progress)
15n/a * 2001-05-14 fl fixes for 1.5.2 compatibility
16n/a * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
17n/a * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
18n/a * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
19n/a * 2001-10-21 fl added sub/subn primitive
20n/a * 2001-10-24 fl added finditer primitive (for 2.2 only)
21n/a * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
22n/a * 2002-11-09 fl fixed empty sub/subn return type
23n/a * 2003-04-18 mvl fully support 4-byte codes
24n/a * 2003-10-17 gn implemented non recursive scheme
25n/a * 2013-02-04 mrab added fullmatch primitive
26n/a *
27n/a * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
28n/a *
29n/a * This version of the SRE library can be redistributed under CNRI's
30n/a * Python 1.6 license. For any other use, please contact Secret Labs
31n/a * AB (info@pythonware.com).
32n/a *
33n/a * Portions of this engine have been developed in cooperation with
34n/a * CNRI. Hewlett-Packard provided funding for 1.6 integration and
35n/a * other compatibility work.
36n/a */
37n/a
38n/astatic const char copyright[] =
39n/a " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
40n/a
41n/a#define PY_SSIZE_T_CLEAN
42n/a
43n/a#include "Python.h"
44n/a#include "structmember.h" /* offsetof */
45n/a
46n/a#include "sre.h"
47n/a
48n/a#define SRE_CODE_BITS (8 * sizeof(SRE_CODE))
49n/a
50n/a#include <ctype.h>
51n/a
52n/a/* name of this module, minus the leading underscore */
53n/a#if !defined(SRE_MODULE)
54n/a#define SRE_MODULE "sre"
55n/a#endif
56n/a
57n/a#define SRE_PY_MODULE "re"
58n/a
59n/a/* defining this one enables tracing */
60n/a#undef VERBOSE
61n/a
62n/a/* -------------------------------------------------------------------- */
63n/a/* optional features */
64n/a
65n/a/* enables copy/deepcopy handling (work in progress) */
66n/a#undef USE_BUILTIN_COPY
67n/a
68n/a/* -------------------------------------------------------------------- */
69n/a
70n/a#if defined(_MSC_VER)
71n/a#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
72n/a#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
73n/a/* fastest possible local call under MSVC */
74n/a#define LOCAL(type) static __inline type __fastcall
75n/a#elif defined(USE_INLINE)
76n/a#define LOCAL(type) static inline type
77n/a#else
78n/a#define LOCAL(type) static type
79n/a#endif
80n/a
81n/a/* error codes */
82n/a#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
83n/a#define SRE_ERROR_STATE -2 /* illegal state */
84n/a#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
85n/a#define SRE_ERROR_MEMORY -9 /* out of memory */
86n/a#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
87n/a
88n/a#if defined(VERBOSE)
89n/a#define TRACE(v) printf v
90n/a#else
91n/a#define TRACE(v)
92n/a#endif
93n/a
94n/a/* -------------------------------------------------------------------- */
95n/a/* search engine state */
96n/a
97n/a#define SRE_IS_DIGIT(ch)\
98n/a ((ch) < 128 && Py_ISDIGIT(ch))
99n/a#define SRE_IS_SPACE(ch)\
100n/a ((ch) < 128 && Py_ISSPACE(ch))
101n/a#define SRE_IS_LINEBREAK(ch)\
102n/a ((ch) == '\n')
103n/a#define SRE_IS_ALNUM(ch)\
104n/a ((ch) < 128 && Py_ISALNUM(ch))
105n/a#define SRE_IS_WORD(ch)\
106n/a ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
107n/a
/* ASCII-only lower-casing: code points of 128 and above are returned
   unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOLOWER(ch);
}
112n/a
/* ASCII-only upper-casing: code points of 128 and above are returned
   unchanged. */
static unsigned int sre_upper(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return Py_TOUPPER(ch);
}
117n/a
118n/a/* locale-specific character predicates */
119n/a/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
120n/a * warnings when c's type supports only numbers < N+1 */
121n/a#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
122n/a#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
123n/a
/* Lower-case through the C library's tolower(); only values below 256
   are passed to it, anything larger is returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower(ch);
}
128n/a
/* Upper-case through the C library's toupper(); only values below 256
   are passed to it, anything larger is returned unchanged. */
static unsigned int sre_upper_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)toupper(ch);
}
133n/a
134n/a/* unicode-specific character predicates */
135n/a
136n/a#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
137n/a#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch)
138n/a#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch)
139n/a#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch)
140n/a#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_')
141n/a
/* Lower-case a code point via Py_UNICODE_TOLOWER. */
static unsigned int sre_lower_unicode(unsigned int ch)
{
    const unsigned int lowered = (unsigned int) Py_UNICODE_TOLOWER(ch);

    return lowered;
}
146n/a
/* Upper-case a code point via Py_UNICODE_TOUPPER. */
static unsigned int sre_upper_unicode(unsigned int ch)
{
    const unsigned int raised = (unsigned int) Py_UNICODE_TOUPPER(ch);

    return raised;
}
151n/a
152n/aLOCAL(int)
153n/asre_category(SRE_CODE category, unsigned int ch)
154n/a{
155n/a switch (category) {
156n/a
157n/a case SRE_CATEGORY_DIGIT:
158n/a return SRE_IS_DIGIT(ch);
159n/a case SRE_CATEGORY_NOT_DIGIT:
160n/a return !SRE_IS_DIGIT(ch);
161n/a case SRE_CATEGORY_SPACE:
162n/a return SRE_IS_SPACE(ch);
163n/a case SRE_CATEGORY_NOT_SPACE:
164n/a return !SRE_IS_SPACE(ch);
165n/a case SRE_CATEGORY_WORD:
166n/a return SRE_IS_WORD(ch);
167n/a case SRE_CATEGORY_NOT_WORD:
168n/a return !SRE_IS_WORD(ch);
169n/a case SRE_CATEGORY_LINEBREAK:
170n/a return SRE_IS_LINEBREAK(ch);
171n/a case SRE_CATEGORY_NOT_LINEBREAK:
172n/a return !SRE_IS_LINEBREAK(ch);
173n/a
174n/a case SRE_CATEGORY_LOC_WORD:
175n/a return SRE_LOC_IS_WORD(ch);
176n/a case SRE_CATEGORY_LOC_NOT_WORD:
177n/a return !SRE_LOC_IS_WORD(ch);
178n/a
179n/a case SRE_CATEGORY_UNI_DIGIT:
180n/a return SRE_UNI_IS_DIGIT(ch);
181n/a case SRE_CATEGORY_UNI_NOT_DIGIT:
182n/a return !SRE_UNI_IS_DIGIT(ch);
183n/a case SRE_CATEGORY_UNI_SPACE:
184n/a return SRE_UNI_IS_SPACE(ch);
185n/a case SRE_CATEGORY_UNI_NOT_SPACE:
186n/a return !SRE_UNI_IS_SPACE(ch);
187n/a case SRE_CATEGORY_UNI_WORD:
188n/a return SRE_UNI_IS_WORD(ch);
189n/a case SRE_CATEGORY_UNI_NOT_WORD:
190n/a return !SRE_UNI_IS_WORD(ch);
191n/a case SRE_CATEGORY_UNI_LINEBREAK:
192n/a return SRE_UNI_IS_LINEBREAK(ch);
193n/a case SRE_CATEGORY_UNI_NOT_LINEBREAK:
194n/a return !SRE_UNI_IS_LINEBREAK(ch);
195n/a }
196n/a return 0;
197n/a}
198n/a
199n/a/* helpers */
200n/a
201n/astatic void
202n/adata_stack_dealloc(SRE_STATE* state)
203n/a{
204n/a if (state->data_stack) {
205n/a PyMem_FREE(state->data_stack);
206n/a state->data_stack = NULL;
207n/a }
208n/a state->data_stack_size = state->data_stack_base = 0;
209n/a}
210n/a
211n/astatic int
212n/adata_stack_grow(SRE_STATE* state, Py_ssize_t size)
213n/a{
214n/a Py_ssize_t minsize, cursize;
215n/a minsize = state->data_stack_base+size;
216n/a cursize = state->data_stack_size;
217n/a if (cursize < minsize) {
218n/a void* stack;
219n/a cursize = minsize+minsize/4+1024;
220n/a TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
221n/a stack = PyMem_REALLOC(state->data_stack, cursize);
222n/a if (!stack) {
223n/a data_stack_dealloc(state);
224n/a return SRE_ERROR_MEMORY;
225n/a }
226n/a state->data_stack = (char *)stack;
227n/a state->data_stack_size = cursize;
228n/a }
229n/a return 0;
230n/a}
231n/a
232n/a/* generate 8-bit version */
233n/a
234n/a#define SRE_CHAR Py_UCS1
235n/a#define SIZEOF_SRE_CHAR 1
236n/a#define SRE(F) sre_ucs1_##F
237n/a#include "sre_lib.h"
238n/a
239n/a/* generate 16-bit unicode version */
240n/a
241n/a#define SRE_CHAR Py_UCS2
242n/a#define SIZEOF_SRE_CHAR 2
243n/a#define SRE(F) sre_ucs2_##F
244n/a#include "sre_lib.h"
245n/a
246n/a/* generate 32-bit unicode version */
247n/a
248n/a#define SRE_CHAR Py_UCS4
249n/a#define SIZEOF_SRE_CHAR 4
250n/a#define SRE(F) sre_ucs4_##F
251n/a#include "sre_lib.h"
252n/a
253n/a/* -------------------------------------------------------------------- */
254n/a/* factories and destructors */
255n/a
256n/a/* see sre.h for object declarations */
257n/astatic PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
258n/astatic PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
259n/a
260n/a
261n/a/*[clinic input]
262n/amodule _sre
263n/aclass _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
264n/aclass _sre.SRE_Match "MatchObject *" "&Match_Type"
265n/aclass _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
266n/a[clinic start generated code]*/
267n/a/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
268n/a
269n/astatic PyTypeObject Pattern_Type;
270n/astatic PyTypeObject Match_Type;
271n/astatic PyTypeObject Scanner_Type;
272n/a
273n/a/*[clinic input]
274n/a_sre.getcodesize -> int
275n/a[clinic start generated code]*/
276n/a
277n/astatic int
278n/a_sre_getcodesize_impl(PyObject *module)
279n/a/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/
280n/a{
281n/a return sizeof(SRE_CODE);
282n/a}
283n/a
284n/a/*[clinic input]
285n/a_sre.getlower -> int
286n/a
287n/a character: int
288n/a flags: int
289n/a /
290n/a
291n/a[clinic start generated code]*/
292n/a
293n/astatic int
294n/a_sre_getlower_impl(PyObject *module, int character, int flags)
295n/a/*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/
296n/a{
297n/a if (flags & SRE_FLAG_LOCALE)
298n/a return sre_lower_locale(character);
299n/a if (flags & SRE_FLAG_UNICODE)
300n/a return sre_lower_unicode(character);
301n/a return sre_lower(character);
302n/a}
303n/a
304n/aLOCAL(void)
305n/astate_reset(SRE_STATE* state)
306n/a{
307n/a /* FIXME: dynamic! */
308n/a /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
309n/a
310n/a state->lastmark = -1;
311n/a state->lastindex = -1;
312n/a
313n/a state->repeat = NULL;
314n/a
315n/a data_stack_dealloc(state);
316n/a}
317n/a
318n/astatic void*
319n/agetstring(PyObject* string, Py_ssize_t* p_length,
320n/a int* p_isbytes, int* p_charsize,
321n/a Py_buffer *view)
322n/a{
323n/a /* given a python object, return a data pointer, a length (in
324n/a characters), and a character size. return NULL if the object
325n/a is not a string (or not compatible) */
326n/a
327n/a /* Unicode objects do not support the buffer API. So, get the data
328n/a directly instead. */
329n/a if (PyUnicode_Check(string)) {
330n/a if (PyUnicode_READY(string) == -1)
331n/a return NULL;
332n/a *p_length = PyUnicode_GET_LENGTH(string);
333n/a *p_charsize = PyUnicode_KIND(string);
334n/a *p_isbytes = 0;
335n/a return PyUnicode_DATA(string);
336n/a }
337n/a
338n/a /* get pointer to byte string buffer */
339n/a if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
340n/a PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
341n/a return NULL;
342n/a }
343n/a
344n/a *p_length = view->len;
345n/a *p_charsize = 1;
346n/a *p_isbytes = 1;
347n/a
348n/a if (view->buf == NULL) {
349n/a PyErr_SetString(PyExc_ValueError, "Buffer is NULL");
350n/a PyBuffer_Release(view);
351n/a view->buf = NULL;
352n/a return NULL;
353n/a }
354n/a return view->buf;
355n/a}
356n/a
357n/aLOCAL(PyObject*)
358n/astate_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
359n/a Py_ssize_t start, Py_ssize_t end)
360n/a{
361n/a /* prepare state object */
362n/a
363n/a Py_ssize_t length;
364n/a int isbytes, charsize;
365n/a void* ptr;
366n/a
367n/a memset(state, 0, sizeof(SRE_STATE));
368n/a
369n/a state->mark = PyMem_New(void *, pattern->groups * 2);
370n/a if (!state->mark) {
371n/a PyErr_NoMemory();
372n/a goto err;
373n/a }
374n/a state->lastmark = -1;
375n/a state->lastindex = -1;
376n/a
377n/a state->buffer.buf = NULL;
378n/a ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
379n/a if (!ptr)
380n/a goto err;
381n/a
382n/a if (isbytes && pattern->isbytes == 0) {
383n/a PyErr_SetString(PyExc_TypeError,
384n/a "cannot use a string pattern on a bytes-like object");
385n/a goto err;
386n/a }
387n/a if (!isbytes && pattern->isbytes > 0) {
388n/a PyErr_SetString(PyExc_TypeError,
389n/a "cannot use a bytes pattern on a string-like object");
390n/a goto err;
391n/a }
392n/a
393n/a /* adjust boundaries */
394n/a if (start < 0)
395n/a start = 0;
396n/a else if (start > length)
397n/a start = length;
398n/a
399n/a if (end < 0)
400n/a end = 0;
401n/a else if (end > length)
402n/a end = length;
403n/a
404n/a state->isbytes = isbytes;
405n/a state->charsize = charsize;
406n/a
407n/a state->beginning = ptr;
408n/a
409n/a state->start = (void*) ((char*) ptr + start * state->charsize);
410n/a state->end = (void*) ((char*) ptr + end * state->charsize);
411n/a
412n/a Py_INCREF(string);
413n/a state->string = string;
414n/a state->pos = start;
415n/a state->endpos = end;
416n/a
417n/a if (pattern->flags & SRE_FLAG_LOCALE) {
418n/a state->lower = sre_lower_locale;
419n/a state->upper = sre_upper_locale;
420n/a }
421n/a else if (pattern->flags & SRE_FLAG_UNICODE) {
422n/a state->lower = sre_lower_unicode;
423n/a state->upper = sre_upper_unicode;
424n/a }
425n/a else {
426n/a state->lower = sre_lower;
427n/a state->upper = sre_upper;
428n/a }
429n/a
430n/a return string;
431n/a err:
432n/a PyMem_Del(state->mark);
433n/a state->mark = NULL;
434n/a if (state->buffer.buf)
435n/a PyBuffer_Release(&state->buffer);
436n/a return NULL;
437n/a}
438n/a
439n/aLOCAL(void)
440n/astate_fini(SRE_STATE* state)
441n/a{
442n/a if (state->buffer.buf)
443n/a PyBuffer_Release(&state->buffer);
444n/a Py_XDECREF(state->string);
445n/a data_stack_dealloc(state);
446n/a PyMem_Del(state->mark);
447n/a state->mark = NULL;
448n/a}
449n/a
450n/a/* calculate offset from start of string */
451n/a#define STATE_OFFSET(state, member)\
452n/a (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
453n/a
454n/aLOCAL(PyObject*)
455n/agetslice(int isbytes, const void *ptr,
456n/a PyObject* string, Py_ssize_t start, Py_ssize_t end)
457n/a{
458n/a if (isbytes) {
459n/a if (PyBytes_CheckExact(string) &&
460n/a start == 0 && end == PyBytes_GET_SIZE(string)) {
461n/a Py_INCREF(string);
462n/a return string;
463n/a }
464n/a return PyBytes_FromStringAndSize(
465n/a (const char *)ptr + start, end - start);
466n/a }
467n/a else {
468n/a return PyUnicode_Substring(string, start, end);
469n/a }
470n/a}
471n/a
472n/aLOCAL(PyObject*)
473n/astate_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
474n/a{
475n/a Py_ssize_t i, j;
476n/a
477n/a index = (index - 1) * 2;
478n/a
479n/a if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
480n/a if (empty)
481n/a /* want empty string */
482n/a i = j = 0;
483n/a else {
484n/a Py_RETURN_NONE;
485n/a }
486n/a } else {
487n/a i = STATE_OFFSET(state, state->mark[index]);
488n/a j = STATE_OFFSET(state, state->mark[index+1]);
489n/a }
490n/a
491n/a return getslice(state->isbytes, state->beginning, string, i, j);
492n/a}
493n/a
494n/astatic void
495n/apattern_error(Py_ssize_t status)
496n/a{
497n/a switch (status) {
498n/a case SRE_ERROR_RECURSION_LIMIT:
499n/a /* This error code seems to be unused. */
500n/a PyErr_SetString(
501n/a PyExc_RecursionError,
502n/a "maximum recursion limit exceeded"
503n/a );
504n/a break;
505n/a case SRE_ERROR_MEMORY:
506n/a PyErr_NoMemory();
507n/a break;
508n/a case SRE_ERROR_INTERRUPTED:
509n/a /* An exception has already been raised, so let it fly */
510n/a break;
511n/a default:
512n/a /* other error codes indicate compiler/engine bugs */
513n/a PyErr_SetString(
514n/a PyExc_RuntimeError,
515n/a "internal error in regular expression engine"
516n/a );
517n/a }
518n/a}
519n/a
520n/astatic void
521n/apattern_dealloc(PatternObject* self)
522n/a{
523n/a if (self->weakreflist != NULL)
524n/a PyObject_ClearWeakRefs((PyObject *) self);
525n/a Py_XDECREF(self->pattern);
526n/a Py_XDECREF(self->groupindex);
527n/a Py_XDECREF(self->indexgroup);
528n/a PyObject_DEL(self);
529n/a}
530n/a
531n/aLOCAL(Py_ssize_t)
532n/asre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
533n/a{
534n/a if (state->charsize == 1)
535n/a return sre_ucs1_match(state, pattern, match_all);
536n/a if (state->charsize == 2)
537n/a return sre_ucs2_match(state, pattern, match_all);
538n/a assert(state->charsize == 4);
539n/a return sre_ucs4_match(state, pattern, match_all);
540n/a}
541n/a
542n/aLOCAL(Py_ssize_t)
543n/asre_search(SRE_STATE* state, SRE_CODE* pattern)
544n/a{
545n/a if (state->charsize == 1)
546n/a return sre_ucs1_search(state, pattern);
547n/a if (state->charsize == 2)
548n/a return sre_ucs2_search(state, pattern);
549n/a assert(state->charsize == 4);
550n/a return sre_ucs4_search(state, pattern);
551n/a}
552n/a
553n/a/*[clinic input]
554n/a_sre.SRE_Pattern.match
555n/a
556n/a string: object
557n/a pos: Py_ssize_t = 0
558n/a endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
559n/a
560n/aMatches zero or more characters at the beginning of the string.
561n/a[clinic start generated code]*/
562n/a
563n/astatic PyObject *
564n/a_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
565n/a Py_ssize_t pos, Py_ssize_t endpos)
566n/a/*[clinic end generated code: output=ea2d838888510661 input=a2ba191647abebe5]*/
567n/a{
568n/a SRE_STATE state;
569n/a Py_ssize_t status;
570n/a PyObject *match;
571n/a
572n/a if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
573n/a return NULL;
574n/a
575n/a state.ptr = state.start;
576n/a
577n/a TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
578n/a
579n/a status = sre_match(&state, PatternObject_GetCode(self), 0);
580n/a
581n/a TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
582n/a if (PyErr_Occurred()) {
583n/a state_fini(&state);
584n/a return NULL;
585n/a }
586n/a
587n/a match = pattern_new_match(self, &state, status);
588n/a state_fini(&state);
589n/a return match;
590n/a}
591n/a
592n/a/*[clinic input]
593n/a_sre.SRE_Pattern.fullmatch
594n/a
595n/a string: object
596n/a pos: Py_ssize_t = 0
597n/a endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
598n/a
599n/aMatches against all of the string
600n/a[clinic start generated code]*/
601n/a
602n/astatic PyObject *
603n/a_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
604n/a Py_ssize_t pos, Py_ssize_t endpos)
605n/a/*[clinic end generated code: output=5833c47782a35f4a input=a6f640614aaefceb]*/
606n/a{
607n/a SRE_STATE state;
608n/a Py_ssize_t status;
609n/a PyObject *match;
610n/a
611n/a if (!state_init(&state, self, string, pos, endpos))
612n/a return NULL;
613n/a
614n/a state.ptr = state.start;
615n/a
616n/a TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
617n/a
618n/a status = sre_match(&state, PatternObject_GetCode(self), 1);
619n/a
620n/a TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
621n/a if (PyErr_Occurred()) {
622n/a state_fini(&state);
623n/a return NULL;
624n/a }
625n/a
626n/a match = pattern_new_match(self, &state, status);
627n/a state_fini(&state);
628n/a return match;
629n/a}
630n/a
631n/a/*[clinic input]
632n/a_sre.SRE_Pattern.search
633n/a
634n/a string: object
635n/a pos: Py_ssize_t = 0
636n/a endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
637n/a
638n/aScan through string looking for a match, and return a corresponding match object instance.
639n/a
640n/aReturn None if no position in the string matches.
641n/a[clinic start generated code]*/
642n/a
643n/astatic PyObject *
644n/a_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
645n/a Py_ssize_t pos, Py_ssize_t endpos)
646n/a/*[clinic end generated code: output=25f302a644e951e8 input=4ae5cb7dc38fed1b]*/
647n/a{
648n/a SRE_STATE state;
649n/a Py_ssize_t status;
650n/a PyObject *match;
651n/a
652n/a if (!state_init(&state, self, string, pos, endpos))
653n/a return NULL;
654n/a
655n/a TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
656n/a
657n/a status = sre_search(&state, PatternObject_GetCode(self));
658n/a
659n/a TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
660n/a
661n/a if (PyErr_Occurred()) {
662n/a state_fini(&state);
663n/a return NULL;
664n/a }
665n/a
666n/a match = pattern_new_match(self, &state, status);
667n/a state_fini(&state);
668n/a return match;
669n/a}
670n/a
671n/astatic PyObject*
672n/acall(const char* module, const char* function, PyObject* args)
673n/a{
674n/a PyObject* name;
675n/a PyObject* mod;
676n/a PyObject* func;
677n/a PyObject* result;
678n/a
679n/a if (!args)
680n/a return NULL;
681n/a name = PyUnicode_FromString(module);
682n/a if (!name)
683n/a return NULL;
684n/a mod = PyImport_Import(name);
685n/a Py_DECREF(name);
686n/a if (!mod)
687n/a return NULL;
688n/a func = PyObject_GetAttrString(mod, function);
689n/a Py_DECREF(mod);
690n/a if (!func)
691n/a return NULL;
692n/a result = PyObject_CallObject(func, args);
693n/a Py_DECREF(func);
694n/a Py_DECREF(args);
695n/a return result;
696n/a}
697n/a
698n/a#ifdef USE_BUILTIN_COPY
699n/astatic int
700n/adeepcopy(PyObject** object, PyObject* memo)
701n/a{
702n/a PyObject* copy;
703n/a
704n/a copy = call(
705n/a "copy", "deepcopy",
706n/a PyTuple_Pack(2, *object, memo)
707n/a );
708n/a if (!copy)
709n/a return 0;
710n/a
711n/a Py_SETREF(*object, copy);
712n/a
713n/a return 1; /* success */
714n/a}
715n/a#endif
716n/a
717n/a/*[clinic input]
718n/a_sre.SRE_Pattern.findall
719n/a
720n/a string: object
721n/a pos: Py_ssize_t = 0
722n/a endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
723n/a
724n/aReturn a list of all non-overlapping matches of pattern in string.
725n/a[clinic start generated code]*/
726n/a
727n/astatic PyObject *
728n/a_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
729n/a Py_ssize_t pos, Py_ssize_t endpos)
730n/a/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
731n/a{
732n/a SRE_STATE state;
733n/a PyObject* list;
734n/a Py_ssize_t status;
735n/a Py_ssize_t i, b, e;
736n/a
737n/a if (!state_init(&state, self, string, pos, endpos))
738n/a return NULL;
739n/a
740n/a list = PyList_New(0);
741n/a if (!list) {
742n/a state_fini(&state);
743n/a return NULL;
744n/a }
745n/a
746n/a while (state.start <= state.end) {
747n/a
748n/a PyObject* item;
749n/a
750n/a state_reset(&state);
751n/a
752n/a state.ptr = state.start;
753n/a
754n/a status = sre_search(&state, PatternObject_GetCode(self));
755n/a if (PyErr_Occurred())
756n/a goto error;
757n/a
758n/a if (status <= 0) {
759n/a if (status == 0)
760n/a break;
761n/a pattern_error(status);
762n/a goto error;
763n/a }
764n/a
765n/a /* don't bother to build a match object */
766n/a switch (self->groups) {
767n/a case 0:
768n/a b = STATE_OFFSET(&state, state.start);
769n/a e = STATE_OFFSET(&state, state.ptr);
770n/a item = getslice(state.isbytes, state.beginning,
771n/a string, b, e);
772n/a if (!item)
773n/a goto error;
774n/a break;
775n/a case 1:
776n/a item = state_getslice(&state, 1, string, 1);
777n/a if (!item)
778n/a goto error;
779n/a break;
780n/a default:
781n/a item = PyTuple_New(self->groups);
782n/a if (!item)
783n/a goto error;
784n/a for (i = 0; i < self->groups; i++) {
785n/a PyObject* o = state_getslice(&state, i+1, string, 1);
786n/a if (!o) {
787n/a Py_DECREF(item);
788n/a goto error;
789n/a }
790n/a PyTuple_SET_ITEM(item, i, o);
791n/a }
792n/a break;
793n/a }
794n/a
795n/a status = PyList_Append(list, item);
796n/a Py_DECREF(item);
797n/a if (status < 0)
798n/a goto error;
799n/a
800n/a if (state.ptr == state.start)
801n/a state.start = (void*) ((char*) state.ptr + state.charsize);
802n/a else
803n/a state.start = state.ptr;
804n/a
805n/a }
806n/a
807n/a state_fini(&state);
808n/a return list;
809n/a
810n/aerror:
811n/a Py_DECREF(list);
812n/a state_fini(&state);
813n/a return NULL;
814n/a
815n/a}
816n/a
817n/a/*[clinic input]
818n/a_sre.SRE_Pattern.finditer
819n/a
820n/a string: object
821n/a pos: Py_ssize_t = 0
822n/a endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
823n/a
824n/aReturn an iterator over all non-overlapping matches for the RE pattern in string.
825n/a
826n/aFor each match, the iterator returns a match object.
827n/a[clinic start generated code]*/
828n/a
829n/astatic PyObject *
830n/a_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
831n/a Py_ssize_t pos, Py_ssize_t endpos)
832n/a/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
833n/a{
834n/a PyObject* scanner;
835n/a PyObject* search;
836n/a PyObject* iterator;
837n/a
838n/a scanner = pattern_scanner(self, string, pos, endpos);
839n/a if (!scanner)
840n/a return NULL;
841n/a
842n/a search = PyObject_GetAttrString(scanner, "search");
843n/a Py_DECREF(scanner);
844n/a if (!search)
845n/a return NULL;
846n/a
847n/a iterator = PyCallIter_New(search, Py_None);
848n/a Py_DECREF(search);
849n/a
850n/a return iterator;
851n/a}
852n/a
853n/a/*[clinic input]
854n/a_sre.SRE_Pattern.scanner
855n/a
856n/a string: object
857n/a pos: Py_ssize_t = 0
858n/a endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
859n/a
860n/a[clinic start generated code]*/
861n/a
862n/astatic PyObject *
863n/a_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
864n/a Py_ssize_t pos, Py_ssize_t endpos)
865n/a/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
866n/a{
867n/a return pattern_scanner(self, string, pos, endpos);
868n/a}
869n/a
870n/a/*[clinic input]
871n/a_sre.SRE_Pattern.split
872n/a
873n/a string: object
874n/a maxsplit: Py_ssize_t = 0
875n/a
876n/aSplit string by the occurrences of pattern.
877n/a[clinic start generated code]*/
878n/a
879n/astatic PyObject *
880n/a_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
881n/a Py_ssize_t maxsplit)
882n/a/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
883n/a{
884n/a SRE_STATE state;
885n/a PyObject* list;
886n/a PyObject* item;
887n/a Py_ssize_t status;
888n/a Py_ssize_t n;
889n/a Py_ssize_t i;
890n/a void* last;
891n/a
892n/a assert(self->codesize != 0);
893n/a if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
894n/a if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
895n/a PyErr_SetString(PyExc_ValueError,
896n/a "split() requires a non-empty pattern match.");
897n/a return NULL;
898n/a }
899n/a if (PyErr_WarnEx(PyExc_FutureWarning,
900n/a "split() requires a non-empty pattern match.",
901n/a 1) < 0)
902n/a return NULL;
903n/a }
904n/a
905n/a if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
906n/a return NULL;
907n/a
908n/a list = PyList_New(0);
909n/a if (!list) {
910n/a state_fini(&state);
911n/a return NULL;
912n/a }
913n/a
914n/a n = 0;
915n/a last = state.start;
916n/a
917n/a while (!maxsplit || n < maxsplit) {
918n/a
919n/a state_reset(&state);
920n/a
921n/a state.ptr = state.start;
922n/a
923n/a status = sre_search(&state, PatternObject_GetCode(self));
924n/a if (PyErr_Occurred())
925n/a goto error;
926n/a
927n/a if (status <= 0) {
928n/a if (status == 0)
929n/a break;
930n/a pattern_error(status);
931n/a goto error;
932n/a }
933n/a
934n/a if (state.start == state.ptr) {
935n/a if (last == state.end || state.ptr == state.end)
936n/a break;
937n/a /* skip one character */
938n/a state.start = (void*) ((char*) state.ptr + state.charsize);
939n/a continue;
940n/a }
941n/a
942n/a /* get segment before this match */
943n/a item = getslice(state.isbytes, state.beginning,
944n/a string, STATE_OFFSET(&state, last),
945n/a STATE_OFFSET(&state, state.start)
946n/a );
947n/a if (!item)
948n/a goto error;
949n/a status = PyList_Append(list, item);
950n/a Py_DECREF(item);
951n/a if (status < 0)
952n/a goto error;
953n/a
954n/a /* add groups (if any) */
955n/a for (i = 0; i < self->groups; i++) {
956n/a item = state_getslice(&state, i+1, string, 0);
957n/a if (!item)
958n/a goto error;
959n/a status = PyList_Append(list, item);
960n/a Py_DECREF(item);
961n/a if (status < 0)
962n/a goto error;
963n/a }
964n/a
965n/a n = n + 1;
966n/a
967n/a last = state.start = state.ptr;
968n/a
969n/a }
970n/a
971n/a /* get segment following last match (even if empty) */
972n/a item = getslice(state.isbytes, state.beginning,
973n/a string, STATE_OFFSET(&state, last), state.endpos
974n/a );
975n/a if (!item)
976n/a goto error;
977n/a status = PyList_Append(list, item);
978n/a Py_DECREF(item);
979n/a if (status < 0)
980n/a goto error;
981n/a
982n/a state_fini(&state);
983n/a return list;
984n/a
985n/aerror:
986n/a Py_DECREF(list);
987n/a state_fini(&state);
988n/a return NULL;
989n/a
990n/a}
991n/a
992n/astatic PyObject*
993n/apattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
994n/a Py_ssize_t count, Py_ssize_t subn)
995n/a{
996n/a SRE_STATE state;
997n/a PyObject* list;
998n/a PyObject* joiner;
999n/a PyObject* item;
1000n/a PyObject* filter;
1001n/a PyObject* match;
1002n/a void* ptr;
1003n/a Py_ssize_t status;
1004n/a Py_ssize_t n;
1005n/a Py_ssize_t i, b, e;
1006n/a int isbytes, charsize;
1007n/a int filter_is_callable;
1008n/a Py_buffer view;
1009n/a
1010n/a if (PyCallable_Check(ptemplate)) {
1011n/a /* sub/subn takes either a function or a template */
1012n/a filter = ptemplate;
1013n/a Py_INCREF(filter);
1014n/a filter_is_callable = 1;
1015n/a } else {
1016n/a /* if not callable, check if it's a literal string */
1017n/a int literal;
1018n/a view.buf = NULL;
1019n/a ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
1020n/a b = charsize;
1021n/a if (ptr) {
1022n/a if (charsize == 1)
1023n/a literal = memchr(ptr, '\\', n) == NULL;
1024n/a else
1025n/a literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
1026n/a } else {
1027n/a PyErr_Clear();
1028n/a literal = 0;
1029n/a }
1030n/a if (view.buf)
1031n/a PyBuffer_Release(&view);
1032n/a if (literal) {
1033n/a filter = ptemplate;
1034n/a Py_INCREF(filter);
1035n/a filter_is_callable = 0;
1036n/a } else {
1037n/a /* not a literal; hand it over to the template compiler */
1038n/a filter = call(
1039n/a SRE_PY_MODULE, "_subx",
1040n/a PyTuple_Pack(2, self, ptemplate)
1041n/a );
1042n/a if (!filter)
1043n/a return NULL;
1044n/a filter_is_callable = PyCallable_Check(filter);
1045n/a }
1046n/a }
1047n/a
1048n/a if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
1049n/a Py_DECREF(filter);
1050n/a return NULL;
1051n/a }
1052n/a
1053n/a list = PyList_New(0);
1054n/a if (!list) {
1055n/a Py_DECREF(filter);
1056n/a state_fini(&state);
1057n/a return NULL;
1058n/a }
1059n/a
1060n/a n = i = 0;
1061n/a
1062n/a while (!count || n < count) {
1063n/a
1064n/a state_reset(&state);
1065n/a
1066n/a state.ptr = state.start;
1067n/a
1068n/a status = sre_search(&state, PatternObject_GetCode(self));
1069n/a if (PyErr_Occurred())
1070n/a goto error;
1071n/a
1072n/a if (status <= 0) {
1073n/a if (status == 0)
1074n/a break;
1075n/a pattern_error(status);
1076n/a goto error;
1077n/a }
1078n/a
1079n/a b = STATE_OFFSET(&state, state.start);
1080n/a e = STATE_OFFSET(&state, state.ptr);
1081n/a
1082n/a if (i < b) {
1083n/a /* get segment before this match */
1084n/a item = getslice(state.isbytes, state.beginning,
1085n/a string, i, b);
1086n/a if (!item)
1087n/a goto error;
1088n/a status = PyList_Append(list, item);
1089n/a Py_DECREF(item);
1090n/a if (status < 0)
1091n/a goto error;
1092n/a
1093n/a } else if (i == b && i == e && n > 0)
1094n/a /* ignore empty match on latest position */
1095n/a goto next;
1096n/a
1097n/a if (filter_is_callable) {
1098n/a /* pass match object through filter */
1099n/a match = pattern_new_match(self, &state, 1);
1100n/a if (!match)
1101n/a goto error;
1102n/a item = PyObject_CallFunctionObjArgs(filter, match, NULL);
1103n/a Py_DECREF(match);
1104n/a if (!item)
1105n/a goto error;
1106n/a } else {
1107n/a /* filter is literal string */
1108n/a item = filter;
1109n/a Py_INCREF(item);
1110n/a }
1111n/a
1112n/a /* add to list */
1113n/a if (item != Py_None) {
1114n/a status = PyList_Append(list, item);
1115n/a Py_DECREF(item);
1116n/a if (status < 0)
1117n/a goto error;
1118n/a }
1119n/a
1120n/a i = e;
1121n/a n = n + 1;
1122n/a
1123n/anext:
1124n/a /* move on */
1125n/a if (state.ptr == state.end)
1126n/a break;
1127n/a if (state.ptr == state.start)
1128n/a state.start = (void*) ((char*) state.ptr + state.charsize);
1129n/a else
1130n/a state.start = state.ptr;
1131n/a
1132n/a }
1133n/a
1134n/a /* get segment following last match */
1135n/a if (i < state.endpos) {
1136n/a item = getslice(state.isbytes, state.beginning,
1137n/a string, i, state.endpos);
1138n/a if (!item)
1139n/a goto error;
1140n/a status = PyList_Append(list, item);
1141n/a Py_DECREF(item);
1142n/a if (status < 0)
1143n/a goto error;
1144n/a }
1145n/a
1146n/a state_fini(&state);
1147n/a
1148n/a Py_DECREF(filter);
1149n/a
1150n/a /* convert list to single string (also removes list) */
1151n/a joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
1152n/a if (!joiner) {
1153n/a Py_DECREF(list);
1154n/a return NULL;
1155n/a }
1156n/a if (PyList_GET_SIZE(list) == 0) {
1157n/a Py_DECREF(list);
1158n/a item = joiner;
1159n/a }
1160n/a else {
1161n/a if (state.isbytes)
1162n/a item = _PyBytes_Join(joiner, list);
1163n/a else
1164n/a item = PyUnicode_Join(joiner, list);
1165n/a Py_DECREF(joiner);
1166n/a Py_DECREF(list);
1167n/a if (!item)
1168n/a return NULL;
1169n/a }
1170n/a
1171n/a if (subn)
1172n/a return Py_BuildValue("Nn", item, n);
1173n/a
1174n/a return item;
1175n/a
1176n/aerror:
1177n/a Py_DECREF(list);
1178n/a state_fini(&state);
1179n/a Py_DECREF(filter);
1180n/a return NULL;
1181n/a
1182n/a}
1183n/a
1184n/a/*[clinic input]
1185n/a_sre.SRE_Pattern.sub
1186n/a
1187n/a repl: object
1188n/a string: object
1189n/a count: Py_ssize_t = 0
1190n/a
1191n/aReturn the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
1192n/a[clinic start generated code]*/
1193n/a
1194n/astatic PyObject *
1195n/a_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
1196n/a PyObject *string, Py_ssize_t count)
1197n/a/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
1198n/a{
1199n/a return pattern_subx(self, repl, string, count, 0);
1200n/a}
1201n/a
1202n/a/*[clinic input]
1203n/a_sre.SRE_Pattern.subn
1204n/a
1205n/a repl: object
1206n/a string: object
1207n/a count: Py_ssize_t = 0
1208n/a
1209n/aReturn the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
1210n/a[clinic start generated code]*/
1211n/a
1212n/astatic PyObject *
1213n/a_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
1214n/a PyObject *string, Py_ssize_t count)
1215n/a/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
1216n/a{
1217n/a return pattern_subx(self, repl, string, count, 1);
1218n/a}
1219n/a
1220n/a/*[clinic input]
1221n/a_sre.SRE_Pattern.__copy__
1222n/a
1223n/a[clinic start generated code]*/
1224n/a
1225n/astatic PyObject *
1226n/a_sre_SRE_Pattern___copy___impl(PatternObject *self)
1227n/a/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
1228n/a{
1229n/a#ifdef USE_BUILTIN_COPY
1230n/a PatternObject* copy;
1231n/a int offset;
1232n/a
1233n/a copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
1234n/a if (!copy)
1235n/a return NULL;
1236n/a
1237n/a offset = offsetof(PatternObject, groups);
1238n/a
1239n/a Py_XINCREF(self->groupindex);
1240n/a Py_XINCREF(self->indexgroup);
1241n/a Py_XINCREF(self->pattern);
1242n/a
1243n/a memcpy((char*) copy + offset, (char*) self + offset,
1244n/a sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
1245n/a copy->weakreflist = NULL;
1246n/a
1247n/a return (PyObject*) copy;
1248n/a#else
1249n/a PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
1250n/a return NULL;
1251n/a#endif
1252n/a}
1253n/a
1254n/a/*[clinic input]
1255n/a_sre.SRE_Pattern.__deepcopy__
1256n/a
1257n/a memo: object
1258n/a
1259n/a[clinic start generated code]*/
1260n/a
1261n/astatic PyObject *
1262n/a_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
1263n/a/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
1264n/a{
1265n/a#ifdef USE_BUILTIN_COPY
1266n/a PatternObject* copy;
1267n/a
1268n/a copy = (PatternObject*) pattern_copy(self);
1269n/a if (!copy)
1270n/a return NULL;
1271n/a
1272n/a if (!deepcopy(&copy->groupindex, memo) ||
1273n/a !deepcopy(&copy->indexgroup, memo) ||
1274n/a !deepcopy(&copy->pattern, memo)) {
1275n/a Py_DECREF(copy);
1276n/a return NULL;
1277n/a }
1278n/a
1279n/a#else
1280n/a PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
1281n/a return NULL;
1282n/a#endif
1283n/a}
1284n/a
1285n/astatic PyObject *
1286n/apattern_repr(PatternObject *obj)
1287n/a{
1288n/a static const struct {
1289n/a const char *name;
1290n/a int value;
1291n/a } flag_names[] = {
1292n/a {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
1293n/a {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
1294n/a {"re.LOCALE", SRE_FLAG_LOCALE},
1295n/a {"re.MULTILINE", SRE_FLAG_MULTILINE},
1296n/a {"re.DOTALL", SRE_FLAG_DOTALL},
1297n/a {"re.UNICODE", SRE_FLAG_UNICODE},
1298n/a {"re.VERBOSE", SRE_FLAG_VERBOSE},
1299n/a {"re.DEBUG", SRE_FLAG_DEBUG},
1300n/a {"re.ASCII", SRE_FLAG_ASCII},
1301n/a };
1302n/a PyObject *result = NULL;
1303n/a PyObject *flag_items;
1304n/a size_t i;
1305n/a int flags = obj->flags;
1306n/a
1307n/a /* Omit re.UNICODE for valid string patterns. */
1308n/a if (obj->isbytes == 0 &&
1309n/a (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
1310n/a SRE_FLAG_UNICODE)
1311n/a flags &= ~SRE_FLAG_UNICODE;
1312n/a
1313n/a flag_items = PyList_New(0);
1314n/a if (!flag_items)
1315n/a return NULL;
1316n/a
1317n/a for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
1318n/a if (flags & flag_names[i].value) {
1319n/a PyObject *item = PyUnicode_FromString(flag_names[i].name);
1320n/a if (!item)
1321n/a goto done;
1322n/a
1323n/a if (PyList_Append(flag_items, item) < 0) {
1324n/a Py_DECREF(item);
1325n/a goto done;
1326n/a }
1327n/a Py_DECREF(item);
1328n/a flags &= ~flag_names[i].value;
1329n/a }
1330n/a }
1331n/a if (flags) {
1332n/a PyObject *item = PyUnicode_FromFormat("0x%x", flags);
1333n/a if (!item)
1334n/a goto done;
1335n/a
1336n/a if (PyList_Append(flag_items, item) < 0) {
1337n/a Py_DECREF(item);
1338n/a goto done;
1339n/a }
1340n/a Py_DECREF(item);
1341n/a }
1342n/a
1343n/a if (PyList_Size(flag_items) > 0) {
1344n/a PyObject *flags_result;
1345n/a PyObject *sep = PyUnicode_FromString("|");
1346n/a if (!sep)
1347n/a goto done;
1348n/a flags_result = PyUnicode_Join(sep, flag_items);
1349n/a Py_DECREF(sep);
1350n/a if (!flags_result)
1351n/a goto done;
1352n/a result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
1353n/a obj->pattern, flags_result);
1354n/a Py_DECREF(flags_result);
1355n/a }
1356n/a else {
1357n/a result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
1358n/a }
1359n/a
1360n/adone:
1361n/a Py_DECREF(flag_items);
1362n/a return result;
1363n/a}
1364n/a
1365n/aPyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
1366n/a
1367n/a/* PatternObject's 'groupindex' method. */
1368n/astatic PyObject *
1369n/apattern_groupindex(PatternObject *self)
1370n/a{
1371n/a return PyDictProxy_New(self->groupindex);
1372n/a}
1373n/a
1374n/astatic int _validate(PatternObject *self); /* Forward */
1375n/a
1376n/a/*[clinic input]
1377n/a_sre.compile
1378n/a
1379n/a pattern: object
1380n/a flags: int
1381n/a code: object(subclass_of='&PyList_Type')
1382n/a groups: Py_ssize_t
1383n/a groupindex: object(subclass_of='&PyDict_Type')
1384n/a indexgroup: object(subclass_of='&PyTuple_Type')
1385n/a
1386n/a[clinic start generated code]*/
1387n/a
1388n/astatic PyObject *
1389n/a_sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
1390n/a PyObject *code, Py_ssize_t groups, PyObject *groupindex,
1391n/a PyObject *indexgroup)
1392n/a/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
1393n/a{
1394n/a /* "compile" pattern descriptor to pattern object */
1395n/a
1396n/a PatternObject* self;
1397n/a Py_ssize_t i, n;
1398n/a
1399n/a n = PyList_GET_SIZE(code);
1400n/a /* coverity[ampersand_in_size] */
1401n/a self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
1402n/a if (!self)
1403n/a return NULL;
1404n/a self->weakreflist = NULL;
1405n/a self->pattern = NULL;
1406n/a self->groupindex = NULL;
1407n/a self->indexgroup = NULL;
1408n/a
1409n/a self->codesize = n;
1410n/a
1411n/a for (i = 0; i < n; i++) {
1412n/a PyObject *o = PyList_GET_ITEM(code, i);
1413n/a unsigned long value = PyLong_AsUnsignedLong(o);
1414n/a self->code[i] = (SRE_CODE) value;
1415n/a if ((unsigned long) self->code[i] != value) {
1416n/a PyErr_SetString(PyExc_OverflowError,
1417n/a "regular expression code size limit exceeded");
1418n/a break;
1419n/a }
1420n/a }
1421n/a
1422n/a if (PyErr_Occurred()) {
1423n/a Py_DECREF(self);
1424n/a return NULL;
1425n/a }
1426n/a
1427n/a if (pattern == Py_None) {
1428n/a self->isbytes = -1;
1429n/a }
1430n/a else {
1431n/a Py_ssize_t p_length;
1432n/a int charsize;
1433n/a Py_buffer view;
1434n/a view.buf = NULL;
1435n/a if (!getstring(pattern, &p_length, &self->isbytes,
1436n/a &charsize, &view)) {
1437n/a Py_DECREF(self);
1438n/a return NULL;
1439n/a }
1440n/a if (view.buf)
1441n/a PyBuffer_Release(&view);
1442n/a }
1443n/a
1444n/a Py_INCREF(pattern);
1445n/a self->pattern = pattern;
1446n/a
1447n/a self->flags = flags;
1448n/a
1449n/a self->groups = groups;
1450n/a
1451n/a Py_INCREF(groupindex);
1452n/a self->groupindex = groupindex;
1453n/a
1454n/a Py_INCREF(indexgroup);
1455n/a self->indexgroup = indexgroup;
1456n/a
1457n/a if (!_validate(self)) {
1458n/a Py_DECREF(self);
1459n/a return NULL;
1460n/a }
1461n/a
1462n/a return (PyObject*) self;
1463n/a}
1464n/a
1465n/a/* -------------------------------------------------------------------- */
1466n/a/* Code validation */
1467n/a
/* To learn more about this code, have a look at the _compile() function in
   Lib/sre_compile.py.  The validation functions below check the code array
   for conformance with the code patterns generated there.
1471n/a
1472n/a The nice thing about the generated code is that it is position-independent:
1473n/a all jumps are relative jumps forward. Also, jumps don't cross each other:
1474n/a the target of a later jump is always earlier than the target of an earlier
1475n/a jump. IOW, this is okay:
1476n/a
1477n/a J---------J-------T--------T
1478n/a \ \_____/ /
1479n/a \______________________/
1480n/a
1481n/a but this is not:
1482n/a
1483n/a J---------J-------T--------T
1484n/a \_________\_____/ /
1485n/a \____________/
1486n/a
1487n/a It also helps that SRE_CODE is always an unsigned type.
1488n/a*/
1489n/a
/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator.  The argument is a complete, parenthesized
   printf() argument list, e.g. VTRACE(("x=%d\n", x)); without VVERBOSE the
   macro expands to an empty statement. */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0) /* do nothing */
#endif

/* Report failure: traces the source line and makes the enclosing function
   return 0 */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)

/* Extract opcode, argument, or skip count from code array.

   These macros operate on variables of the function that uses them: they
   read through and advance 'code', compare it against 'end', and store the
   word they read into 'op', 'arg' or 'skip'.  Each of them FAILs (returns 0
   from the enclosing function) when 'code' has already reached 'end'. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
/* Like GET_ARG, but additionally checks the value before stepping over it:
   the skip count, reduced by 'adj', must not exceed the number of code
   words between the skip word itself and 'end'. */
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-adj > (uintptr_t)(end - code))         \
            FAIL;                                       \
        code++;                                         \
    } while (0)
/* The common case: no adjustment */
#define GET_SKIP GET_SKIP_ADJ(0)
1530n/a
1531n/astatic int
1532n/a_validate_charset(SRE_CODE *code, SRE_CODE *end)
1533n/a{
1534n/a /* Some variables are manipulated by the macros above */
1535n/a SRE_CODE op;
1536n/a SRE_CODE arg;
1537n/a SRE_CODE offset;
1538n/a int i;
1539n/a
1540n/a while (code < end) {
1541n/a GET_OP;
1542n/a switch (op) {
1543n/a
1544n/a case SRE_OP_NEGATE:
1545n/a break;
1546n/a
1547n/a case SRE_OP_LITERAL:
1548n/a GET_ARG;
1549n/a break;
1550n/a
1551n/a case SRE_OP_RANGE:
1552n/a case SRE_OP_RANGE_IGNORE:
1553n/a GET_ARG;
1554n/a GET_ARG;
1555n/a break;
1556n/a
1557n/a case SRE_OP_CHARSET:
1558n/a offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
1559n/a if (offset > (uintptr_t)(end - code))
1560n/a FAIL;
1561n/a code += offset;
1562n/a break;
1563n/a
1564n/a case SRE_OP_BIGCHARSET:
1565n/a GET_ARG; /* Number of blocks */
1566n/a offset = 256/sizeof(SRE_CODE); /* 256-byte table */
1567n/a if (offset > (uintptr_t)(end - code))
1568n/a FAIL;
1569n/a /* Make sure that each byte points to a valid block */
1570n/a for (i = 0; i < 256; i++) {
1571n/a if (((unsigned char *)code)[i] >= arg)
1572n/a FAIL;
1573n/a }
1574n/a code += offset;
1575n/a offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
1576n/a if (offset > (uintptr_t)(end - code))
1577n/a FAIL;
1578n/a code += offset;
1579n/a break;
1580n/a
1581n/a case SRE_OP_CATEGORY:
1582n/a GET_ARG;
1583n/a switch (arg) {
1584n/a case SRE_CATEGORY_DIGIT:
1585n/a case SRE_CATEGORY_NOT_DIGIT:
1586n/a case SRE_CATEGORY_SPACE:
1587n/a case SRE_CATEGORY_NOT_SPACE:
1588n/a case SRE_CATEGORY_WORD:
1589n/a case SRE_CATEGORY_NOT_WORD:
1590n/a case SRE_CATEGORY_LINEBREAK:
1591n/a case SRE_CATEGORY_NOT_LINEBREAK:
1592n/a case SRE_CATEGORY_LOC_WORD:
1593n/a case SRE_CATEGORY_LOC_NOT_WORD:
1594n/a case SRE_CATEGORY_UNI_DIGIT:
1595n/a case SRE_CATEGORY_UNI_NOT_DIGIT:
1596n/a case SRE_CATEGORY_UNI_SPACE:
1597n/a case SRE_CATEGORY_UNI_NOT_SPACE:
1598n/a case SRE_CATEGORY_UNI_WORD:
1599n/a case SRE_CATEGORY_UNI_NOT_WORD:
1600n/a case SRE_CATEGORY_UNI_LINEBREAK:
1601n/a case SRE_CATEGORY_UNI_NOT_LINEBREAK:
1602n/a break;
1603n/a default:
1604n/a FAIL;
1605n/a }
1606n/a break;
1607n/a
1608n/a default:
1609n/a FAIL;
1610n/a
1611n/a }
1612n/a }
1613n/a
1614n/a return 1;
1615n/a}
1616n/a
/* Validate the opcode sequence stored in code[0 .. end-code).

   Walks the range one opcode at a time, checks the operands of each opcode
   and recurses into the sub-patterns of compound opcodes (BRANCH, the
   REPEAT family, GROUPREF_EXISTS, ASSERT).  'groups' is the limit against
   which mark and group reference arguments are checked.

   Returns 1 if the whole range was consumed without a problem, 0 otherwise
   (via FAIL; no exception is set here). */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
    /* Some variables are manipulated by the macros above */
    SRE_CODE op;
    SRE_CODE arg;
    SRE_CODE skip;

    VTRACE(("code=%p, end=%p\n", code, end));

    /* an inverted range can result from a bad skip count in the caller */
    if (code > end)
        FAIL;

    while (code < end) {
        GET_OP;
        switch (op) {

        case SRE_OP_MARK:
            /* We don't check whether marks are properly nested; the
               sre_match() code is robust even if they don't, and the worst
               you can get is nonsensical match results. */
            GET_ARG;
            if (arg > 2 * (size_t)groups + 1) {
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
                FAIL;
            }
            break;

        case SRE_OP_LITERAL:
        case SRE_OP_NOT_LITERAL:
        case SRE_OP_LITERAL_IGNORE:
        case SRE_OP_NOT_LITERAL_IGNORE:
            GET_ARG;
            /* The arg is just a character, nothing to check */
            break;

        case SRE_OP_SUCCESS:
        case SRE_OP_FAILURE:
            /* Nothing to check; these normally end the matching process */
            break;

        case SRE_OP_AT:
            GET_ARG;
            /* only known position codes are accepted */
            switch (arg) {
            case SRE_AT_BEGINNING:
            case SRE_AT_BEGINNING_STRING:
            case SRE_AT_BEGINNING_LINE:
            case SRE_AT_END:
            case SRE_AT_END_LINE:
            case SRE_AT_END_STRING:
            case SRE_AT_BOUNDARY:
            case SRE_AT_NON_BOUNDARY:
            case SRE_AT_LOC_BOUNDARY:
            case SRE_AT_LOC_NON_BOUNDARY:
            case SRE_AT_UNI_BOUNDARY:
            case SRE_AT_UNI_NON_BOUNDARY:
                break;
            default:
                FAIL;
            }
            break;

        case SRE_OP_ANY:
        case SRE_OP_ANY_ALL:
            /* These have no operands */
            break;

        case SRE_OP_IN:
        case SRE_OP_IN_IGNORE:
            GET_SKIP;
            /* Stop 1 before the end; we check the FAILURE below */
            if (!_validate_charset(code, code+skip-2))
                FAIL;
            if (code[skip-2] != SRE_OP_FAILURE)
                FAIL;
            code += skip-1;
            break;

        case SRE_OP_INFO:
            {
                /* A minimal info field is
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
                   more follows. */
                SRE_CODE flags, i;
                SRE_CODE *newcode;
                GET_SKIP;
                /* newcode: where the skip count says the info block ends */
                newcode = code+skip-1;
                GET_ARG; flags = arg;
                GET_ARG;
                GET_ARG;
                /* Check that only valid flags are present */
                if ((flags & ~(SRE_INFO_PREFIX |
                               SRE_INFO_LITERAL |
                               SRE_INFO_CHARSET)) != 0)
                    FAIL;
                /* PREFIX and CHARSET are mutually exclusive */
                if ((flags & SRE_INFO_PREFIX) &&
                    (flags & SRE_INFO_CHARSET))
                    FAIL;
                /* LITERAL implies PREFIX */
                if ((flags & SRE_INFO_LITERAL) &&
                    !(flags & SRE_INFO_PREFIX))
                    FAIL;
                /* Validate the prefix */
                if (flags & SRE_INFO_PREFIX) {
                    SRE_CODE prefix_len;
                    GET_ARG; prefix_len = arg;
                    GET_ARG;
                    /* Here comes the prefix string */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    code += prefix_len;
                    /* And here comes the overlap table */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    /* Each overlap value should be < prefix_len */
                    for (i = 0; i < prefix_len; i++) {
                        if (code[i] >= prefix_len)
                            FAIL;
                    }
                    code += prefix_len;
                }
                /* Validate the charset */
                if (flags & SRE_INFO_CHARSET) {
                    if (!_validate_charset(code, newcode-1))
                        FAIL;
                    if (newcode[-1] != SRE_OP_FAILURE)
                        FAIL;
                    code = newcode;
                }
                else if (code != newcode) {
                    /* without a charset, the fields read so far must
                       account for the whole info block */
                    VTRACE(("code=%p, newcode=%p\n", code, newcode));
                    FAIL;
                }
            }
            break;

        case SRE_OP_BRANCH:
            {
                /* common target of the JUMPs that end the alternatives */
                SRE_CODE *target = NULL;
                for (;;) {
                    GET_SKIP;
                    /* a zero skip count ends the list of alternatives */
                    if (skip == 0)
                        break;
                    /* Stop 2 before the end; we check the JUMP below */
                    if (!_validate_inner(code, code+skip-3, groups))
                        FAIL;
                    code += skip-3;
                    /* Check that it ends with a JUMP, and that each JUMP
                       has the same target */
                    GET_OP;
                    if (op != SRE_OP_JUMP)
                        FAIL;
                    GET_SKIP;
                    if (target == NULL)
                        target = code+skip-1;
                    else if (code+skip-1 != target)
                        FAIL;
                }
            }
            break;

        case SRE_OP_REPEAT_ONE:
        case SRE_OP_MIN_REPEAT_ONE:
            {
                SRE_CODE min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                /* the repeated item, which must be followed by SUCCESS */
                if (!_validate_inner(code, code+skip-4, groups))
                    FAIL;
                code += skip-4;
                GET_OP;
                if (op != SRE_OP_SUCCESS)
                    FAIL;
            }
            break;

        case SRE_OP_REPEAT:
            {
                SRE_CODE min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                /* the repeated body, which must be followed by one of the
                   two UNTIL opcodes */
                if (!_validate_inner(code, code+skip-3, groups))
                    FAIL;
                code += skip-3;
                GET_OP;
                if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
                    FAIL;
            }
            break;

        case SRE_OP_GROUPREF:
        case SRE_OP_GROUPREF_IGNORE:
            GET_ARG;
            if (arg >= (size_t)groups)
                FAIL;
            break;

        case SRE_OP_GROUPREF_EXISTS:
            /* The regex syntax for this is: '(?(group)then|else)', where
               'group' is either an integer group number or a group name,
               'then' and 'else' are sub-regexes, and 'else' is optional. */
            GET_ARG;
            if (arg >= (size_t)groups)
                FAIL;
            GET_SKIP_ADJ(1);
            code--; /* The skip is relative to the first arg! */
            /* There are two possibilities here: if there is both a 'then'
               part and an 'else' part, the generated code looks like:

               GROUPREF_EXISTS
               <group>
               <skipyes>
               ...then part...
               JUMP
               <skipno>
               (<skipyes> jumps here)
               ...else part...
               (<skipno> jumps here)

               If there is only a 'then' part, it looks like:

               GROUPREF_EXISTS
               <group>
               <skip>
               ...then part...
               (<skip> jumps here)

               There is no direct way to decide which it is, and we don't want
               to allow arbitrary jumps anywhere in the code; so we just look
               for a JUMP opcode preceding our skip target.
            */
            if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
                code[skip-3] == SRE_OP_JUMP)
            {
                VTRACE(("both then and else parts present\n"));
                if (!_validate_inner(code+1, code+skip-3, groups))
                    FAIL;
                code += skip-2; /* Position after JUMP, at <skipno> */
                GET_SKIP;
                if (!_validate_inner(code, code+skip-1, groups))
                    FAIL;
                code += skip-1;
            }
            else {
                VTRACE(("only a then part present\n"));
                if (!_validate_inner(code+1, code+skip-1, groups))
                    FAIL;
                code += skip-1;
            }
            break;

        case SRE_OP_ASSERT:
        case SRE_OP_ASSERT_NOT:
            GET_SKIP;
            GET_ARG; /* 0 for lookahead, width for lookbehind */
            code--; /* Back up over arg to simplify math below */
            if (arg & 0x80000000)
                FAIL; /* Width too large */
            /* Stop 1 before the end; we check the SUCCESS below */
            if (!_validate_inner(code+1, code+skip-2, groups))
                FAIL;
            code += skip-2;
            GET_OP;
            if (op != SRE_OP_SUCCESS)
                FAIL;
            break;

        default:
            /* unknown opcode */
            FAIL;

        }
    }

    VTRACE(("okay\n"));
    return 1;
}
1905n/a
1906n/astatic int
1907n/a_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
1908n/a{
1909n/a if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
1910n/a code >= end || end[-1] != SRE_OP_SUCCESS)
1911n/a FAIL;
1912n/a return _validate_inner(code, end-1, groups);
1913n/a}
1914n/a
1915n/astatic int
1916n/a_validate(PatternObject *self)
1917n/a{
1918n/a if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
1919n/a {
1920n/a PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
1921n/a return 0;
1922n/a }
1923n/a else
1924n/a VTRACE(("Success!\n"));
1925n/a return 1;
1926n/a}
1927n/a
1928n/a/* -------------------------------------------------------------------- */
1929n/a/* match methods */
1930n/a
/* Deallocator for match objects: drops the references the match holds and
   frees the object itself. */
static void
match_dealloc(MatchObject* self)
{
    /* regs and string may be NULL, hence XDECREF */
    Py_XDECREF(self->regs);
    Py_XDECREF(self->string);
    Py_DECREF(self->pattern);
    PyObject_DEL(self);
}
1939n/a
1940n/astatic PyObject*
1941n/amatch_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
1942n/a{
1943n/a Py_ssize_t length;
1944n/a int isbytes, charsize;
1945n/a Py_buffer view;
1946n/a PyObject *result;
1947n/a void* ptr;
1948n/a Py_ssize_t i, j;
1949n/a
1950n/a if (index < 0 || index >= self->groups) {
1951n/a /* raise IndexError if we were given a bad group number */
1952n/a PyErr_SetString(
1953n/a PyExc_IndexError,
1954n/a "no such group"
1955n/a );
1956n/a return NULL;
1957n/a }
1958n/a
1959n/a index *= 2;
1960n/a
1961n/a if (self->string == Py_None || self->mark[index] < 0) {
1962n/a /* return default value if the string or group is undefined */
1963n/a Py_INCREF(def);
1964n/a return def;
1965n/a }
1966n/a
1967n/a ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
1968n/a if (ptr == NULL)
1969n/a return NULL;
1970n/a
1971n/a i = self->mark[index];
1972n/a j = self->mark[index+1];
1973n/a i = Py_MIN(i, length);
1974n/a j = Py_MIN(j, length);
1975n/a result = getslice(isbytes, ptr, self->string, i, j);
1976n/a if (isbytes && view.buf != NULL)
1977n/a PyBuffer_Release(&view);
1978n/a return result;
1979n/a}
1980n/a
1981n/astatic Py_ssize_t
1982n/amatch_getindex(MatchObject* self, PyObject* index)
1983n/a{
1984n/a Py_ssize_t i;
1985n/a
1986n/a if (index == NULL)
1987n/a /* Default value */
1988n/a return 0;
1989n/a
1990n/a if (PyIndex_Check(index)) {
1991n/a return PyNumber_AsSsize_t(index, NULL);
1992n/a }
1993n/a
1994n/a i = -1;
1995n/a
1996n/a if (self->pattern->groupindex) {
1997n/a index = PyObject_GetItem(self->pattern->groupindex, index);
1998n/a if (index) {
1999n/a if (PyLong_Check(index))
2000n/a i = PyLong_AsSsize_t(index);
2001n/a Py_DECREF(index);
2002n/a } else
2003n/a PyErr_Clear();
2004n/a }
2005n/a
2006n/a return i;
2007n/a}
2008n/a
2009n/astatic PyObject*
2010n/amatch_getslice(MatchObject* self, PyObject* index, PyObject* def)
2011n/a{
2012n/a return match_getslice_by_index(self, match_getindex(self, index), def);
2013n/a}
2014n/a
2015n/a/*[clinic input]
2016n/a_sre.SRE_Match.expand
2017n/a
2018n/a template: object
2019n/a
2020n/aReturn the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2021n/a[clinic start generated code]*/
2022n/a
2023n/astatic PyObject *
2024n/a_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
2025n/a/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
2026n/a{
2027n/a /* delegate to Python code */
2028n/a return call(
2029n/a SRE_PY_MODULE, "_expand",
2030n/a PyTuple_Pack(3, self->pattern, self, template)
2031n/a );
2032n/a}
2033n/a
2034n/astatic PyObject*
2035n/amatch_group(MatchObject* self, PyObject* args)
2036n/a{
2037n/a PyObject* result;
2038n/a Py_ssize_t i, size;
2039n/a
2040n/a size = PyTuple_GET_SIZE(args);
2041n/a
2042n/a switch (size) {
2043n/a case 0:
2044n/a result = match_getslice(self, Py_False, Py_None);
2045n/a break;
2046n/a case 1:
2047n/a result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
2048n/a break;
2049n/a default:
2050n/a /* fetch multiple items */
2051n/a result = PyTuple_New(size);
2052n/a if (!result)
2053n/a return NULL;
2054n/a for (i = 0; i < size; i++) {
2055n/a PyObject* item = match_getslice(
2056n/a self, PyTuple_GET_ITEM(args, i), Py_None
2057n/a );
2058n/a if (!item) {
2059n/a Py_DECREF(result);
2060n/a return NULL;
2061n/a }
2062n/a PyTuple_SET_ITEM(result, i, item);
2063n/a }
2064n/a break;
2065n/a }
2066n/a return result;
2067n/a}
2068n/a
2069n/astatic PyObject*
2070n/amatch_getitem(MatchObject* self, PyObject* name)
2071n/a{
2072n/a return match_getslice(self, name, Py_None);
2073n/a}
2074n/a
2075n/a/*[clinic input]
2076n/a_sre.SRE_Match.groups
2077n/a
2078n/a default: object = None
2079n/a Is used for groups that did not participate in the match.
2080n/a
2081n/aReturn a tuple containing all the subgroups of the match, from 1.
2082n/a[clinic start generated code]*/
2083n/a
2084n/astatic PyObject *
2085n/a_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
2086n/a/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
2087n/a{
2088n/a PyObject* result;
2089n/a Py_ssize_t index;
2090n/a
2091n/a result = PyTuple_New(self->groups-1);
2092n/a if (!result)
2093n/a return NULL;
2094n/a
2095n/a for (index = 1; index < self->groups; index++) {
2096n/a PyObject* item;
2097n/a item = match_getslice_by_index(self, index, default_value);
2098n/a if (!item) {
2099n/a Py_DECREF(result);
2100n/a return NULL;
2101n/a }
2102n/a PyTuple_SET_ITEM(result, index-1, item);
2103n/a }
2104n/a
2105n/a return result;
2106n/a}
2107n/a
2108n/a/*[clinic input]
2109n/a_sre.SRE_Match.groupdict
2110n/a
2111n/a default: object = None
2112n/a Is used for groups that did not participate in the match.
2113n/a
2114n/aReturn a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
2115n/a[clinic start generated code]*/
2116n/a
2117n/astatic PyObject *
2118n/a_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
2119n/a/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
2120n/a{
2121n/a PyObject* result;
2122n/a PyObject* keys;
2123n/a Py_ssize_t index;
2124n/a
2125n/a result = PyDict_New();
2126n/a if (!result || !self->pattern->groupindex)
2127n/a return result;
2128n/a
2129n/a keys = PyMapping_Keys(self->pattern->groupindex);
2130n/a if (!keys)
2131n/a goto failed;
2132n/a
2133n/a for (index = 0; index < PyList_GET_SIZE(keys); index++) {
2134n/a int status;
2135n/a PyObject* key;
2136n/a PyObject* value;
2137n/a key = PyList_GET_ITEM(keys, index);
2138n/a if (!key)
2139n/a goto failed;
2140n/a value = match_getslice(self, key, default_value);
2141n/a if (!value)
2142n/a goto failed;
2143n/a status = PyDict_SetItem(result, key, value);
2144n/a Py_DECREF(value);
2145n/a if (status < 0)
2146n/a goto failed;
2147n/a }
2148n/a
2149n/a Py_DECREF(keys);
2150n/a
2151n/a return result;
2152n/a
2153n/afailed:
2154n/a Py_XDECREF(keys);
2155n/a Py_DECREF(result);
2156n/a return NULL;
2157n/a}
2158n/a
2159n/a/*[clinic input]
2160n/a_sre.SRE_Match.start -> Py_ssize_t
2161n/a
2162n/a group: object(c_default="NULL") = 0
2163n/a /
2164n/a
2165n/aReturn index of the start of the substring matched by group.
2166n/a[clinic start generated code]*/
2167n/a
2168n/astatic Py_ssize_t
2169n/a_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
2170n/a/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
2171n/a{
2172n/a Py_ssize_t index = match_getindex(self, group);
2173n/a
2174n/a if (index < 0 || index >= self->groups) {
2175n/a PyErr_SetString(
2176n/a PyExc_IndexError,
2177n/a "no such group"
2178n/a );
2179n/a return -1;
2180n/a }
2181n/a
2182n/a /* mark is -1 if group is undefined */
2183n/a return self->mark[index*2];
2184n/a}
2185n/a
2186n/a/*[clinic input]
2187n/a_sre.SRE_Match.end -> Py_ssize_t
2188n/a
2189n/a group: object(c_default="NULL") = 0
2190n/a /
2191n/a
2192n/aReturn index of the end of the substring matched by group.
2193n/a[clinic start generated code]*/
2194n/a
2195n/astatic Py_ssize_t
2196n/a_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
2197n/a/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
2198n/a{
2199n/a Py_ssize_t index = match_getindex(self, group);
2200n/a
2201n/a if (index < 0 || index >= self->groups) {
2202n/a PyErr_SetString(
2203n/a PyExc_IndexError,
2204n/a "no such group"
2205n/a );
2206n/a return -1;
2207n/a }
2208n/a
2209n/a /* mark is -1 if group is undefined */
2210n/a return self->mark[index*2+1];
2211n/a}
2212n/a
2213n/aLOCAL(PyObject*)
2214n/a_pair(Py_ssize_t i1, Py_ssize_t i2)
2215n/a{
2216n/a PyObject* pair;
2217n/a PyObject* item;
2218n/a
2219n/a pair = PyTuple_New(2);
2220n/a if (!pair)
2221n/a return NULL;
2222n/a
2223n/a item = PyLong_FromSsize_t(i1);
2224n/a if (!item)
2225n/a goto error;
2226n/a PyTuple_SET_ITEM(pair, 0, item);
2227n/a
2228n/a item = PyLong_FromSsize_t(i2);
2229n/a if (!item)
2230n/a goto error;
2231n/a PyTuple_SET_ITEM(pair, 1, item);
2232n/a
2233n/a return pair;
2234n/a
2235n/a error:
2236n/a Py_DECREF(pair);
2237n/a return NULL;
2238n/a}
2239n/a
2240n/a/*[clinic input]
2241n/a_sre.SRE_Match.span
2242n/a
2243n/a group: object(c_default="NULL") = 0
2244n/a /
2245n/a
2246n/aFor MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
2247n/a[clinic start generated code]*/
2248n/a
2249n/astatic PyObject *
2250n/a_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
2251n/a/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
2252n/a{
2253n/a Py_ssize_t index = match_getindex(self, group);
2254n/a
2255n/a if (index < 0 || index >= self->groups) {
2256n/a PyErr_SetString(
2257n/a PyExc_IndexError,
2258n/a "no such group"
2259n/a );
2260n/a return NULL;
2261n/a }
2262n/a
2263n/a /* marks are -1 if group is undefined */
2264n/a return _pair(self->mark[index*2], self->mark[index*2+1]);
2265n/a}
2266n/a
2267n/astatic PyObject*
2268n/amatch_regs(MatchObject* self)
2269n/a{
2270n/a PyObject* regs;
2271n/a PyObject* item;
2272n/a Py_ssize_t index;
2273n/a
2274n/a regs = PyTuple_New(self->groups);
2275n/a if (!regs)
2276n/a return NULL;
2277n/a
2278n/a for (index = 0; index < self->groups; index++) {
2279n/a item = _pair(self->mark[index*2], self->mark[index*2+1]);
2280n/a if (!item) {
2281n/a Py_DECREF(regs);
2282n/a return NULL;
2283n/a }
2284n/a PyTuple_SET_ITEM(regs, index, item);
2285n/a }
2286n/a
2287n/a Py_INCREF(regs);
2288n/a self->regs = regs;
2289n/a
2290n/a return regs;
2291n/a}
2292n/a
2293n/a/*[clinic input]
2294n/a_sre.SRE_Match.__copy__
2295n/a
2296n/a[clinic start generated code]*/
2297n/a
2298n/astatic PyObject *
2299n/a_sre_SRE_Match___copy___impl(MatchObject *self)
2300n/a/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
2301n/a{
2302n/a#ifdef USE_BUILTIN_COPY
2303n/a MatchObject* copy;
2304n/a Py_ssize_t slots, offset;
2305n/a
2306n/a slots = 2 * (self->pattern->groups+1);
2307n/a
2308n/a copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
2309n/a if (!copy)
2310n/a return NULL;
2311n/a
2312n/a /* this value a constant, but any compiler should be able to
2313n/a figure that out all by itself */
2314n/a offset = offsetof(MatchObject, string);
2315n/a
2316n/a Py_XINCREF(self->pattern);
2317n/a Py_XINCREF(self->string);
2318n/a Py_XINCREF(self->regs);
2319n/a
2320n/a memcpy((char*) copy + offset, (char*) self + offset,
2321n/a sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
2322n/a
2323n/a return (PyObject*) copy;
2324n/a#else
2325n/a PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
2326n/a return NULL;
2327n/a#endif
2328n/a}
2329n/a
2330n/a/*[clinic input]
2331n/a_sre.SRE_Match.__deepcopy__
2332n/a
2333n/a memo: object
2334n/a
2335n/a[clinic start generated code]*/
2336n/a
2337n/astatic PyObject *
2338n/a_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
2339n/a/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
2340n/a{
2341n/a#ifdef USE_BUILTIN_COPY
2342n/a MatchObject* copy;
2343n/a
2344n/a copy = (MatchObject*) match_copy(self);
2345n/a if (!copy)
2346n/a return NULL;
2347n/a
2348n/a if (!deepcopy((PyObject**) &copy->pattern, memo) ||
2349n/a !deepcopy(&copy->string, memo) ||
2350n/a !deepcopy(&copy->regs, memo)) {
2351n/a Py_DECREF(copy);
2352n/a return NULL;
2353n/a }
2354n/a
2355n/a#else
2356n/a PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
2357n/a return NULL;
2358n/a#endif
2359n/a}
2360n/a
2361n/aPyDoc_STRVAR(match_doc,
2362n/a"The result of re.match() and re.search().\n\
2363n/aMatch objects always have a boolean value of True.");
2364n/a
2365n/aPyDoc_STRVAR(match_group_doc,
2366n/a"group([group1, ...]) -> str or tuple.\n\
2367n/a Return subgroup(s) of the match by indices or names.\n\
2368n/a For 0 returns the entire match.");
2369n/a
2370n/astatic PyObject *
2371n/amatch_lastindex_get(MatchObject *self)
2372n/a{
2373n/a if (self->lastindex >= 0)
2374n/a return PyLong_FromSsize_t(self->lastindex);
2375n/a Py_RETURN_NONE;
2376n/a}
2377n/a
2378n/astatic PyObject *
2379n/amatch_lastgroup_get(MatchObject *self)
2380n/a{
2381n/a if (self->pattern->indexgroup && self->lastindex >= 0) {
2382n/a PyObject* result = PySequence_GetItem(
2383n/a self->pattern->indexgroup, self->lastindex
2384n/a );
2385n/a if (result)
2386n/a return result;
2387n/a PyErr_Clear();
2388n/a }
2389n/a Py_RETURN_NONE;
2390n/a}
2391n/a
2392n/astatic PyObject *
2393n/amatch_regs_get(MatchObject *self)
2394n/a{
2395n/a if (self->regs) {
2396n/a Py_INCREF(self->regs);
2397n/a return self->regs;
2398n/a } else
2399n/a return match_regs(self);
2400n/a}
2401n/a
2402n/astatic PyObject *
2403n/amatch_repr(MatchObject *self)
2404n/a{
2405n/a PyObject *result;
2406n/a PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
2407n/a if (group0 == NULL)
2408n/a return NULL;
2409n/a result = PyUnicode_FromFormat(
2410n/a "<%s object; span=(%d, %d), match=%.50R>",
2411n/a Py_TYPE(self)->tp_name,
2412n/a self->mark[0], self->mark[1], group0);
2413n/a Py_DECREF(group0);
2414n/a return result;
2415n/a}
2416n/a
2417n/a
2418n/astatic PyObject*
2419n/apattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
2420n/a{
2421n/a /* create match object (from state object) */
2422n/a
2423n/a MatchObject* match;
2424n/a Py_ssize_t i, j;
2425n/a char* base;
2426n/a int n;
2427n/a
2428n/a if (status > 0) {
2429n/a
2430n/a /* create match object (with room for extra group marks) */
2431n/a /* coverity[ampersand_in_size] */
2432n/a match = PyObject_NEW_VAR(MatchObject, &Match_Type,
2433n/a 2*(pattern->groups+1));
2434n/a if (!match)
2435n/a return NULL;
2436n/a
2437n/a Py_INCREF(pattern);
2438n/a match->pattern = pattern;
2439n/a
2440n/a Py_INCREF(state->string);
2441n/a match->string = state->string;
2442n/a
2443n/a match->regs = NULL;
2444n/a match->groups = pattern->groups+1;
2445n/a
2446n/a /* fill in group slices */
2447n/a
2448n/a base = (char*) state->beginning;
2449n/a n = state->charsize;
2450n/a
2451n/a match->mark[0] = ((char*) state->start - base) / n;
2452n/a match->mark[1] = ((char*) state->ptr - base) / n;
2453n/a
2454n/a for (i = j = 0; i < pattern->groups; i++, j+=2)
2455n/a if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
2456n/a match->mark[j+2] = ((char*) state->mark[j] - base) / n;
2457n/a match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
2458n/a } else
2459n/a match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
2460n/a
2461n/a match->pos = state->pos;
2462n/a match->endpos = state->endpos;
2463n/a
2464n/a match->lastindex = state->lastindex;
2465n/a
2466n/a return (PyObject*) match;
2467n/a
2468n/a } else if (status == 0) {
2469n/a
2470n/a /* no match */
2471n/a Py_RETURN_NONE;
2472n/a
2473n/a }
2474n/a
2475n/a /* internal error */
2476n/a pattern_error(status);
2477n/a return NULL;
2478n/a}
2479n/a
2480n/a
2481n/a/* -------------------------------------------------------------------- */
2482n/a/* scanner methods (experimental) */
2483n/a
2484n/astatic void
2485n/ascanner_dealloc(ScannerObject* self)
2486n/a{
2487n/a state_fini(&self->state);
2488n/a Py_XDECREF(self->pattern);
2489n/a PyObject_DEL(self);
2490n/a}
2491n/a
2492n/a/*[clinic input]
2493n/a_sre.SRE_Scanner.match
2494n/a
2495n/a[clinic start generated code]*/
2496n/a
2497n/astatic PyObject *
2498n/a_sre_SRE_Scanner_match_impl(ScannerObject *self)
2499n/a/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
2500n/a{
2501n/a SRE_STATE* state = &self->state;
2502n/a PyObject* match;
2503n/a Py_ssize_t status;
2504n/a
2505n/a if (state->start == NULL)
2506n/a Py_RETURN_NONE;
2507n/a
2508n/a state_reset(state);
2509n/a
2510n/a state->ptr = state->start;
2511n/a
2512n/a status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
2513n/a if (PyErr_Occurred())
2514n/a return NULL;
2515n/a
2516n/a match = pattern_new_match((PatternObject*) self->pattern,
2517n/a state, status);
2518n/a
2519n/a if (status == 0)
2520n/a state->start = NULL;
2521n/a else if (state->ptr != state->start)
2522n/a state->start = state->ptr;
2523n/a else if (state->ptr != state->end)
2524n/a state->start = (void*) ((char*) state->ptr + state->charsize);
2525n/a else
2526n/a state->start = NULL;
2527n/a
2528n/a return match;
2529n/a}
2530n/a
2531n/a
2532n/a/*[clinic input]
2533n/a_sre.SRE_Scanner.search
2534n/a
2535n/a[clinic start generated code]*/
2536n/a
2537n/astatic PyObject *
2538n/a_sre_SRE_Scanner_search_impl(ScannerObject *self)
2539n/a/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
2540n/a{
2541n/a SRE_STATE* state = &self->state;
2542n/a PyObject* match;
2543n/a Py_ssize_t status;
2544n/a
2545n/a if (state->start == NULL)
2546n/a Py_RETURN_NONE;
2547n/a
2548n/a state_reset(state);
2549n/a
2550n/a state->ptr = state->start;
2551n/a
2552n/a status = sre_search(state, PatternObject_GetCode(self->pattern));
2553n/a if (PyErr_Occurred())
2554n/a return NULL;
2555n/a
2556n/a match = pattern_new_match((PatternObject*) self->pattern,
2557n/a state, status);
2558n/a
2559n/a if (status == 0)
2560n/a state->start = NULL;
2561n/a else if (state->ptr != state->start)
2562n/a state->start = state->ptr;
2563n/a else if (state->ptr != state->end)
2564n/a state->start = (void*) ((char*) state->ptr + state->charsize);
2565n/a else
2566n/a state->start = NULL;
2567n/a
2568n/a return match;
2569n/a}
2570n/a
2571n/astatic PyObject *
2572n/apattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
2573n/a{
2574n/a ScannerObject* scanner;
2575n/a
2576n/a /* create scanner object */
2577n/a scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
2578n/a if (!scanner)
2579n/a return NULL;
2580n/a scanner->pattern = NULL;
2581n/a
2582n/a /* create search state object */
2583n/a if (!state_init(&scanner->state, self, string, pos, endpos)) {
2584n/a Py_DECREF(scanner);
2585n/a return NULL;
2586n/a }
2587n/a
2588n/a Py_INCREF(self);
2589n/a scanner->pattern = (PyObject*) self;
2590n/a
2591n/a return (PyObject*) scanner;
2592n/a}
2593n/a
2594n/astatic Py_hash_t
2595n/apattern_hash(PatternObject *self)
2596n/a{
2597n/a Py_hash_t hash, hash2;
2598n/a
2599n/a hash = PyObject_Hash(self->pattern);
2600n/a if (hash == -1) {
2601n/a return -1;
2602n/a }
2603n/a
2604n/a hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2605n/a hash ^= hash2;
2606n/a
2607n/a hash ^= self->flags;
2608n/a hash ^= self->isbytes;
2609n/a hash ^= self->codesize;
2610n/a
2611n/a if (hash == -1) {
2612n/a hash = -2;
2613n/a }
2614n/a return hash;
2615n/a}
2616n/a
2617n/astatic PyObject*
2618n/apattern_richcompare(PyObject *lefto, PyObject *righto, int op)
2619n/a{
2620n/a PatternObject *left, *right;
2621n/a int cmp;
2622n/a
2623n/a if (op != Py_EQ && op != Py_NE) {
2624n/a Py_RETURN_NOTIMPLEMENTED;
2625n/a }
2626n/a
2627n/a if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
2628n/a Py_RETURN_NOTIMPLEMENTED;
2629n/a }
2630n/a
2631n/a if (lefto == righto) {
2632n/a /* a pattern is equal to itself */
2633n/a return PyBool_FromLong(op == Py_EQ);
2634n/a }
2635n/a
2636n/a left = (PatternObject *)lefto;
2637n/a right = (PatternObject *)righto;
2638n/a
2639n/a cmp = (left->flags == right->flags
2640n/a && left->isbytes == right->isbytes
2641n/a && left->codesize == right->codesize);
2642n/a if (cmp) {
2643n/a /* Compare the code and the pattern because the same pattern can
2644n/a produce different codes depending on the locale used to compile the
2645n/a pattern when the re.LOCALE flag is used. Don't compare groups,
2646n/a indexgroup nor groupindex: they are derivated from the pattern. */
2647n/a cmp = (memcmp(left->code, right->code,
2648n/a sizeof(left->code[0]) * left->codesize) == 0);
2649n/a }
2650n/a if (cmp) {
2651n/a cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
2652n/a Py_EQ);
2653n/a if (cmp < 0) {
2654n/a return NULL;
2655n/a }
2656n/a }
2657n/a if (op == Py_NE) {
2658n/a cmp = !cmp;
2659n/a }
2660n/a return PyBool_FromLong(cmp);
2661n/a}
2662n/a
2663n/a#include "clinic/_sre.c.h"
2664n/a
2665n/astatic PyMethodDef pattern_methods[] = {
2666n/a _SRE_SRE_PATTERN_MATCH_METHODDEF
2667n/a _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
2668n/a _SRE_SRE_PATTERN_SEARCH_METHODDEF
2669n/a _SRE_SRE_PATTERN_SUB_METHODDEF
2670n/a _SRE_SRE_PATTERN_SUBN_METHODDEF
2671n/a _SRE_SRE_PATTERN_FINDALL_METHODDEF
2672n/a _SRE_SRE_PATTERN_SPLIT_METHODDEF
2673n/a _SRE_SRE_PATTERN_FINDITER_METHODDEF
2674n/a _SRE_SRE_PATTERN_SCANNER_METHODDEF
2675n/a _SRE_SRE_PATTERN___COPY___METHODDEF
2676n/a _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
2677n/a {NULL, NULL}
2678n/a};
2679n/a
2680n/astatic PyGetSetDef pattern_getset[] = {
2681n/a {"groupindex", (getter)pattern_groupindex, (setter)NULL,
2682n/a "A dictionary mapping group names to group numbers."},
2683n/a {NULL} /* Sentinel */
2684n/a};
2685n/a
2686n/a#define PAT_OFF(x) offsetof(PatternObject, x)
2687n/astatic PyMemberDef pattern_members[] = {
2688n/a {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
2689n/a {"flags", T_INT, PAT_OFF(flags), READONLY},
2690n/a {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
2691n/a {NULL} /* Sentinel */
2692n/a};
2693n/a
2694n/astatic PyTypeObject Pattern_Type = {
2695n/a PyVarObject_HEAD_INIT(NULL, 0)
2696n/a "_" SRE_MODULE ".SRE_Pattern",
2697n/a sizeof(PatternObject), sizeof(SRE_CODE),
2698n/a (destructor)pattern_dealloc, /* tp_dealloc */
2699n/a 0, /* tp_print */
2700n/a 0, /* tp_getattr */
2701n/a 0, /* tp_setattr */
2702n/a 0, /* tp_reserved */
2703n/a (reprfunc)pattern_repr, /* tp_repr */
2704n/a 0, /* tp_as_number */
2705n/a 0, /* tp_as_sequence */
2706n/a 0, /* tp_as_mapping */
2707n/a (hashfunc)pattern_hash, /* tp_hash */
2708n/a 0, /* tp_call */
2709n/a 0, /* tp_str */
2710n/a 0, /* tp_getattro */
2711n/a 0, /* tp_setattro */
2712n/a 0, /* tp_as_buffer */
2713n/a Py_TPFLAGS_DEFAULT, /* tp_flags */
2714n/a pattern_doc, /* tp_doc */
2715n/a 0, /* tp_traverse */
2716n/a 0, /* tp_clear */
2717n/a pattern_richcompare, /* tp_richcompare */
2718n/a offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2719n/a 0, /* tp_iter */
2720n/a 0, /* tp_iternext */
2721n/a pattern_methods, /* tp_methods */
2722n/a pattern_members, /* tp_members */
2723n/a pattern_getset, /* tp_getset */
2724n/a};
2725n/a
2726n/a/* Match objects do not support length or assignment, but do support
2727n/a __getitem__. */
2728n/astatic PyMappingMethods match_as_mapping = {
2729n/a NULL,
2730n/a (binaryfunc)match_getitem,
2731n/a NULL
2732n/a};
2733n/a
2734n/astatic PyMethodDef match_methods[] = {
2735n/a {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
2736n/a _SRE_SRE_MATCH_START_METHODDEF
2737n/a _SRE_SRE_MATCH_END_METHODDEF
2738n/a _SRE_SRE_MATCH_SPAN_METHODDEF
2739n/a _SRE_SRE_MATCH_GROUPS_METHODDEF
2740n/a _SRE_SRE_MATCH_GROUPDICT_METHODDEF
2741n/a _SRE_SRE_MATCH_EXPAND_METHODDEF
2742n/a _SRE_SRE_MATCH___COPY___METHODDEF
2743n/a _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
2744n/a {NULL, NULL}
2745n/a};
2746n/a
2747n/astatic PyGetSetDef match_getset[] = {
2748n/a {"lastindex", (getter)match_lastindex_get, (setter)NULL},
2749n/a {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
2750n/a {"regs", (getter)match_regs_get, (setter)NULL},
2751n/a {NULL}
2752n/a};
2753n/a
2754n/a#define MATCH_OFF(x) offsetof(MatchObject, x)
2755n/astatic PyMemberDef match_members[] = {
2756n/a {"string", T_OBJECT, MATCH_OFF(string), READONLY},
2757n/a {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
2758n/a {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
2759n/a {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
2760n/a {NULL}
2761n/a};
2762n/a
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
2765n/a
2766n/astatic PyTypeObject Match_Type = {
2767n/a PyVarObject_HEAD_INIT(NULL,0)
2768n/a "_" SRE_MODULE ".SRE_Match",
2769n/a sizeof(MatchObject), sizeof(Py_ssize_t),
2770n/a (destructor)match_dealloc, /* tp_dealloc */
2771n/a 0, /* tp_print */
2772n/a 0, /* tp_getattr */
2773n/a 0, /* tp_setattr */
2774n/a 0, /* tp_reserved */
2775n/a (reprfunc)match_repr, /* tp_repr */
2776n/a 0, /* tp_as_number */
2777n/a 0, /* tp_as_sequence */
2778n/a &match_as_mapping, /* tp_as_mapping */
2779n/a 0, /* tp_hash */
2780n/a 0, /* tp_call */
2781n/a 0, /* tp_str */
2782n/a 0, /* tp_getattro */
2783n/a 0, /* tp_setattro */
2784n/a 0, /* tp_as_buffer */
2785n/a Py_TPFLAGS_DEFAULT, /* tp_flags */
2786n/a match_doc, /* tp_doc */
2787n/a 0, /* tp_traverse */
2788n/a 0, /* tp_clear */
2789n/a 0, /* tp_richcompare */
2790n/a 0, /* tp_weaklistoffset */
2791n/a 0, /* tp_iter */
2792n/a 0, /* tp_iternext */
2793n/a match_methods, /* tp_methods */
2794n/a match_members, /* tp_members */
2795n/a match_getset, /* tp_getset */
2796n/a};
2797n/a
2798n/astatic PyMethodDef scanner_methods[] = {
2799n/a _SRE_SRE_SCANNER_MATCH_METHODDEF
2800n/a _SRE_SRE_SCANNER_SEARCH_METHODDEF
2801n/a {NULL, NULL}
2802n/a};
2803n/a
2804n/a#define SCAN_OFF(x) offsetof(ScannerObject, x)
2805n/astatic PyMemberDef scanner_members[] = {
2806n/a {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY},
2807n/a {NULL} /* Sentinel */
2808n/a};
2809n/a
2810n/astatic PyTypeObject Scanner_Type = {
2811n/a PyVarObject_HEAD_INIT(NULL, 0)
2812n/a "_" SRE_MODULE ".SRE_Scanner",
2813n/a sizeof(ScannerObject), 0,
2814n/a (destructor)scanner_dealloc,/* tp_dealloc */
2815n/a 0, /* tp_print */
2816n/a 0, /* tp_getattr */
2817n/a 0, /* tp_setattr */
2818n/a 0, /* tp_reserved */
2819n/a 0, /* tp_repr */
2820n/a 0, /* tp_as_number */
2821n/a 0, /* tp_as_sequence */
2822n/a 0, /* tp_as_mapping */
2823n/a 0, /* tp_hash */
2824n/a 0, /* tp_call */
2825n/a 0, /* tp_str */
2826n/a 0, /* tp_getattro */
2827n/a 0, /* tp_setattro */
2828n/a 0, /* tp_as_buffer */
2829n/a Py_TPFLAGS_DEFAULT, /* tp_flags */
2830n/a 0, /* tp_doc */
2831n/a 0, /* tp_traverse */
2832n/a 0, /* tp_clear */
2833n/a 0, /* tp_richcompare */
2834n/a 0, /* tp_weaklistoffset */
2835n/a 0, /* tp_iter */
2836n/a 0, /* tp_iternext */
2837n/a scanner_methods, /* tp_methods */
2838n/a scanner_members, /* tp_members */
2839n/a 0, /* tp_getset */
2840n/a};
2841n/a
2842n/astatic PyMethodDef _functions[] = {
2843n/a _SRE_COMPILE_METHODDEF
2844n/a _SRE_GETCODESIZE_METHODDEF
2845n/a _SRE_GETLOWER_METHODDEF
2846n/a {NULL, NULL}
2847n/a};
2848n/a
2849n/astatic struct PyModuleDef sremodule = {
2850n/a PyModuleDef_HEAD_INIT,
2851n/a "_" SRE_MODULE,
2852n/a NULL,
2853n/a -1,
2854n/a _functions,
2855n/a NULL,
2856n/a NULL,
2857n/a NULL,
2858n/a NULL
2859n/a};
2860n/a
2861n/aPyMODINIT_FUNC PyInit__sre(void)
2862n/a{
2863n/a PyObject* m;
2864n/a PyObject* d;
2865n/a PyObject* x;
2866n/a
2867n/a /* Patch object types */
2868n/a if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
2869n/a PyType_Ready(&Scanner_Type))
2870n/a return NULL;
2871n/a
2872n/a m = PyModule_Create(&sremodule);
2873n/a if (m == NULL)
2874n/a return NULL;
2875n/a d = PyModule_GetDict(m);
2876n/a
2877n/a x = PyLong_FromLong(SRE_MAGIC);
2878n/a if (x) {
2879n/a PyDict_SetItemString(d, "MAGIC", x);
2880n/a Py_DECREF(x);
2881n/a }
2882n/a
2883n/a x = PyLong_FromLong(sizeof(SRE_CODE));
2884n/a if (x) {
2885n/a PyDict_SetItemString(d, "CODESIZE", x);
2886n/a Py_DECREF(x);
2887n/a }
2888n/a
2889n/a x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
2890n/a if (x) {
2891n/a PyDict_SetItemString(d, "MAXREPEAT", x);
2892n/a Py_DECREF(x);
2893n/a }
2894n/a
2895n/a x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
2896n/a if (x) {
2897n/a PyDict_SetItemString(d, "MAXGROUPS", x);
2898n/a Py_DECREF(x);
2899n/a }
2900n/a
2901n/a x = PyUnicode_FromString(copyright);
2902n/a if (x) {
2903n/a PyDict_SetItemString(d, "copyright", x);
2904n/a Py_DECREF(x);
2905n/a }
2906n/a return m;
2907n/a}
2908n/a
2909n/a/* vim:ts=4:sw=4:et
2910n/a*/