»Core Development>Code coverage>Objects/stringobject.c

Python code coverage for Objects/stringobject.c

#countcontent
1n/a/* String (str/bytes) object implementation */
2n/a
3n/a#define PY_SSIZE_T_CLEAN
4n/a
5n/a#include "Python.h"
6n/a#include <ctype.h>
7n/a#include <stddef.h>
8n/a
9n/a#ifdef COUNT_ALLOCS
10n/aPy_ssize_t null_strings, one_strings;
11n/a#endif
12n/a
13n/astatic PyStringObject *characters[UCHAR_MAX + 1];
14n/astatic PyStringObject *nullstring;
15n/a
16n/a/* This dictionary holds all interned strings. Note that references to
17n/a strings in this dictionary are *not* counted in the string's ob_refcnt.
18n/a When the interned string reaches a refcnt of 0 the string deallocation
19n/a function will delete the reference from this dictionary.
20n/a
21n/a Another way to look at this is to say that the actual reference
22n/a count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23n/a*/
24n/astatic PyObject *interned;
25n/a
26n/a/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27n/a for a string of length n should request PyStringObject_SIZE + n bytes.
28n/a
29n/a Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30n/a 3 bytes per string allocation on a typical system.
31n/a*/
32n/a#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33n/a
34n/a/*
35n/a For both PyString_FromString() and PyString_FromStringAndSize(), the
36n/a parameter `size' denotes number of characters to allocate, not counting any
37n/a null terminating character.
38n/a
39n/a For PyString_FromString(), the parameter `str' points to a null-terminated
40n/a string containing exactly `size' bytes.
41n/a
42n/a For PyString_FromStringAndSize(), the parameter `str' is
43n/a either NULL or else points to a string containing at least `size' bytes.
44n/a For PyString_FromStringAndSize(), the string in the `str' parameter does
45n/a not have to be null-terminated. (Therefore it is safe to construct a
46n/a substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47n/a If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
48n/a bytes (setting the last byte to the null terminating character) and you can
49n/a fill in the data yourself. If `str' is non-NULL then the resulting
50n/a PyString object must be treated as immutable and you must not fill in nor
51n/a alter the data yourself, since the strings may be shared.
52n/a
53n/a The PyObject member `op->ob_size', which denotes the number of "extra
54n/a items" in a variable-size object, will contain the number of bytes
55n/a allocated for string data, not counting the null terminating character. It
56n/a is therefore equal to the `size' parameter (for
57n/a PyString_FromStringAndSize()) or the length of the string in the `str'
58n/a parameter (for PyString_FromString()).
59n/a*/
60n/aPyObject *
61n/aPyString_FromStringAndSize(const char *str, Py_ssize_t size)
62216587472{
63n/a register PyStringObject *op;
64216587472 if (size < 0) {
650 PyErr_SetString(PyExc_SystemError,
66n/a "Negative size passed to PyString_FromStringAndSize");
670 return NULL;
68n/a }
69216587472 if (size == 0 && (op = nullstring) != NULL) {
70n/a#ifdef COUNT_ALLOCS
71n/a null_strings++;
72n/a#endif
73672721 Py_INCREF(op);
74672721 return (PyObject *)op;
75n/a }
76215914751 if (size == 1 && str != NULL &&
77n/a (op = characters[*str & UCHAR_MAX]) != NULL)
78n/a {
79n/a#ifdef COUNT_ALLOCS
80n/a one_strings++;
81n/a#endif
829142408 Py_INCREF(op);
839142408 return (PyObject *)op;
84n/a }
85n/a
86206772343 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
870 PyErr_SetString(PyExc_OverflowError, "string is too large");
880 return NULL;
89n/a }
90n/a
91n/a /* Inline PyObject_NewVar */
92206772343 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
93206772343 if (op == NULL)
940 return PyErr_NoMemory();
95206772343 PyObject_INIT_VAR(op, &PyString_Type, size);
96206772343 op->ob_shash = -1;
97206772343 op->ob_sstate = SSTATE_NOT_INTERNED;
98206772343 if (str != NULL)
99190861107 Py_MEMCPY(op->ob_sval, str, size);
100206772343 op->ob_sval[size] = '\0';
101n/a /* share short strings */
102206772343 if (size == 0) {
103293 PyObject *t = (PyObject *)op;
104293 PyString_InternInPlace(&t);
105293 op = (PyStringObject *)t;
106293 nullstring = op;
107293 Py_INCREF(op);
108206772050 } else if (size == 1 && str != NULL) {
10927080 PyObject *t = (PyObject *)op;
11027080 PyString_InternInPlace(&t);
11127080 op = (PyStringObject *)t;
11227080 characters[*str & UCHAR_MAX] = op;
11327080 Py_INCREF(op);
114n/a }
115206772343 return (PyObject *) op;
116n/a}
117n/a
118n/aPyObject *
119n/aPyString_FromString(const char *str)
120801461219{
121n/a register size_t size;
122n/a register PyStringObject *op;
123n/a
124801461219 assert(str != NULL);
125801461219 size = strlen(str);
126801461219 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1270 PyErr_SetString(PyExc_OverflowError,
128n/a "string is too long for a Python string");
1290 return NULL;
130n/a }
131801461219 if (size == 0 && (op = nullstring) != NULL) {
132n/a#ifdef COUNT_ALLOCS
133n/a null_strings++;
134n/a#endif
1351049629 Py_INCREF(op);
1361049629 return (PyObject *)op;
137n/a }
138800411590 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139n/a#ifdef COUNT_ALLOCS
140n/a one_strings++;
141n/a#endif
142379260 Py_INCREF(op);
143379260 return (PyObject *)op;
144n/a }
145n/a
146n/a /* Inline PyObject_NewVar */
147800032330 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
148800032330 if (op == NULL)
1490 return PyErr_NoMemory();
150800032330 PyObject_INIT_VAR(op, &PyString_Type, size);
151800032330 op->ob_shash = -1;
152800032330 op->ob_sstate = SSTATE_NOT_INTERNED;
153800032330 Py_MEMCPY(op->ob_sval, str, size+1);
154n/a /* share short strings */
155800032330 if (size == 0) {
1560 PyObject *t = (PyObject *)op;
1570 PyString_InternInPlace(&t);
1580 op = (PyStringObject *)t;
1590 nullstring = op;
1600 Py_INCREF(op);
161800032330 } else if (size == 1) {
1621215 PyObject *t = (PyObject *)op;
1631215 PyString_InternInPlace(&t);
1641215 op = (PyStringObject *)t;
1651215 characters[*str & UCHAR_MAX] = op;
1661215 Py_INCREF(op);
167n/a }
168800032330 return (PyObject *) op;
169n/a}
170n/a
171n/aPyObject *
172n/aPyString_FromFormatV(const char *format, va_list vargs)
1732611452{
174n/a va_list count;
1752611452 Py_ssize_t n = 0;
176n/a const char* f;
177n/a char *s;
178n/a PyObject* string;
179n/a
180n/a#ifdef VA_LIST_IS_ARRAY
1812611452 Py_MEMCPY(count, vargs, sizeof(va_list));
182n/a#else
183n/a#ifdef __va_copy
184n/a __va_copy(count, vargs);
185n/a#else
186n/a count = vargs;
187n/a#endif
188n/a#endif
189n/a /* step 1: figure out how large a buffer we need */
19082079217 for (f = format; *f; f++) {
19179467766 if (*f == '%') {
192n/a#ifdef HAVE_LONG_LONG
1934643160 int longlongflag = 0;
194n/a#endif
1954643160 const char* p = f;
19619784586 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
197n/a ;
198n/a
199n/a /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200n/a * they don't affect the amount of space we reserve.
201n/a */
2024643160 if (*f == 'l') {
20399 if (f[1] == 'd' || f[1] == 'u') {
20448 ++f;
205n/a }
206n/a#ifdef HAVE_LONG_LONG
2073 else if (f[1] == 'l' &&
208n/a (f[2] == 'd' || f[2] == 'u')) {
2092 longlongflag = 1;
2102 f += 2;
211n/a }
212n/a#endif
213n/a }
2144643109 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
21536458 ++f;
216n/a }
217n/a
2184643160 switch (*f) {
219n/a case 'c':
22046077 (void)va_arg(count, int);
221n/a /* fall through... */
222n/a case '%':
22346186 n++;
22446186 break;
225n/a case 'd': case 'u': case 'i': case 'x':
226112593 (void) va_arg(count, int);
227n/a#ifdef HAVE_LONG_LONG
228n/a /* Need at most
229n/a ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230n/a plus 1 for the sign. 53/22 is an upper
231n/a bound for log10(256). */
232112593 if (longlongflag)
2332 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
234n/a else
235n/a#endif
236n/a /* 20 bytes is enough to hold a 64-bit
237n/a integer. Decimal takes the most
238n/a space. This isn't enough for
239n/a octal. */
240112591 n += 20;
241n/a
242112593 break;
243n/a case 's':
2444484073 s = va_arg(count, char*);
2454484073 n += strlen(s);
2464484073 break;
247n/a case 'p':
248307 (void) va_arg(count, int);
249n/a /* maximum 64-bit pointer representation:
250n/a * 0xffffffffffffffff
251n/a * so 19 characters is enough.
252n/a * XXX I count 18 -- what's the extra for?
253n/a */
254307 n += 19;
255307 break;
256n/a default:
257n/a /* if we stumble upon an unknown
258n/a formatting code, copy the rest of
259n/a the format string to the output
260n/a string. (we cannot just skip the
261n/a code, since there's no way to know
262n/a what's in the argument list) */
2631 n += strlen(p);
2641 goto expand;
265n/a }
266n/a } else
26774824606 n++;
268n/a }
2692611452 expand:
270n/a /* step 2: fill the buffer */
271n/a /* Since we've analyzed how much space we need for the worst case,
272n/a use sprintf directly instead of the slower PyOS_snprintf. */
2732611452 string = PyString_FromStringAndSize(NULL, n);
2742611452 if (!string)
2750 return NULL;
276n/a
2772611452 s = PyString_AsString(string);
278n/a
27982079217 for (f = format; *f; f++) {
28079467766 if (*f == '%') {
2814643160 const char* p = f++;
282n/a Py_ssize_t i;
2834643160 int longflag = 0;
284n/a#ifdef HAVE_LONG_LONG
2854643160 int longlongflag = 0;
286n/a#endif
2874643160 int size_tflag = 0;
288n/a /* parse the width.precision part (we're only
289n/a interested in the precision value, if any) */
2904643160 n = 0;
2919286461 while (isdigit(Py_CHARMASK(*f)))
292141 n = (n*10) + *f++ - '0';
2934643160 if (*f == '.') {
2944237658 f++;
2954237658 n = 0;
29619378943 while (isdigit(Py_CHARMASK(*f)))
29710903627 n = (n*10) + *f++ - '0';
298n/a }
2999286320 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
3000 f++;
301n/a /* Handle %ld, %lu, %lld and %llu. */
3024643160 if (*f == 'l') {
30399 if (f[1] == 'd' || f[1] == 'u') {
30448 longflag = 1;
30548 ++f;
306n/a }
307n/a#ifdef HAVE_LONG_LONG
3083 else if (f[1] == 'l' &&
309n/a (f[2] == 'd' || f[2] == 'u')) {
3102 longlongflag = 1;
3112 f += 2;
312n/a }
313n/a#endif
314n/a }
315n/a /* handle the size_t flag. */
3164643109 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
31736458 size_tflag = 1;
31836458 ++f;
319n/a }
320n/a
3214643160 switch (*f) {
322n/a case 'c':
32346077 *s++ = va_arg(vargs, int);
32446077 break;
325n/a case 'd':
326112377 if (longflag)
32747 sprintf(s, "%ld", va_arg(vargs, long));
328n/a#ifdef HAVE_LONG_LONG
329112330 else if (longlongflag)
3301 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
331n/a va_arg(vargs, PY_LONG_LONG));
332n/a#endif
333112329 else if (size_tflag)
33436253 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
335n/a va_arg(vargs, Py_ssize_t));
336n/a else
33776076 sprintf(s, "%d", va_arg(vargs, int));
338112377 s += strlen(s);
339112377 break;
340n/a case 'u':
341208 if (longflag)
3421 sprintf(s, "%lu",
343n/a va_arg(vargs, unsigned long));
344n/a#ifdef HAVE_LONG_LONG
345207 else if (longlongflag)
3461 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
347n/a va_arg(vargs, PY_LONG_LONG));
348n/a#endif
349206 else if (size_tflag)
350205 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
351n/a va_arg(vargs, size_t));
352n/a else
3531 sprintf(s, "%u",
354n/a va_arg(vargs, unsigned int));
355208 s += strlen(s);
356208 break;
357n/a case 'i':
3586 sprintf(s, "%i", va_arg(vargs, int));
3596 s += strlen(s);
3606 break;
361n/a case 'x':
3622 sprintf(s, "%x", va_arg(vargs, int));
3632 s += strlen(s);
3642 break;
365n/a case 's':
3664484073 p = va_arg(vargs, char*);
3674484073 i = strlen(p);
3684484073 if (n > 0 && i > n)
36919 i = n;
3704484073 Py_MEMCPY(s, p, i);
3714484073 s += i;
3724484073 break;
373n/a case 'p':
374307 sprintf(s, "%p", va_arg(vargs, void*));
375n/a /* %p is ill-defined: ensure leading 0x. */
376307 if (s[1] == 'X')
3770 s[1] = 'x';
378307 else if (s[1] != 'x') {
3790 memmove(s+2, s, strlen(s)+1);
3800 s[0] = '0';
3810 s[1] = 'x';
382n/a }
383307 s += strlen(s);
384307 break;
385n/a case '%':
386109 *s++ = '%';
387109 break;
388n/a default:
3891 strcpy(s, p);
3901 s += strlen(s);
3911 goto end;
392n/a }
393n/a } else
39474824606 *s++ = *f;
395n/a }
396n/a
3972611452 end:
3982611452 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
3990 return NULL;
4002611452 return string;
401n/a}
402n/a
403n/aPyObject *
404n/aPyString_FromFormat(const char *format, ...)
40585012{
406n/a PyObject* ret;
407n/a va_list vargs;
408n/a
409n/a#ifdef HAVE_STDARG_PROTOTYPES
41085012 va_start(vargs, format);
411n/a#else
412n/a va_start(vargs);
413n/a#endif
41485012 ret = PyString_FromFormatV(format, vargs);
41585012 va_end(vargs);
41685012 return ret;
417n/a}
418n/a
419n/a
420n/aPyObject *PyString_Decode(const char *s,
421n/a Py_ssize_t size,
422n/a const char *encoding,
423n/a const char *errors)
4240{
425n/a PyObject *v, *str;
426n/a
4270 str = PyString_FromStringAndSize(s, size);
4280 if (str == NULL)
4290 return NULL;
4300 v = PyString_AsDecodedString(str, encoding, errors);
4310 Py_DECREF(str);
4320 return v;
433n/a}
434n/a
435n/aPyObject *PyString_AsDecodedObject(PyObject *str,
436n/a const char *encoding,
437n/a const char *errors)
4381313281{
439n/a PyObject *v;
440n/a
4411313281 if (!PyString_Check(str)) {
4420 PyErr_BadArgument();
4430 goto onError;
444n/a }
445n/a
4461313281 if (encoding == NULL) {
447n/a#ifdef Py_USING_UNICODE
4480 encoding = PyUnicode_GetDefaultEncoding();
449n/a#else
450n/a PyErr_SetString(PyExc_ValueError, "no encoding specified");
451n/a goto onError;
452n/a#endif
453n/a }
454n/a
455n/a /* Decode via the codec registry */
4561313281 v = PyCodec_Decode(str, encoding, errors);
4571313281 if (v == NULL)
458550 goto onError;
459n/a
4601312731 return v;
461n/a
462550 onError:
463550 return NULL;
464n/a}
465n/a
466n/aPyObject *PyString_AsDecodedString(PyObject *str,
467n/a const char *encoding,
468n/a const char *errors)
4690{
470n/a PyObject *v;
471n/a
4720 v = PyString_AsDecodedObject(str, encoding, errors);
4730 if (v == NULL)
4740 goto onError;
475n/a
476n/a#ifdef Py_USING_UNICODE
477n/a /* Convert Unicode to a string using the default encoding */
4780 if (PyUnicode_Check(v)) {
4790 PyObject *temp = v;
4800 v = PyUnicode_AsEncodedString(v, NULL, NULL);
4810 Py_DECREF(temp);
4820 if (v == NULL)
4830 goto onError;
484n/a }
485n/a#endif
4860 if (!PyString_Check(v)) {
4870 PyErr_Format(PyExc_TypeError,
488n/a "decoder did not return a string object (type=%.400s)",
489n/a Py_TYPE(v)->tp_name);
4900 Py_DECREF(v);
4910 goto onError;
492n/a }
493n/a
4940 return v;
495n/a
4960 onError:
4970 return NULL;
498n/a}
499n/a
500n/aPyObject *PyString_Encode(const char *s,
501n/a Py_ssize_t size,
502n/a const char *encoding,
503n/a const char *errors)
5040{
505n/a PyObject *v, *str;
506n/a
5070 str = PyString_FromStringAndSize(s, size);
5080 if (str == NULL)
5090 return NULL;
5100 v = PyString_AsEncodedString(str, encoding, errors);
5110 Py_DECREF(str);
5120 return v;
513n/a}
514n/a
515n/aPyObject *PyString_AsEncodedObject(PyObject *str,
516n/a const char *encoding,
517n/a const char *errors)
5184746{
519n/a PyObject *v;
520n/a
5214746 if (!PyString_Check(str)) {
5220 PyErr_BadArgument();
5230 goto onError;
524n/a }
525n/a
5264746 if (encoding == NULL) {
527n/a#ifdef Py_USING_UNICODE
5280 encoding = PyUnicode_GetDefaultEncoding();
529n/a#else
530n/a PyErr_SetString(PyExc_ValueError, "no encoding specified");
531n/a goto onError;
532n/a#endif
533n/a }
534n/a
535n/a /* Encode via the codec registry */
5364746 v = PyCodec_Encode(str, encoding, errors);
5374746 if (v == NULL)
5382 goto onError;
539n/a
5404744 return v;
541n/a
5422 onError:
5432 return NULL;
544n/a}
545n/a
546n/aPyObject *PyString_AsEncodedString(PyObject *str,
547n/a const char *encoding,
548n/a const char *errors)
5490{
550n/a PyObject *v;
551n/a
5520 v = PyString_AsEncodedObject(str, encoding, errors);
5530 if (v == NULL)
5540 goto onError;
555n/a
556n/a#ifdef Py_USING_UNICODE
557n/a /* Convert Unicode to a string using the default encoding */
5580 if (PyUnicode_Check(v)) {
5590 PyObject *temp = v;
5600 v = PyUnicode_AsEncodedString(v, NULL, NULL);
5610 Py_DECREF(temp);
5620 if (v == NULL)
5630 goto onError;
564n/a }
565n/a#endif
5660 if (!PyString_Check(v)) {
5670 PyErr_Format(PyExc_TypeError,
568n/a "encoder did not return a string object (type=%.400s)",
569n/a Py_TYPE(v)->tp_name);
5700 Py_DECREF(v);
5710 goto onError;
572n/a }
573n/a
5740 return v;
575n/a
5760 onError:
5770 return NULL;
578n/a}
579n/a
580n/astatic void
581n/astring_dealloc(PyObject *op)
5821008101876{
5831008101876 switch (PyString_CHECK_INTERNED(op)) {
584n/a case SSTATE_NOT_INTERNED:
5851007373014 break;
586n/a
587n/a case SSTATE_INTERNED_MORTAL:
588n/a /* revive dead object temporarily for DelItem */
589728862 Py_REFCNT(op) = 3;
590728862 if (PyDict_DelItem(interned, op) != 0)
5910 Py_FatalError(
592n/a "deletion of interned string failed");
593728862 break;
594n/a
595n/a case SSTATE_INTERNED_IMMORTAL:
5960 Py_FatalError("Immortal interned string died.");
597n/a
598n/a default:
5990 Py_FatalError("Inconsistent interned string state.");
600n/a }
6011008101876 Py_TYPE(op)->tp_free(op);
6021008101876}
603n/a
604n/a/* Unescape a backslash-escaped string. If unicode is non-zero,
605n/a the string is a u-literal. If recode_encoding is non-zero,
606n/a the string is UTF-8 encoded and should be re-encoded in the
607n/a specified encoding. */
608n/a
609n/aPyObject *PyString_DecodeEscape(const char *s,
610n/a Py_ssize_t len,
611n/a const char *errors,
612n/a Py_ssize_t unicode,
613n/a const char *recode_encoding)
61423935{
615n/a int c;
616n/a char *p, *buf;
617n/a const char *end;
618n/a PyObject *v;
61923935 Py_ssize_t newlen = recode_encoding ? 4*len:len;
62023935 v = PyString_FromStringAndSize((char *)NULL, newlen);
62123935 if (v == NULL)
6220 return NULL;
62323935 p = buf = PyString_AsString(v);
62423935 end = s + len;
6251520492 while (s < end) {
6261472631 if (*s != '\\') {
6271397212 non_esc:
628n/a#ifdef Py_USING_UNICODE
6291397212 if (recode_encoding && (*s & 0x80)) {
630n/a PyObject *u, *w;
631n/a char *r;
632n/a const char* t;
633n/a Py_ssize_t rn;
6340 t = s;
635n/a /* Decode non-ASCII bytes as UTF-8. */
6360 while (t < end && (*t & 0x80)) t++;
6370 u = PyUnicode_DecodeUTF8(s, t - s, errors);
6380 if(!u) goto failed;
639n/a
640n/a /* Recode them in target encoding. */
6410 w = PyUnicode_AsEncodedString(
642n/a u, recode_encoding, errors);
6430 Py_DECREF(u);
6440 if (!w) goto failed;
645n/a
646n/a /* Append bytes to output buffer. */
6470 assert(PyString_Check(w));
6480 r = PyString_AS_STRING(w);
6490 rn = PyString_GET_SIZE(w);
6500 Py_MEMCPY(p, r, rn);
6510 p += rn;
6520 Py_DECREF(w);
6530 s = t;
654n/a } else {
6551397212 *p++ = *s++;
656n/a }
657n/a#else
658n/a *p++ = *s++;
659n/a#endif
6601397212 continue;
661n/a }
66276196 s++;
66376196 if (s==end) {
6649 PyErr_SetString(PyExc_ValueError,
665n/a "Trailing \\ in string");
6669 goto failed;
667n/a }
66876187 switch (*s++) {
669n/a /* XXX This assumes ASCII! */
670998 case '\n': break;
6712619 case '\\': *p++ = '\\'; break;
672264 case '\'': *p++ = '\''; break;
673452 case '\"': *p++ = '\"'; break;
67448 case 'b': *p++ = '\b'; break;
67571 case 'f': *p++ = '\014'; break; /* FF */
6761087 case 't': *p++ = '\t'; break;
67724754 case 'n': *p++ = '\n'; break;
6781412 case 'r': *p++ = '\r'; break;
67969 case 'v': *p++ = '\013'; break; /* VT */
68039 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
681n/a case '0': case '1': case '2': case '3':
682n/a case '4': case '5': case '6': case '7':
6832313 c = s[-1] - '0';
6842313 if (s < end && '0' <= *s && *s <= '7') {
6851627 c = (c<<3) + *s++ - '0';
6861627 if (s < end && '0' <= *s && *s <= '7')
6871601 c = (c<<3) + *s++ - '0';
688n/a }
6892313 *p++ = c;
6902313 break;
691n/a case 'x':
69241284 if (s+1 < end &&
693n/a isxdigit(Py_CHARMASK(s[0])) &&
694n/a isxdigit(Py_CHARMASK(s[1])))
695n/a {
69641284 unsigned int x = 0;
69741284 c = Py_CHARMASK(*s);
69841284 s++;
69941284 if (isdigit(c))
70018937 x = c - '0';
70122347 else if (islower(c))
70222283 x = 10 + c - 'a';
703n/a else
70464 x = 10 + c - 'A';
70541284 x = x << 4;
70641284 c = Py_CHARMASK(*s);
70741284 s++;
70841284 if (isdigit(c))
70929912 x += c - '0';
71011372 else if (islower(c))
71111294 x += 10 + c - 'a';
712n/a else
71378 x += 10 + c - 'A';
71441284 *p++ = x;
71541284 break;
716n/a }
7170 if (!errors || strcmp(errors, "strict") == 0) {
7180 PyErr_SetString(PyExc_ValueError,
719n/a "invalid \\x escape");
7200 goto failed;
721n/a }
7220 if (strcmp(errors, "replace") == 0) {
7230 *p++ = '?';
7240 } else if (strcmp(errors, "ignore") == 0)
725n/a /* do nothing */;
726n/a else {
7270 PyErr_Format(PyExc_ValueError,
728n/a "decoding error; "
729n/a "unknown error handling code: %.400s",
730n/a errors);
7310 goto failed;
732n/a }
733n/a#ifndef Py_USING_UNICODE
734n/a case 'u':
735n/a case 'U':
736n/a case 'N':
737n/a if (unicode) {
738n/a PyErr_SetString(PyExc_ValueError,
739n/a "Unicode escapes not legal "
740n/a "when Unicode disabled");
741n/a goto failed;
742n/a }
743n/a#endif
744n/a default:
745777 *p++ = '\\';
746777 s--;
747777 goto non_esc; /* an arbitrary number of unescaped
748n/a UTF-8 bytes may follow. */
749n/a }
750n/a }
75123926 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
7520 goto failed;
75323926 return v;
7549 failed:
7559 Py_DECREF(v);
7569 return NULL;
757n/a}
758n/a
759n/a/* -------------------------------------------------------------------- */
760n/a/* object api */
761n/a
762n/astatic Py_ssize_t
763n/astring_getsize(register PyObject *op)
764419940{
765n/a char *s;
766n/a Py_ssize_t len;
767419940 if (PyString_AsStringAndSize(op, &s, &len))
7688 return -1;
769419932 return len;
770n/a}
771n/a
772n/astatic /*const*/ char *
773n/astring_getbuffer(register PyObject *op)
7744{
775n/a char *s;
776n/a Py_ssize_t len;
7774 if (PyString_AsStringAndSize(op, &s, &len))
7784 return NULL;
7790 return s;
780n/a}
781n/a
782n/aPy_ssize_t
783n/aPyString_Size(register PyObject *op)
78413588475{
78513588475 if (!PyString_Check(op))
786419940 return string_getsize(op);
78713168535 return Py_SIZE(op);
788n/a}
789n/a
790n/a/*const*/ char *
791n/aPyString_AsString(register PyObject *op)
7922145348248{
7932145348248 if (!PyString_Check(op))
7944 return string_getbuffer(op);
7952145348244 return ((PyStringObject *)op) -> ob_sval;
796n/a}
797n/a
798n/aint
799n/aPyString_AsStringAndSize(register PyObject *obj,
800n/a register char **s,
801n/a register Py_ssize_t *len)
802762660{
803762660 if (s == NULL) {
8040 PyErr_BadInternalCall();
8050 return -1;
806n/a }
807n/a
808762660 if (!PyString_Check(obj)) {
809n/a#ifdef Py_USING_UNICODE
810419949 if (PyUnicode_Check(obj)) {
811419938 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812419938 if (obj == NULL)
8135 return -1;
814n/a }
815n/a else
816n/a#endif
817n/a {
81811 PyErr_Format(PyExc_TypeError,
819n/a "expected string or Unicode object, "
820n/a "%.200s found", Py_TYPE(obj)->tp_name);
82111 return -1;
822n/a }
823n/a }
824n/a
825762644 *s = PyString_AS_STRING(obj);
826762644 if (len != NULL)
827434710 *len = PyString_GET_SIZE(obj);
828327934 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
8290 PyErr_SetString(PyExc_TypeError,
830n/a "expected string without null bytes");
8310 return -1;
832n/a }
833762644 return 0;
834n/a}
835n/a
836n/a/* -------------------------------------------------------------------- */
837n/a/* Methods */
838n/a
839n/a#include "stringlib/stringdefs.h"
840n/a#include "stringlib/fastsearch.h"
841n/a
842n/a#include "stringlib/count.h"
843n/a#include "stringlib/find.h"
844n/a#include "stringlib/partition.h"
845n/a#include "stringlib/split.h"
846n/a
847n/a#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
848n/a#include "stringlib/localeutil.h"
849n/a
850n/a
851n/a
852n/astatic int
853n/astring_print(PyStringObject *op, FILE *fp, int flags)
8542356{
855n/a Py_ssize_t i, str_len;
856n/a char c;
857n/a int quote;
858n/a
859n/a /* XXX Ought to check for interrupts when writing long strings */
8602356 if (! PyString_CheckExact(op)) {
861n/a int ret;
862n/a /* A str subclass may have its own __str__ method. */
8630 op = (PyStringObject *) PyObject_Str((PyObject *)op);
8640 if (op == NULL)
8650 return -1;
8660 ret = string_print(op, fp, flags);
8670 Py_DECREF(op);
8680 return ret;
869n/a }
8702356 if (flags & Py_PRINT_RAW) {
8712325 char *data = op->ob_sval;
8722325 Py_ssize_t size = Py_SIZE(op);
8732325 Py_BEGIN_ALLOW_THREADS
8744650 while (size > INT_MAX) {
875n/a /* Very long strings cannot be written atomically.
876n/a * But don't write exactly INT_MAX bytes at a time
877n/a * to avoid memory alignment issues.
878n/a */
8790 const int chunk_size = INT_MAX & ~0x3FFF;
8800 fwrite(data, 1, chunk_size, fp);
8810 data += chunk_size;
8820 size -= chunk_size;
883n/a }
884n/a#ifdef __VMS
885n/a if (size) fwrite(data, (int)size, 1, fp);
886n/a#else
8872325 fwrite(data, 1, (int)size, fp);
888n/a#endif
8892325 Py_END_ALLOW_THREADS
8902325 return 0;
891n/a }
892n/a
893n/a /* figure out which quote to use; single is preferred */
89431 quote = '\'';
89531 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896n/a !memchr(op->ob_sval, '"', Py_SIZE(op)))
8971 quote = '"';
898n/a
89931 str_len = Py_SIZE(op);
90031 Py_BEGIN_ALLOW_THREADS
90131 fputc(quote, fp);
902209 for (i = 0; i < str_len; i++) {
903n/a /* Since strings are immutable and the caller should have a
904n/a reference, accessing the internal buffer should not be an issue
905n/a with the GIL released. */
906178 c = op->ob_sval[i];
907178 if (c == quote || c == '\\')
9080 fprintf(fp, "\\%c", c);
909178 else if (c == '\t')
9100 fprintf(fp, "\\t");
911178 else if (c == '\n')
9120 fprintf(fp, "\\n");
913178 else if (c == '\r')
9140 fprintf(fp, "\\r");
915178 else if (c < ' ' || c >= 0x7f)
9160 fprintf(fp, "\\x%02x", c & 0xff);
917n/a else
918178 fputc(c, fp);
919n/a }
92031 fputc(quote, fp);
92131 Py_END_ALLOW_THREADS
92231 return 0;
923n/a}
924n/a
925n/aPyObject *
926n/aPyString_Repr(PyObject *obj, int smartquotes)
927113711{
928113711 register PyStringObject* op = (PyStringObject*) obj;
929113711 size_t newsize = 2 + 4 * Py_SIZE(op);
930n/a PyObject *v;
931113711 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
9320 PyErr_SetString(PyExc_OverflowError,
933n/a "string is too large to make repr");
9340 return NULL;
935n/a }
936113711 v = PyString_FromStringAndSize((char *)NULL, newsize);
937113711 if (v == NULL) {
9380 return NULL;
939n/a }
940n/a else {
941n/a register Py_ssize_t i;
942n/a register char c;
943n/a register char *p;
944n/a int quote;
945n/a
946n/a /* figure out which quote to use; single is preferred */
947113711 quote = '\'';
948113711 if (smartquotes &&
949n/a memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
950n/a !memchr(op->ob_sval, '"', Py_SIZE(op)))
951746 quote = '"';
952n/a
953113711 p = PyString_AS_STRING(v);
954113711 *p++ = quote;
9551466837 for (i = 0; i < Py_SIZE(op); i++) {
956n/a /* There's at least enough room for a hex escape
957n/a and a closing quote. */
9581353126 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
9591353126 c = op->ob_sval[i];
9601358902 if (c == quote || c == '\\')
9615776 *p++ = '\\', *p++ = c;
9621347350 else if (c == '\t')
963471 *p++ = '\\', *p++ = 't';
9641346879 else if (c == '\n')
965573 *p++ = '\\', *p++ = 'n';
9661346306 else if (c == '\r')
96786 *p++ = '\\', *p++ = 'r';
9681354543 else if (c < ' ' || c >= 0x7f) {
969n/a /* For performance, we don't want to call
970n/a PyOS_snprintf here (extra layers of
971n/a function call). */
9728323 sprintf(p, "\\x%02x", c & 0xff);
9738323 p += 4;
974n/a }
975n/a else
9761337897 *p++ = c;
977n/a }
978113711 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
979113711 *p++ = quote;
980113711 *p = '\0';
981113711 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
9820 return NULL;
983113711 return v;
984n/a }
985n/a}
986n/a
987n/astatic PyObject *
988n/astring_repr(PyObject *op)
989113709{
990113709 return PyString_Repr(op, 1);
991n/a}
992n/a
993n/astatic PyObject *
994n/astring_str(PyObject *s)
995134{
996134 assert(PyString_Check(s));
997134 if (PyString_CheckExact(s)) {
9980 Py_INCREF(s);
9990 return s;
1000n/a }
1001n/a else {
1002n/a /* Subtype -- return genuine string with the same value. */
1003134 PyStringObject *t = (PyStringObject *) s;
1004134 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1005n/a }
1006n/a}
1007n/a
1008n/astatic Py_ssize_t
1009n/astring_length(PyStringObject *a)
101073116496{
101173116496 return Py_SIZE(a);
1012n/a}
1013n/a
1014n/astatic PyObject *
1015n/astring_concat(register PyStringObject *a, register PyObject *bb)
10162683380{
1017n/a register Py_ssize_t size;
1018n/a register PyStringObject *op;
10192683380 if (!PyString_Check(bb)) {
1020n/a#ifdef Py_USING_UNICODE
1021201163 if (PyUnicode_Check(bb))
1022201086 return PyUnicode_Concat((PyObject *)a, bb);
1023n/a#endif
102477 if (PyByteArray_Check(bb))
10252 return PyByteArray_Concat((PyObject *)a, bb);
102675 PyErr_Format(PyExc_TypeError,
1027n/a "cannot concatenate 'str' and '%.200s' objects",
1028n/a Py_TYPE(bb)->tp_name);
102975 return NULL;
1030n/a }
1031n/a#define b ((PyStringObject *)bb)
1032n/a /* Optimize cases with empty left or right operand */
10332482217 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1034n/a PyString_CheckExact(a) && PyString_CheckExact(b)) {
1035704803 if (Py_SIZE(a) == 0) {
1036529445 Py_INCREF(bb);
1037529445 return bb;
1038n/a }
1039175358 Py_INCREF(a);
1040175358 return (PyObject *)a;
1041n/a }
10421777414 size = Py_SIZE(a) + Py_SIZE(b);
1043n/a /* Check that string sizes are not negative, to prevent an
1044n/a overflow in cases where we are passed incorrectly-created
1045n/a strings with negative lengths (due to a bug in other code).
1046n/a */
10471777414 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048n/a Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
10490 PyErr_SetString(PyExc_OverflowError,
1050n/a "strings are too large to concat");
10510 return NULL;
1052n/a }
1053n/a
1054n/a /* Inline PyObject_NewVar */
10551777414 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
10560 PyErr_SetString(PyExc_OverflowError,
1057n/a "strings are too large to concat");
10580 return NULL;
1059n/a }
10601777414 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
10611777414 if (op == NULL)
10620 return PyErr_NoMemory();
10631777414 PyObject_INIT_VAR(op, &PyString_Type, size);
10641777414 op->ob_shash = -1;
10651777414 op->ob_sstate = SSTATE_NOT_INTERNED;
10661777414 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
10671777414 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
10681777414 op->ob_sval[size] = '\0';
10691777414 return (PyObject *) op;
1070n/a#undef b
1071n/a}
1072n/a
1073n/astatic PyObject *
1074n/astring_repeat(register PyStringObject *a, register Py_ssize_t n)
1075226049{
1076n/a register Py_ssize_t i;
1077n/a register Py_ssize_t j;
1078n/a register Py_ssize_t size;
1079n/a register PyStringObject *op;
1080n/a size_t nbytes;
1081226049 if (n < 0)
10824191 n = 0;
1083n/a /* watch out for overflows: the size can overflow int,
1084n/a * and the # of bytes needed can overflow size_t
1085n/a */
1086226049 size = Py_SIZE(a) * n;
1087226049 if (n && size / n != Py_SIZE(a)) {
10881 PyErr_SetString(PyExc_OverflowError,
1089n/a "repeated string is too long");
10901 return NULL;
1091n/a }
1092226048 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
109387261 Py_INCREF(a);
109487261 return (PyObject *)a;
1095n/a }
1096138787 nbytes = (size_t)size;
1097138787 if (nbytes + PyStringObject_SIZE <= nbytes) {
10980 PyErr_SetString(PyExc_OverflowError,
1099n/a "repeated string is too long");
11000 return NULL;
1101n/a }
1102138787 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1103138787 if (op == NULL)
11040 return PyErr_NoMemory();
1105138787 PyObject_INIT_VAR(op, &PyString_Type, size);
1106138787 op->ob_shash = -1;
1107138787 op->ob_sstate = SSTATE_NOT_INTERNED;
1108138787 op->ob_sval[size] = '\0';
1109138787 if (Py_SIZE(a) == 1 && n > 0) {
111052320 memset(op->ob_sval, a->ob_sval[0] , n);
111152320 return (PyObject *) op;
1112n/a }
111386467 i = 0;
111486467 if (i < size) {
11154279 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
11164279 i = Py_SIZE(a);
1117n/a }
1118191691 while (i < size) {
111918757 j = (i <= size-i) ? i : size-i;
112018757 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
112118757 i += j;
1122n/a }
112386467 return (PyObject *) op;
1124n/a}
1125n/a
1126n/a/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1127n/a
1128n/astatic PyObject *
1129n/astring_slice(register PyStringObject *a, register Py_ssize_t i,
1130n/a register Py_ssize_t j)
1131n/a /* j -- may be negative! */
1132188731764{
1133188731764 if (i < 0)
1134373 i = 0;
1135188731764 if (j < 0)
1136206 j = 0; /* Avoid signed/unsigned bug in next line */
1137188731764 if (j > Py_SIZE(a))
1138122478462 j = Py_SIZE(a);
1139188731764 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1140n/a /* It's the same as a */
11412624154 Py_INCREF(a);
11422624154 return (PyObject *)a;
1143n/a }
1144186107610 if (j < i)
11454847 j = i;
1146186107610 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1147n/a}
1148n/a
1149n/astatic int
1150n/astring_contains(PyObject *str_obj, PyObject *sub_obj)
115110826625{
115210826625 if (!PyString_CheckExact(sub_obj)) {
1153n/a#ifdef Py_USING_UNICODE
1154448897 if (PyUnicode_Check(sub_obj))
1155448896 return PyUnicode_Contains(str_obj, sub_obj);
1156n/a#endif
11571 if (!PyString_Check(sub_obj)) {
11581 PyErr_Format(PyExc_TypeError,
1159n/a "'in <string>' requires string as left operand, "
1160n/a "not %.200s", Py_TYPE(sub_obj)->tp_name);
11611 return -1;
1162n/a }
1163n/a }
1164n/a
116510377728 return stringlib_contains_obj(str_obj, sub_obj);
1166n/a}
1167n/a
1168n/astatic PyObject *
1169n/astring_item(PyStringObject *a, register Py_ssize_t i)
117069734218{
1171n/a char pchar;
1172n/a PyObject *v;
117369734218 if (i < 0 || i >= Py_SIZE(a)) {
1174342910 PyErr_SetString(PyExc_IndexError, "string index out of range");
1175342910 return NULL;
1176n/a }
117769391308 pchar = a->ob_sval[i];
117869391308 v = (PyObject *)characters[pchar & UCHAR_MAX];
117969391308 if (v == NULL)
118023600 v = PyString_FromStringAndSize(&pchar, 1);
1181n/a else {
1182n/a#ifdef COUNT_ALLOCS
1183n/a one_strings++;
1184n/a#endif
118569367708 Py_INCREF(v);
1186n/a }
118769391308 return v;
1188n/a}
1189n/a
1190n/astatic PyObject*
1191n/astring_richcompare(PyStringObject *a, PyStringObject *b, int op)
1192588174882{
1193n/a int c;
1194n/a Py_ssize_t len_a, len_b;
1195n/a Py_ssize_t min_len;
1196n/a PyObject *result;
1197n/a
1198n/a /* Make sure both arguments are strings. */
1199588174882 if (!(PyString_Check(a) && PyString_Check(b))) {
1200627153 result = Py_NotImplemented;
1201627153 goto out;
1202n/a }
1203587547729 if (a == b) {
1204367246528 switch (op) {
1205n/a case Py_EQ:case Py_LE:case Py_GE:
1206366540464 result = Py_True;
1207366540464 goto out;
1208n/a case Py_NE:case Py_LT:case Py_GT:
1209706064 result = Py_False;
1210706064 goto out;
1211n/a }
1212n/a }
1213220301201 if (op == Py_EQ) {
1214n/a /* Supporting Py_NE here as well does not save
1215n/a much time, since Py_NE is rarely used. */
1216172650780 if (Py_SIZE(a) == Py_SIZE(b)
1217n/a && (a->ob_sval[0] == b->ob_sval[0]
1218n/a && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
121916976266 result = Py_True;
1220n/a } else {
1221138698248 result = Py_False;
1222n/a }
1223155674514 goto out;
1224n/a }
122564626687 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
122664626687 min_len = (len_a < len_b) ? len_a : len_b;
122764626687 if (min_len > 0) {
122864622835 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
122964622835 if (c==0)
1230762654 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1231n/a } else
12323852 c = 0;
123364626687 if (c == 0)
123456285 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
123564626687 switch (op) {
12364112427 case Py_LT: c = c < 0; break;
123715323 case Py_LE: c = c <= 0; break;
12380 case Py_EQ: assert(0); break; /* unreachable */
123960479494 case Py_NE: c = c != 0; break;
124018348 case Py_GT: c = c > 0; break;
12411095 case Py_GE: c = c >= 0; break;
1242n/a default:
12430 result = Py_NotImplemented;
12440 goto out;
1245n/a }
124664626687 result = c ? Py_True : Py_False;
1247588174882 out:
1248588174882 Py_INCREF(result);
1249588174882 return result;
1250n/a}
1251n/a
1252n/aint
1253n/a_PyString_Eq(PyObject *o1, PyObject *o2)
1254137216508{
1255137216508 PyStringObject *a = (PyStringObject*) o1;
1256137216508 PyStringObject *b = (PyStringObject*) o2;
1257137216508 return Py_SIZE(a) == Py_SIZE(b)
1258n/a && *a->ob_sval == *b->ob_sval
1259n/a && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1260n/a}
1261n/a
1262n/astatic long
1263n/astring_hash(PyStringObject *a)
12641480466368{
1265n/a register Py_ssize_t len;
1266n/a register unsigned char *p;
1267n/a register long x;
1268n/a
12691480466368 if (a->ob_shash != -1)
1270614001477 return a->ob_shash;
1271866464891 len = Py_SIZE(a);
1272866464891 p = (unsigned char *) a->ob_sval;
1273866464891 x = *p << 7;
127411031594862 while (--len >= 0)
12759298665080 x = (1000003*x) ^ *p++;
1276866464891 x ^= Py_SIZE(a);
1277866464891 if (x == -1)
12780 x = -2;
1279866464891 a->ob_shash = x;
1280866464891 return x;
1281n/a}
1282n/a
1283n/astatic PyObject*
1284n/astring_subscript(PyStringObject* self, PyObject* item)
128567161707{
128667161707 if (PyIndex_Check(item)) {
128767122301 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
128867122301 if (i == -1 && PyErr_Occurred())
12892 return NULL;
129067122299 if (i < 0)
1291135574 i += PyString_GET_SIZE(self);
129267122299 return string_item(self, i);
1293n/a }
129439406 else if (PySlice_Check(item)) {
1295n/a Py_ssize_t start, stop, step, slicelength, cur, i;
1296n/a char* source_buf;
1297n/a char* result_buf;
1298n/a PyObject* result;
1299n/a
130039401 if (PySlice_GetIndicesEx((PySliceObject*)item,
1301n/a PyString_GET_SIZE(self),
1302n/a &start, &stop, &step, &slicelength) < 0) {
13030 return NULL;
1304n/a }
1305n/a
130639401 if (slicelength <= 0) {
13073075 return PyString_FromStringAndSize("", 0);
1308n/a }
130936326 else if (start == 0 && step == 1 &&
1310n/a slicelength == PyString_GET_SIZE(self) &&
1311n/a PyString_CheckExact(self)) {
131244 Py_INCREF(self);
131344 return (PyObject *)self;
1314n/a }
131536282 else if (step == 1) {
1316404 return PyString_FromStringAndSize(
1317n/a PyString_AS_STRING(self) + start,
1318n/a slicelength);
1319n/a }
1320n/a else {
132135878 source_buf = PyString_AsString((PyObject*)self);
132235878 result_buf = (char *)PyMem_Malloc(slicelength);
132335878 if (result_buf == NULL)
13240 return PyErr_NoMemory();
1325n/a
13262666352 for (cur = start, i = 0; i < slicelength;
13272594596 cur += step, i++) {
13282594596 result_buf[i] = source_buf[cur];
1329n/a }
1330n/a
133135878 result = PyString_FromStringAndSize(result_buf,
1332n/a slicelength);
133335878 PyMem_Free(result_buf);
133435878 return result;
1335n/a }
1336n/a }
1337n/a else {
13385 PyErr_Format(PyExc_TypeError,
1339n/a "string indices must be integers, not %.200s",
1340n/a Py_TYPE(item)->tp_name);
13415 return NULL;
1342n/a }
1343n/a}
1344n/a
1345n/astatic Py_ssize_t
1346n/astring_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
13474580286{
13484580286 if ( index != 0 ) {
13490 PyErr_SetString(PyExc_SystemError,
1350n/a "accessing non-existent string segment");
13510 return -1;
1352n/a }
13534580286 *ptr = (void *)self->ob_sval;
13544580286 return Py_SIZE(self);
1355n/a}
1356n/a
1357n/astatic Py_ssize_t
1358n/astring_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
13592{
13602 PyErr_SetString(PyExc_TypeError,
1361n/a "Cannot use string as modifiable buffer");
13622 return -1;
1363n/a}
1364n/a
1365n/astatic Py_ssize_t
1366n/astring_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
13678006689{
13688006689 if ( lenp )
13690 *lenp = Py_SIZE(self);
13708006689 return 1;
1371n/a}
1372n/a
1373n/astatic Py_ssize_t
1374n/astring_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
13752849801{
13762849801 if ( index != 0 ) {
13770 PyErr_SetString(PyExc_SystemError,
1378n/a "accessing non-existent string segment");
13790 return -1;
1380n/a }
13812849801 *ptr = self->ob_sval;
13822849801 return Py_SIZE(self);
1383n/a}
1384n/a
1385n/astatic int
1386n/astring_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1387140714{
1388140714 return PyBuffer_FillInfo(view, (PyObject*)self,
1389n/a (void *)self->ob_sval, Py_SIZE(self),
1390n/a 1, flags);
1391n/a}
1392n/a
1393n/astatic PySequenceMethods string_as_sequence = {
1394n/a (lenfunc)string_length, /*sq_length*/
1395n/a (binaryfunc)string_concat, /*sq_concat*/
1396n/a (ssizeargfunc)string_repeat, /*sq_repeat*/
1397n/a (ssizeargfunc)string_item, /*sq_item*/
1398n/a (ssizessizeargfunc)string_slice, /*sq_slice*/
1399n/a 0, /*sq_ass_item*/
1400n/a 0, /*sq_ass_slice*/
1401n/a (objobjproc)string_contains /*sq_contains*/
1402n/a};
1403n/a
1404n/astatic PyMappingMethods string_as_mapping = {
1405n/a (lenfunc)string_length,
1406n/a (binaryfunc)string_subscript,
1407n/a 0,
1408n/a};
1409n/a
1410n/astatic PyBufferProcs string_as_buffer = {
1411n/a (readbufferproc)string_buffer_getreadbuf,
1412n/a (writebufferproc)string_buffer_getwritebuf,
1413n/a (segcountproc)string_buffer_getsegcount,
1414n/a (charbufferproc)string_buffer_getcharbuf,
1415n/a (getbufferproc)string_buffer_getbuffer,
1416n/a 0, /* XXX */
1417n/a};
1418n/a
1419n/a
1420n/a
1421n/a#define LEFTSTRIP 0
1422n/a#define RIGHTSTRIP 1
1423n/a#define BOTHSTRIP 2
1424n/a
1425n/a/* Arrays indexed by above */
1426n/astatic const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1427n/a
1428n/a#define STRIPNAME(i) (stripformat[i]+3)
1429n/a
1430n/aPyDoc_STRVAR(split__doc__,
1431n/a"S.split([sep [,maxsplit]]) -> list of strings\n\
1432n/a\n\
1433n/aReturn a list of the words in the string S, using sep as the\n\
1434n/adelimiter string. If maxsplit is given, at most maxsplit\n\
1435n/asplits are done. If sep is not specified or is None, any\n\
1436n/awhitespace string is a separator and empty strings are removed\n\
1437n/afrom the result.");
1438n/a
1439n/astatic PyObject *
1440n/astring_split(PyStringObject *self, PyObject *args)
14411284685{
14421284685 Py_ssize_t len = PyString_GET_SIZE(self), n;
14431284685 Py_ssize_t maxsplit = -1;
14441284685 const char *s = PyString_AS_STRING(self), *sub;
14451284685 PyObject *subobj = Py_None;
1446n/a
14471284685 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
14482 return NULL;
14491284683 if (maxsplit < 0)
14501280216 maxsplit = PY_SSIZE_T_MAX;
14511284683 if (subobj == Py_None)
1452333985 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1453950698 if (PyString_Check(subobj)) {
1454950692 sub = PyString_AS_STRING(subobj);
1455950692 n = PyString_GET_SIZE(subobj);
1456n/a }
1457n/a#ifdef Py_USING_UNICODE
14586 else if (PyUnicode_Check(subobj))
14596 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1460n/a#endif
14610 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
14620 return NULL;
1463n/a
1464950692 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1465n/a}
1466n/a
1467n/aPyDoc_STRVAR(partition__doc__,
1468n/a"S.partition(sep) -> (head, sep, tail)\n\
1469n/a\n\
1470n/aSearch for the separator sep in S, and return the part before it,\n\
1471n/athe separator itself, and the part after it. If the separator is not\n\
1472n/afound, return S and two empty strings.");
1473n/a
1474n/astatic PyObject *
1475n/astring_partition(PyStringObject *self, PyObject *sep_obj)
14765982{
1477n/a const char *sep;
1478n/a Py_ssize_t sep_len;
1479n/a
14805982 if (PyString_Check(sep_obj)) {
14815975 sep = PyString_AS_STRING(sep_obj);
14825975 sep_len = PyString_GET_SIZE(sep_obj);
1483n/a }
1484n/a#ifdef Py_USING_UNICODE
14857 else if (PyUnicode_Check(sep_obj))
14864 return PyUnicode_Partition((PyObject *) self, sep_obj);
1487n/a#endif
14883 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
14893 return NULL;
1490n/a
14915975 return stringlib_partition(
1492n/a (PyObject*) self,
1493n/a PyString_AS_STRING(self), PyString_GET_SIZE(self),
1494n/a sep_obj, sep, sep_len
1495n/a );
1496n/a}
1497n/a
1498n/aPyDoc_STRVAR(rpartition__doc__,
1499n/a"S.rpartition(sep) -> (head, sep, tail)\n\
1500n/a\n\
1501n/aSearch for the separator sep in S, starting at the end of S, and return\n\
1502n/athe part before it, the separator itself, and the part after it. If the\n\
1503n/aseparator is not found, return two empty strings and S.");
1504n/a
1505n/astatic PyObject *
1506n/astring_rpartition(PyStringObject *self, PyObject *sep_obj)
15072640{
1508n/a const char *sep;
1509n/a Py_ssize_t sep_len;
1510n/a
15112640 if (PyString_Check(sep_obj)) {
15122633 sep = PyString_AS_STRING(sep_obj);
15132633 sep_len = PyString_GET_SIZE(sep_obj);
1514n/a }
1515n/a#ifdef Py_USING_UNICODE
15167 else if (PyUnicode_Check(sep_obj))
15174 return PyUnicode_RPartition((PyObject *) self, sep_obj);
1518n/a#endif
15193 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
15203 return NULL;
1521n/a
15222633 return stringlib_rpartition(
1523n/a (PyObject*) self,
1524n/a PyString_AS_STRING(self), PyString_GET_SIZE(self),
1525n/a sep_obj, sep, sep_len
1526n/a );
1527n/a}
1528n/a
1529n/aPyDoc_STRVAR(rsplit__doc__,
1530n/a"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1531n/a\n\
1532n/aReturn a list of the words in the string S, using sep as the\n\
1533n/adelimiter string, starting at the end of the string and working\n\
1534n/ato the front. If maxsplit is given, at most maxsplit splits are\n\
1535n/adone. If sep is not specified or is None, any whitespace string\n\
1536n/ais a separator.");
1537n/a
1538n/astatic PyObject *
1539n/astring_rsplit(PyStringObject *self, PyObject *args)
15401109{
15411109 Py_ssize_t len = PyString_GET_SIZE(self), n;
15421109 Py_ssize_t maxsplit = -1;
15431109 const char *s = PyString_AS_STRING(self), *sub;
15441109 PyObject *subobj = Py_None;
1545n/a
15461109 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
15472 return NULL;
15481107 if (maxsplit < 0)
1549125 maxsplit = PY_SSIZE_T_MAX;
15501107 if (subobj == Py_None)
1551100 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
15521007 if (PyString_Check(subobj)) {
15531002 sub = PyString_AS_STRING(subobj);
15541002 n = PyString_GET_SIZE(subobj);
1555n/a }
1556n/a#ifdef Py_USING_UNICODE
15575 else if (PyUnicode_Check(subobj))
15585 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1559n/a#endif
15600 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
15610 return NULL;
1562n/a
15631002 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1564n/a}
1565n/a
1566n/a
1567n/aPyDoc_STRVAR(join__doc__,
1568n/a"S.join(iterable) -> string\n\
1569n/a\n\
1570n/aReturn a string which is the concatenation of the strings in the\n\
1571n/aiterable. The separator between elements is S.");
1572n/a
1573n/astatic PyObject *
1574n/astring_join(PyStringObject *self, PyObject *orig)
1575866867{
1576866867 char *sep = PyString_AS_STRING(self);
1577866867 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1578866867 PyObject *res = NULL;
1579n/a char *p;
1580866867 Py_ssize_t seqlen = 0;
1581866867 size_t sz = 0;
1582n/a Py_ssize_t i;
1583n/a PyObject *seq, *item;
1584n/a
1585866867 seq = PySequence_Fast(orig, "");
1586866867 if (seq == NULL) {
158727 return NULL;
1588n/a }
1589n/a
1590866840 seqlen = PySequence_Size(seq);
1591866840 if (seqlen == 0) {
1592183714 Py_DECREF(seq);
1593183714 return PyString_FromString("");
1594n/a }
1595683126 if (seqlen == 1) {
1596214037 item = PySequence_Fast_GET_ITEM(seq, 0);
1597214037 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1598214034 Py_INCREF(item);
1599214034 Py_DECREF(seq);
1600214034 return item;
1601n/a }
1602n/a }
1603n/a
1604n/a /* There are at least two things to join, or else we have a subclass
1605n/a * of the builtin types in the sequence.
1606n/a * Do a pre-pass to figure out the total amount of space we'll
1607n/a * need (sz), see whether any argument is absurd, and defer to
1608n/a * the Unicode join if appropriate.
1609n/a */
16109996789 for (i = 0; i < seqlen; i++) {
16119530763 const size_t old_sz = sz;
16129530763 item = PySequence_Fast_GET_ITEM(seq, i);
16139530763 if (!PyString_Check(item)){
1614n/a#ifdef Py_USING_UNICODE
16153066 if (PyUnicode_Check(item)) {
1616n/a /* Defer to Unicode join.
1617n/a * CAUTION: There's no guarantee that the
1618n/a * original sequence can be iterated over
1619n/a * again, so we must pass seq here.
1620n/a */
1621n/a PyObject *result;
16223058 result = PyUnicode_Join((PyObject *)self, seq);
16233058 Py_DECREF(seq);
16243058 return result;
1625n/a }
1626n/a#endif
16278 PyErr_Format(PyExc_TypeError,
1628n/a "sequence item %zd: expected string,"
1629n/a " %.80s found",
1630n/a i, Py_TYPE(item)->tp_name);
16318 Py_DECREF(seq);
16328 return NULL;
1633n/a }
16349527697 sz += PyString_GET_SIZE(item);
16359527697 if (i != 0)
16369061530 sz += seplen;
16379527697 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
16380 PyErr_SetString(PyExc_OverflowError,
1639n/a "join() result is too long for a Python string");
16400 Py_DECREF(seq);
16410 return NULL;
1642n/a }
1643n/a }
1644n/a
1645n/a /* Allocate result space. */
1646466026 res = PyString_FromStringAndSize((char*)NULL, sz);
1647466026 if (res == NULL) {
16480 Py_DECREF(seq);
16490 return NULL;
1650n/a }
1651n/a
1652n/a /* Catenate everything. */
1653466026 p = PyString_AS_STRING(res);
16549993572 for (i = 0; i < seqlen; ++i) {
1655n/a size_t n;
16569527546 item = PySequence_Fast_GET_ITEM(seq, i);
16579527546 n = PyString_GET_SIZE(item);
16589527546 Py_MEMCPY(p, PyString_AS_STRING(item), n);
16599527546 p += n;
16609527546 if (i < seqlen - 1) {
16619061520 Py_MEMCPY(p, sep, seplen);
16629061520 p += seplen;
1663n/a }
1664n/a }
1665n/a
1666466026 Py_DECREF(seq);
1667466026 return res;
1668n/a}
1669n/a
1670n/aPyObject *
1671n/a_PyString_Join(PyObject *sep, PyObject *x)
167253626{
167353626 assert(sep != NULL && PyString_Check(sep));
167453626 assert(x != NULL);
167553626 return string_join((PyStringObject *)sep, x);
1676n/a}
1677n/a
1678n/a/* helper macro to fixup start/end slice values */
1679n/a#define ADJUST_INDICES(start, end, len) \
1680n/a if (end > len) \
1681n/a end = len; \
1682n/a else if (end < 0) { \
1683n/a end += len; \
1684n/a if (end < 0) \
1685n/a end = 0; \
1686n/a } \
1687n/a if (start < 0) { \
1688n/a start += len; \
1689n/a if (start < 0) \
1690n/a start = 0; \
1691n/a }
1692n/a
1693n/aPy_LOCAL_INLINE(Py_ssize_t)
1694n/astring_find_internal(PyStringObject *self, PyObject *args, int dir)
1695181131402{
1696n/a PyObject *subobj;
1697n/a const char *sub;
1698n/a Py_ssize_t sub_len;
1699181131402 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1700181131402 PyObject *obj_start=Py_None, *obj_end=Py_None;
1701n/a
1702181131402 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1703n/a &obj_start, &obj_end))
170412 return -2;
1705n/a /* Accept None for the "start" and "end" arguments; None means
1706n/a the same as if the argument had not been passed.
1707n/a */
1708181131390 if (obj_start != Py_None)
1709592268 if (!_PyEval_SliceIndex(obj_start, &start))
17100 return -2;
1711181131390 if (obj_end != Py_None)
1712530298 if (!_PyEval_SliceIndex(obj_end, &end))
17130 return -2;
1714n/a
1715181131390 if (PyString_Check(subobj)) {
1716181131351 sub = PyString_AS_STRING(subobj);
1717181131351 sub_len = PyString_GET_SIZE(subobj);
1718n/a }
1719n/a#ifdef Py_USING_UNICODE
172039 else if (PyUnicode_Check(subobj))
172119 return PyUnicode_Find(
1722n/a (PyObject *)self, subobj, start, end, dir);
1723n/a#endif
172420 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1725n/a /* XXX - the "expected a character buffer object" is pretty
1726n/a confusing for a non-expert. remap to something else ? */
172720 return -2;
1728n/a
1729181131351 if (dir > 0)
17301024015 return stringlib_find_slice(
1731n/a PyString_AS_STRING(self), PyString_GET_SIZE(self),
1732n/a sub, sub_len, start, end);
1733n/a else
1734180107336 return stringlib_rfind_slice(
1735n/a PyString_AS_STRING(self), PyString_GET_SIZE(self),
1736n/a sub, sub_len, start, end);
1737n/a}
1738n/a
1739n/a
1740n/aPyDoc_STRVAR(find__doc__,
1741n/a"S.find(sub [,start [,end]]) -> int\n\
1742n/a\n\
1743n/aReturn the lowest index in S where substring sub is found,\n\
1744n/asuch that sub is contained within s[start:end]. Optional\n\
1745n/aarguments start and end are interpreted as in slice notation.\n\
1746n/a\n\
1747n/aReturn -1 on failure.");
1748n/a
1749n/astatic PyObject *
1750n/astring_find(PyStringObject *self, PyObject *args)
17511022259{
17521022259 Py_ssize_t result = string_find_internal(self, args, +1);
17531022259 if (result == -2)
17548 return NULL;
17551022251 return PyInt_FromSsize_t(result);
1756n/a}
1757n/a
1758n/a
1759n/aPyDoc_STRVAR(index__doc__,
1760n/a"S.index(sub [,start [,end]]) -> int\n\
1761n/a\n\
1762n/aLike S.find() but raise ValueError when the substring is not found.");
1763n/a
1764n/astatic PyObject *
1765n/astring_index(PyStringObject *self, PyObject *args)
17661780{
17671780 Py_ssize_t result = string_find_internal(self, args, +1);
17681780 if (result == -2)
17698 return NULL;
17701772 if (result == -1) {
177160 PyErr_SetString(PyExc_ValueError,
1772n/a "substring not found");
177360 return NULL;
1774n/a }
17751712 return PyInt_FromSsize_t(result);
1776n/a}
1777n/a
1778n/a
1779n/aPyDoc_STRVAR(rfind__doc__,
1780n/a"S.rfind(sub [,start [,end]]) -> int\n\
1781n/a\n\
1782n/aReturn the highest index in S where substring sub is found,\n\
1783n/asuch that sub is contained within s[start:end]. Optional\n\
1784n/aarguments start and end are interpreted as in slice notation.\n\
1785n/a\n\
1786n/aReturn -1 on failure.");
1787n/a
1788n/astatic PyObject *
1789n/astring_rfind(PyStringObject *self, PyObject *args)
1790180107198{
1791180107198 Py_ssize_t result = string_find_internal(self, args, -1);
1792180107198 if (result == -2)
17938 return NULL;
1794180107190 return PyInt_FromSsize_t(result);
1795n/a}
1796n/a
1797n/a
1798n/aPyDoc_STRVAR(rindex__doc__,
1799n/a"S.rindex(sub [,start [,end]]) -> int\n\
1800n/a\n\
1801n/aLike S.rfind() but raise ValueError when the substring is not found.");
1802n/a
1803n/astatic PyObject *
1804n/astring_rindex(PyStringObject *self, PyObject *args)
1805165{
1806165 Py_ssize_t result = string_find_internal(self, args, -1);
1807165 if (result == -2)
18088 return NULL;
1809157 if (result == -1) {
181037 PyErr_SetString(PyExc_ValueError,
1811n/a "substring not found");
181237 return NULL;
1813n/a }
1814120 return PyInt_FromSsize_t(result);
1815n/a}
1816n/a
1817n/a
1818n/aPy_LOCAL_INLINE(PyObject *)
1819n/ado_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1820323653{
1821323653 char *s = PyString_AS_STRING(self);
1822323653 Py_ssize_t len = PyString_GET_SIZE(self);
1823323653 char *sep = PyString_AS_STRING(sepobj);
1824323653 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1825n/a Py_ssize_t i, j;
1826n/a
1827323653 i = 0;
1828323653 if (striptype != RIGHTSTRIP) {
18291250735 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1830700199 i++;
1831n/a }
1832n/a }
1833n/a
1834323653 j = len;
1835323653 if (striptype != LEFTSTRIP) {
1836n/a do {
183798789 j--;
183898789 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
183950617 j++;
1840n/a }
1841n/a
1842323653 if (i == 0 && j == len && PyString_CheckExact(self)) {
184311400 Py_INCREF(self);
184411400 return (PyObject*)self;
1845n/a }
1846n/a else
1847312253 return PyString_FromStringAndSize(s+i, j-i);
1848n/a}
1849n/a
1850n/a
1851n/aPy_LOCAL_INLINE(PyObject *)
1852n/ado_strip(PyStringObject *self, int striptype)
1853987339{
1854987339 char *s = PyString_AS_STRING(self);
1855987339 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1856n/a
1857987339 i = 0;
1858987339 if (striptype != RIGHTSTRIP) {
18592562544 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1860606510 i++;
1861n/a }
1862n/a }
1863n/a
1864987339 j = len;
1865987339 if (striptype != LEFTSTRIP) {
1866n/a do {
18671782478 j--;
18681782478 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1869977808 j++;
1870n/a }
1871n/a
1872987339 if (i == 0 && j == len && PyString_CheckExact(self)) {
1873308443 Py_INCREF(self);
1874308443 return (PyObject*)self;
1875n/a }
1876n/a else
1877678896 return PyString_FromStringAndSize(s+i, j-i);
1878n/a}
1879n/a
1880n/a
1881n/aPy_LOCAL_INLINE(PyObject *)
1882n/ado_argstrip(PyStringObject *self, int striptype, PyObject *args)
1883324603{
1884324603 PyObject *sep = NULL;
1885n/a
1886324603 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
18876 return NULL;
1888n/a
1889324597 if (sep != NULL && sep != Py_None) {
1890323668 if (PyString_Check(sep))
1891323653 return do_xstrip(self, striptype, sep);
1892n/a#ifdef Py_USING_UNICODE
189315 else if (PyUnicode_Check(sep)) {
189415 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1895n/a PyObject *res;
189615 if (uniself==NULL)
18970 return NULL;
189815 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1899n/a striptype, sep);
190015 Py_DECREF(uniself);
190115 return res;
1902n/a }
1903n/a#endif
19040 PyErr_Format(PyExc_TypeError,
1905n/a#ifdef Py_USING_UNICODE
1906n/a "%s arg must be None, str or unicode",
1907n/a#else
1908n/a "%s arg must be None or str",
1909n/a#endif
1910n/a STRIPNAME(striptype));
19110 return NULL;
1912n/a }
1913n/a
1914929 return do_strip(self, striptype);
1915n/a}
1916n/a
1917n/a
1918n/aPyDoc_STRVAR(strip__doc__,
1919n/a"S.strip([chars]) -> string or unicode\n\
1920n/a\n\
1921n/aReturn a copy of the string S with leading and trailing\n\
1922n/awhitespace removed.\n\
1923n/aIf chars is given and not None, remove characters in chars instead.\n\
1924n/aIf chars is unicode, S will be converted to unicode before stripping");
1925n/a
1926n/astatic PyObject *
1927n/astring_strip(PyStringObject *self, PyObject *args)
1928970725{
1929970725 if (PyTuple_GET_SIZE(args) == 0)
1930968377 return do_strip(self, BOTHSTRIP); /* Common case */
1931n/a else
19322348 return do_argstrip(self, BOTHSTRIP, args);
1933n/a}
1934n/a
1935n/a
1936n/aPyDoc_STRVAR(lstrip__doc__,
1937n/a"S.lstrip([chars]) -> string or unicode\n\
1938n/a\n\
1939n/aReturn a copy of the string S with leading whitespace removed.\n\
1940n/aIf chars is given and not None, remove characters in chars instead.\n\
1941n/aIf chars is unicode, S will be converted to unicode before stripping");
1942n/a
1943n/astatic PyObject *
1944n/astring_lstrip(PyStringObject *self, PyObject *args)
1945282574{
1946282574 if (PyTuple_GET_SIZE(args) == 0)
19479183 return do_strip(self, LEFTSTRIP); /* Common case */
1948n/a else
1949273391 return do_argstrip(self, LEFTSTRIP, args);
1950n/a}
1951n/a
1952n/a
1953n/aPyDoc_STRVAR(rstrip__doc__,
1954n/a"S.rstrip([chars]) -> string or unicode\n\
1955n/a\n\
1956n/aReturn a copy of the string S with trailing whitespace removed.\n\
1957n/aIf chars is given and not None, remove characters in chars instead.\n\
1958n/aIf chars is unicode, S will be converted to unicode before stripping");
1959n/a
1960n/astatic PyObject *
1961n/astring_rstrip(PyStringObject *self, PyObject *args)
196257714{
196357714 if (PyTuple_GET_SIZE(args) == 0)
19648850 return do_strip(self, RIGHTSTRIP); /* Common case */
1965n/a else
196648864 return do_argstrip(self, RIGHTSTRIP, args);
1967n/a}
1968n/a
1969n/a
1970n/aPyDoc_STRVAR(lower__doc__,
1971n/a"S.lower() -> string\n\
1972n/a\n\
1973n/aReturn a copy of the string S converted to lowercase.");
1974n/a
1975n/a/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1976n/a#ifndef _tolower
1977n/a#define _tolower tolower
1978n/a#endif
1979n/a
1980n/astatic PyObject *
1981n/astring_lower(PyStringObject *self)
1982269806{
1983n/a char *s;
1984269806 Py_ssize_t i, n = PyString_GET_SIZE(self);
1985n/a PyObject *newobj;
1986n/a
1987269806 newobj = PyString_FromStringAndSize(NULL, n);
1988269806 if (!newobj)
19890 return NULL;
1990n/a
1991269806 s = PyString_AS_STRING(newobj);
1992n/a
1993269806 Py_MEMCPY(s, PyString_AS_STRING(self), n);
1994n/a
19952411390 for (i = 0; i < n; i++) {
19962141584 int c = Py_CHARMASK(s[i]);
19972141584 if (isupper(c))
1998255463 s[i] = _tolower(c);
1999n/a }
2000n/a
2001269806 return newobj;
2002n/a}
2003n/a
2004n/aPyDoc_STRVAR(upper__doc__,
2005n/a"S.upper() -> string\n\
2006n/a\n\
2007n/aReturn a copy of the string S converted to uppercase.");
2008n/a
2009n/a#ifndef _toupper
2010n/a#define _toupper toupper
2011n/a#endif
2012n/a
2013n/astatic PyObject *
2014n/astring_upper(PyStringObject *self)
20155793{
2016n/a char *s;
20175793 Py_ssize_t i, n = PyString_GET_SIZE(self);
2018n/a PyObject *newobj;
2019n/a
20205793 newobj = PyString_FromStringAndSize(NULL, n);
20215793 if (!newobj)
20220 return NULL;
2023n/a
20245793 s = PyString_AS_STRING(newobj);
2025n/a
20265793 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2027n/a
202827100 for (i = 0; i < n; i++) {
202921307 int c = Py_CHARMASK(s[i]);
203021307 if (islower(c))
203117983 s[i] = _toupper(c);
2032n/a }
2033n/a
20345793 return newobj;
2035n/a}
2036n/a
2037n/aPyDoc_STRVAR(title__doc__,
2038n/a"S.title() -> string\n\
2039n/a\n\
2040n/aReturn a titlecased version of S, i.e. words start with uppercase\n\
2041n/acharacters, all remaining cased characters have lowercase.");
2042n/a
2043n/astatic PyObject*
2044n/astring_title(PyStringObject *self)
20451000{
20461000 char *s = PyString_AS_STRING(self), *s_new;
20471000 Py_ssize_t i, n = PyString_GET_SIZE(self);
20481000 int previous_is_cased = 0;
2049n/a PyObject *newobj;
2050n/a
20511000 newobj = PyString_FromStringAndSize(NULL, n);
20521000 if (newobj == NULL)
20530 return NULL;
20541000 s_new = PyString_AsString(newobj);
205512509 for (i = 0; i < n; i++) {
205611509 int c = Py_CHARMASK(*s++);
205711509 if (islower(c)) {
20589581 if (!previous_is_cased)
2059904 c = toupper(c);
20609581 previous_is_cased = 1;
20611928 } else if (isupper(c)) {
20621815 if (previous_is_cased)
20631638 c = tolower(c);
20641815 previous_is_cased = 1;
2065n/a } else
2066113 previous_is_cased = 0;
206711509 *s_new++ = c;
2068n/a }
20691000 return newobj;
2070n/a}
2071n/a
2072n/aPyDoc_STRVAR(capitalize__doc__,
2073n/a"S.capitalize() -> string\n\
2074n/a\n\
2075n/aReturn a copy of the string S with only its first character\n\
2076n/acapitalized.");
2077n/a
2078n/astatic PyObject *
2079n/astring_capitalize(PyStringObject *self)
2080397{
2081397 char *s = PyString_AS_STRING(self), *s_new;
2082397 Py_ssize_t i, n = PyString_GET_SIZE(self);
2083n/a PyObject *newobj;
2084n/a
2085397 newobj = PyString_FromStringAndSize(NULL, n);
2086397 if (newobj == NULL)
20870 return NULL;
2088397 s_new = PyString_AsString(newobj);
2089397 if (0 < n) {
2090395 int c = Py_CHARMASK(*s++);
2091395 if (islower(c))
209258 *s_new = toupper(c);
2093n/a else
2094337 *s_new = c;
2095395 s_new++;
2096n/a }
20978077 for (i = 1; i < n; i++) {
20987680 int c = Py_CHARMASK(*s++);
20997680 if (isupper(c))
210053 *s_new = tolower(c);
2101n/a else
21027627 *s_new = c;
21037680 s_new++;
2104n/a }
2105397 return newobj;
2106n/a}
2107n/a
2108n/a
2109n/aPyDoc_STRVAR(count__doc__,
2110n/a"S.count(sub[, start[, end]]) -> int\n\
2111n/a\n\
2112n/aReturn the number of non-overlapping occurrences of substring sub in\n\
2113n/astring S[start:end]. Optional arguments start and end are interpreted\n\
2114n/aas in slice notation.");
2115n/a
2116n/astatic PyObject *
2117n/astring_count(PyStringObject *self, PyObject *args)
2118479147{
2119n/a PyObject *sub_obj;
2120479147 const char *str = PyString_AS_STRING(self), *sub;
2121n/a Py_ssize_t sub_len;
2122479147 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2123n/a
2124479147 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2125n/a _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
21263 return NULL;
2127n/a
2128479144 if (PyString_Check(sub_obj)) {
2129479137 sub = PyString_AS_STRING(sub_obj);
2130479137 sub_len = PyString_GET_SIZE(sub_obj);
2131n/a }
2132n/a#ifdef Py_USING_UNICODE
21337 else if (PyUnicode_Check(sub_obj)) {
2134n/a Py_ssize_t count;
21352 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
21362 if (count == -1)
21370 return NULL;
2138n/a else
21392 return PyInt_FromSsize_t(count);
2140n/a }
2141n/a#endif
21425 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
21435 return NULL;
2144n/a
2145479137 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2146n/a
2147479137 return PyInt_FromSsize_t(
2148n/a stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2149n/a );
2150n/a}
2151n/a
2152n/aPyDoc_STRVAR(swapcase__doc__,
2153n/a"S.swapcase() -> string\n\
2154n/a\n\
2155n/aReturn a copy of the string S with uppercase characters\n\
2156n/aconverted to lowercase and vice versa.");
2157n/a
2158n/astatic PyObject *
2159n/astring_swapcase(PyStringObject *self)
216014{
216114 char *s = PyString_AS_STRING(self), *s_new;
216214 Py_ssize_t i, n = PyString_GET_SIZE(self);
2163n/a PyObject *newobj;
2164n/a
216514 newobj = PyString_FromStringAndSize(NULL, n);
216614 if (newobj == NULL)
21670 return NULL;
216814 s_new = PyString_AsString(newobj);
21697246 for (i = 0; i < n; i++) {
21707232 int c = Py_CHARMASK(*s++);
21717232 if (islower(c)) {
21722413 *s_new = toupper(c);
2173n/a }
21744819 else if (isupper(c)) {
21751759 *s_new = tolower(c);
2176n/a }
2177n/a else
21783060 *s_new = c;
21797232 s_new++;
2180n/a }
218114 return newobj;
2182n/a}
2183n/a
2184n/a
2185n/aPyDoc_STRVAR(translate__doc__,
2186n/a"S.translate(table [,deletechars]) -> string\n\
2187n/a\n\
2188n/aReturn a copy of the string S, where all characters occurring\n\
2189n/ain the optional argument deletechars are removed, and the\n\
2190n/aremaining characters have been mapped through the given\n\
2191n/atranslation table, which must be a string of length 256.");
2192n/a
2193n/astatic PyObject *
2194n/astring_translate(PyStringObject *self, PyObject *args)
21954466{
2196n/a register char *input, *output;
2197n/a const char *table;
21984466 register Py_ssize_t i, c, changed = 0;
21994466 PyObject *input_obj = (PyObject*)self;
22004466 const char *output_start, *del_table=NULL;
22014466 Py_ssize_t inlen, tablen, dellen = 0;
2202n/a PyObject *result;
2203n/a int trans_table[256];
22044466 PyObject *tableobj, *delobj = NULL;
2205n/a
22064466 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2207n/a &tableobj, &delobj))
22080 return NULL;
2209n/a
22104466 if (PyString_Check(tableobj)) {
22114457 table = PyString_AS_STRING(tableobj);
22124457 tablen = PyString_GET_SIZE(tableobj);
2213n/a }
22149 else if (tableobj == Py_None) {
22159 table = NULL;
22169 tablen = 256;
2217n/a }
2218n/a#ifdef Py_USING_UNICODE
22190 else if (PyUnicode_Check(tableobj)) {
2220n/a /* Unicode .translate() does not support the deletechars
2221n/a parameter; instead a mapping to None will cause characters
2222n/a to be deleted. */
22230 if (delobj != NULL) {
22240 PyErr_SetString(PyExc_TypeError,
2225n/a "deletions are implemented differently for unicode");
22260 return NULL;
2227n/a }
22280 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2229n/a }
2230n/a#endif
22310 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
22320 return NULL;
2233n/a
22344466 if (tablen != 256) {
22356 PyErr_SetString(PyExc_ValueError,
2236n/a "translation table must be 256 characters long");
22376 return NULL;
2238n/a }
2239n/a
22404460 if (delobj != NULL) {
224147 if (PyString_Check(delobj)) {
224247 del_table = PyString_AS_STRING(delobj);
224347 dellen = PyString_GET_SIZE(delobj);
2244n/a }
2245n/a#ifdef Py_USING_UNICODE
22460 else if (PyUnicode_Check(delobj)) {
22470 PyErr_SetString(PyExc_TypeError,
2248n/a "deletions are implemented differently for unicode");
22490 return NULL;
2250n/a }
2251n/a#endif
22520 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
22530 return NULL;
2254n/a }
2255n/a else {
22564413 del_table = NULL;
22574413 dellen = 0;
2258n/a }
2259n/a
22604460 inlen = PyString_GET_SIZE(input_obj);
22614460 result = PyString_FromStringAndSize((char *)NULL, inlen);
22624460 if (result == NULL)
22630 return NULL;
22644460 output_start = output = PyString_AsString(result);
22654460 input = PyString_AS_STRING(input_obj);
2266n/a
22674460 if (dellen == 0 && table != NULL) {
2268n/a /* If no deletions are required, use faster code */
2269146032 for (i = inlen; --i >= 0; ) {
2270137210 c = Py_CHARMASK(*input++);
2271137210 if (Py_CHARMASK((*output++ = table[c])) != c)
227266449 changed = 1;
2273n/a }
22744411 if (changed || !PyString_CheckExact(input_obj))
22751592 return result;
22762819 Py_DECREF(result);
22772819 Py_INCREF(input_obj);
22782819 return input_obj;
2279n/a }
2280n/a
228149 if (table == NULL) {
22822313 for (i = 0; i < 256; i++)
22832304 trans_table[i] = Py_CHARMASK(i);
2284n/a } else {
228510280 for (i = 0; i < 256; i++)
228610240 trans_table[i] = Py_CHARMASK(table[i]);
2287n/a }
2288n/a
22892454 for (i = 0; i < dellen; i++)
22902405 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2291n/a
2292380 for (i = inlen; --i >= 0; ) {
2293282 c = Py_CHARMASK(*input++);
2294282 if (trans_table[c] != -1)
229576 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
229667 continue;
2297215 changed = 1;
2298n/a }
229949 if (!changed && PyString_CheckExact(input_obj)) {
23006 Py_DECREF(result);
23016 Py_INCREF(input_obj);
23026 return input_obj;
2303n/a }
2304n/a /* Fix the size of the resulting string */
230543 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
23060 return NULL;
230743 return result;
2308n/a}
2309n/a
2310n/a
/* find and count characters and substrings */

/* Locate the first byte equal to `c` in the `target_len` bytes starting at
   `target`, using memchr().  Evaluates to a (char *) pointing at the match,
   or NULL when the byte does not occur. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2315n/a
2316n/a/* String ops must return a string. */
2317n/a/* If the object is subclass of string, create a copy */
2318n/aPy_LOCAL(PyStringObject *)
2319n/areturn_self(PyStringObject *self)
23201638610{
23211638610 if (PyString_CheckExact(self)) {
23221638571 Py_INCREF(self);
23231638571 return self;
2324n/a }
232539 return (PyStringObject *)PyString_FromStringAndSize(
2326n/a PyString_AS_STRING(self),
2327n/a PyString_GET_SIZE(self));
2328n/a}
2329n/a
2330n/aPy_LOCAL_INLINE(Py_ssize_t)
2331n/acountchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2332462253{
2333462253 Py_ssize_t count=0;
2334462253 const char *start=target;
2335462253 const char *end=target+target_len;
2336n/a
23371020085 while ( (start=findchar(start, end-start, c)) != NULL ) {
233895629 count++;
233995629 if (count >= maxcount)
234050 break;
234195579 start += 1;
2342n/a }
2343462253 return count;
2344n/a}
2345n/a
2346n/a
2347n/a/* Algorithms for different cases of string replacement */
2348n/a
2349n/a/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2350n/aPy_LOCAL(PyStringObject *)
2351n/areplace_interleave(PyStringObject *self,
2352n/a const char *to_s, Py_ssize_t to_len,
2353n/a Py_ssize_t maxcount)
235465{
2355n/a char *self_s, *result_s;
2356n/a Py_ssize_t self_len, result_len;
2357n/a Py_ssize_t count, i, product;
2358n/a PyStringObject *result;
2359n/a
236065 self_len = PyString_GET_SIZE(self);
2361n/a
2362n/a /* 1 at the end plus 1 after every character */
236365 count = self_len+1;
236465 if (maxcount < count)
236515 count = maxcount;
2366n/a
2367n/a /* Check for overflow */
2368n/a /* result_len = count * to_len + self_len; */
236965 product = count * to_len;
237065 if (product / to_len != count) {
23710 PyErr_SetString(PyExc_OverflowError,
2372n/a "replace string is too long");
23730 return NULL;
2374n/a }
237565 result_len = product + self_len;
237665 if (result_len < 0) {
23770 PyErr_SetString(PyExc_OverflowError,
2378n/a "replace string is too long");
23790 return NULL;
2380n/a }
2381n/a
238265 if (! (result = (PyStringObject *)
2383n/a PyString_FromStringAndSize(NULL, result_len)) )
23840 return NULL;
2385n/a
238665 self_s = PyString_AS_STRING(self);
238765 result_s = PyString_AS_STRING(result);
2388n/a
2389n/a /* TODO: special case single character, which doesn't need memcpy */
2390n/a
2391n/a /* Lay the first one down (guaranteed this will occur) */
239265 Py_MEMCPY(result_s, to_s, to_len);
239365 result_s += to_len;
239465 count -= 1;
2395n/a
2396160 for (i=0; i<count; i++) {
239795 *result_s++ = *self_s++;
239895 Py_MEMCPY(result_s, to_s, to_len);
239995 result_s += to_len;
2400n/a }
2401n/a
2402n/a /* Copy the rest of the original string */
240365 Py_MEMCPY(result_s, self_s, self_len-i);
2404n/a
240565 return result;
2406n/a}
2407n/a
2408n/a/* Special case for deleting a single character */
2409n/a/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2410n/aPy_LOCAL(PyStringObject *)
2411n/areplace_delete_single_character(PyStringObject *self,
2412n/a char from_c, Py_ssize_t maxcount)
2413454497{
2414n/a char *self_s, *result_s;
2415n/a char *start, *next, *end;
2416n/a Py_ssize_t self_len, result_len;
2417n/a Py_ssize_t count;
2418n/a PyStringObject *result;
2419n/a
2420454497 self_len = PyString_GET_SIZE(self);
2421454497 self_s = PyString_AS_STRING(self);
2422n/a
2423454497 count = countchar(self_s, self_len, from_c, maxcount);
2424454497 if (count == 0) {
2425410242 return return_self(self);
2426n/a }
2427n/a
242844255 result_len = self_len - count; /* from_len == 1 */
242944255 assert(result_len>=0);
2430n/a
243144255 if ( (result = (PyStringObject *)
2432n/a PyString_FromStringAndSize(NULL, result_len)) == NULL)
24330 return NULL;
243444255 result_s = PyString_AS_STRING(result);
2435n/a
243644255 start = self_s;
243744255 end = self_s + self_len;
2438165557 while (count-- > 0) {
243977047 next = findchar(start, end-start, from_c);
244077047 if (next == NULL)
24410 break;
244277047 Py_MEMCPY(result_s, start, next-start);
244377047 result_s += (next-start);
244477047 start = next+1;
2445n/a }
244644255 Py_MEMCPY(result_s, start, end-start);
2447n/a
244844255 return result;
2449n/a}
2450n/a
2451n/a/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2452n/a
2453n/aPy_LOCAL(PyStringObject *)
2454n/areplace_delete_substring(PyStringObject *self,
2455n/a const char *from_s, Py_ssize_t from_len,
2456440026 Py_ssize_t maxcount) {
2457n/a char *self_s, *result_s;
2458n/a char *start, *next, *end;
2459n/a Py_ssize_t self_len, result_len;
2460n/a Py_ssize_t count, offset;
2461n/a PyStringObject *result;
2462n/a
2463440026 self_len = PyString_GET_SIZE(self);
2464440026 self_s = PyString_AS_STRING(self);
2465n/a
2466440026 count = stringlib_count(self_s, self_len,
2467n/a from_s, from_len,
2468n/a maxcount);
2469n/a
2470440026 if (count == 0) {
2471n/a /* no matches */
2472369117 return return_self(self);
2473n/a }
2474n/a
247570909 result_len = self_len - (count * from_len);
247670909 assert (result_len>=0);
2477n/a
247870909 if ( (result = (PyStringObject *)
2479n/a PyString_FromStringAndSize(NULL, result_len)) == NULL )
24800 return NULL;
2481n/a
248270909 result_s = PyString_AS_STRING(result);
2483n/a
248470909 start = self_s;
248570909 end = self_s + self_len;
2486218774 while (count-- > 0) {
248776956 offset = stringlib_find(start, end-start,
2488n/a from_s, from_len,
2489n/a 0);
249076956 if (offset == -1)
24910 break;
249276956 next = start + offset;
2493n/a
249476956 Py_MEMCPY(result_s, start, next-start);
2495n/a
249676956 result_s += (next-start);
249776956 start = next+from_len;
2498n/a }
249970909 Py_MEMCPY(result_s, start, end-start);
250070909 return result;
2501n/a}
2502n/a
2503n/a/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2504n/aPy_LOCAL(PyStringObject *)
2505n/areplace_single_character_in_place(PyStringObject *self,
2506n/a char from_c, char to_c,
2507n/a Py_ssize_t maxcount)
250818117{
2509n/a char *self_s, *result_s, *start, *end, *next;
2510n/a Py_ssize_t self_len;
2511n/a PyStringObject *result;
2512n/a
2513n/a /* The result string will be the same size */
251418117 self_s = PyString_AS_STRING(self);
251518117 self_len = PyString_GET_SIZE(self);
2516n/a
251718117 next = findchar(self_s, self_len, from_c);
2518n/a
251918117 if (next == NULL) {
2520n/a /* No matches; return the original string */
25219638 return return_self(self);
2522n/a }
2523n/a
2524n/a /* Need to make a new string */
25258479 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
25268479 if (result == NULL)
25270 return NULL;
25288479 result_s = PyString_AS_STRING(result);
25298479 Py_MEMCPY(result_s, self_s, self_len);
2530n/a
2531n/a /* change everything in-place, starting with this one */
25328479 start = result_s + (next-self_s);
25338479 *start = to_c;
25348479 start++;
25358479 end = result_s + self_len;
2536n/a
253737550 while (--maxcount > 0) {
253829045 next = findchar(start, end-start, from_c);
253929045 if (next == NULL)
25408453 break;
254120592 *next = to_c;
254220592 start = next+1;
2543n/a }
2544n/a
25458479 return result;
2546n/a}
2547n/a
2548n/a/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2549n/aPy_LOCAL(PyStringObject *)
2550n/areplace_substring_in_place(PyStringObject *self,
2551n/a const char *from_s, Py_ssize_t from_len,
2552n/a const char *to_s, Py_ssize_t to_len,
2553n/a Py_ssize_t maxcount)
2554932{
2555n/a char *result_s, *start, *end;
2556n/a char *self_s;
2557n/a Py_ssize_t self_len, offset;
2558n/a PyStringObject *result;
2559n/a
2560n/a /* The result string will be the same size */
2561n/a
2562932 self_s = PyString_AS_STRING(self);
2563932 self_len = PyString_GET_SIZE(self);
2564n/a
2565932 offset = stringlib_find(self_s, self_len,
2566n/a from_s, from_len,
2567n/a 0);
2568932 if (offset == -1) {
2569n/a /* No matches; return the original string */
2570592 return return_self(self);
2571n/a }
2572n/a
2573n/a /* Need to make a new string */
2574340 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2575340 if (result == NULL)
25760 return NULL;
2577340 result_s = PyString_AS_STRING(result);
2578340 Py_MEMCPY(result_s, self_s, self_len);
2579n/a
2580n/a /* change everything in-place, starting with this one */
2581340 start = result_s + offset;
2582340 Py_MEMCPY(start, to_s, from_len);
2583340 start += from_len;
2584340 end = result_s + self_len;
2585n/a
2586745 while ( --maxcount > 0) {
2587390 offset = stringlib_find(start, end-start,
2588n/a from_s, from_len,
2589n/a 0);
2590390 if (offset==-1)
2591325 break;
259265 Py_MEMCPY(start+offset, to_s, from_len);
259365 start += offset+from_len;
2594n/a }
2595n/a
2596340 return result;
2597n/a}
2598n/a
2599n/a/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2600n/aPy_LOCAL(PyStringObject *)
2601n/areplace_single_character(PyStringObject *self,
2602n/a char from_c,
2603n/a const char *to_s, Py_ssize_t to_len,
2604n/a Py_ssize_t maxcount)
26057756{
2606n/a char *self_s, *result_s;
2607n/a char *start, *next, *end;
2608n/a Py_ssize_t self_len, result_len;
2609n/a Py_ssize_t count, product;
2610n/a PyStringObject *result;
2611n/a
26127756 self_s = PyString_AS_STRING(self);
26137756 self_len = PyString_GET_SIZE(self);
2614n/a
26157756 count = countchar(self_s, self_len, from_c, maxcount);
26167756 if (count == 0) {
2617n/a /* no matches, return unchanged */
26187212 return return_self(self);
2619n/a }
2620n/a
2621n/a /* use the difference between current and new, hence the "-1" */
2622n/a /* result_len = self_len + count * (to_len-1) */
2623544 product = count * (to_len-1);
2624544 if (product / (to_len-1) != count) {
26250 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
26260 return NULL;
2627n/a }
2628544 result_len = self_len + product;
2629544 if (result_len < 0) {
26300 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
26310 return NULL;
2632n/a }
2633n/a
2634544 if ( (result = (PyStringObject *)
2635n/a PyString_FromStringAndSize(NULL, result_len)) == NULL)
26360 return NULL;
2637544 result_s = PyString_AS_STRING(result);
2638n/a
2639544 start = self_s;
2640544 end = self_s + self_len;
264119670 while (count-- > 0) {
264218582 next = findchar(start, end-start, from_c);
264318582 if (next == NULL)
26440 break;
2645n/a
264618582 if (next == start) {
2647n/a /* replace with the 'to' */
2648624 Py_MEMCPY(result_s, to_s, to_len);
2649624 result_s += to_len;
2650624 start += 1;
2651n/a } else {
2652n/a /* copy the unchanged old then the 'to' */
265317958 Py_MEMCPY(result_s, start, next-start);
265417958 result_s += (next-start);
265517958 Py_MEMCPY(result_s, to_s, to_len);
265617958 result_s += to_len;
265717958 start = next+1;
2658n/a }
2659n/a }
2660n/a /* Copy the remainder of the remaining string */
2661544 Py_MEMCPY(result_s, start, end-start);
2662n/a
2663544 return result;
2664n/a}
2665n/a
2666n/a/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2667n/aPy_LOCAL(PyStringObject *)
2668n/areplace_substring(PyStringObject *self,
2669n/a const char *from_s, Py_ssize_t from_len,
2670n/a const char *to_s, Py_ssize_t to_len,
2671793514 Py_ssize_t maxcount) {
2672n/a char *self_s, *result_s;
2673n/a char *start, *next, *end;
2674n/a Py_ssize_t self_len, result_len;
2675n/a Py_ssize_t count, offset, product;
2676n/a PyStringObject *result;
2677n/a
2678793514 self_s = PyString_AS_STRING(self);
2679793514 self_len = PyString_GET_SIZE(self);
2680n/a
2681793514 count = stringlib_count(self_s, self_len,
2682n/a from_s, from_len,
2683n/a maxcount);
2684n/a
2685793514 if (count == 0) {
2686n/a /* no matches, return unchanged */
2687790926 return return_self(self);
2688n/a }
2689n/a
2690n/a /* Check for overflow */
2691n/a /* result_len = self_len + count * (to_len-from_len) */
26922588 product = count * (to_len-from_len);
26932588 if (product / (to_len-from_len) != count) {
26940 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
26950 return NULL;
2696n/a }
26972588 result_len = self_len + product;
26982588 if (result_len < 0) {
26990 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
27000 return NULL;
2701n/a }
2702n/a
27032588 if ( (result = (PyStringObject *)
2704n/a PyString_FromStringAndSize(NULL, result_len)) == NULL)
27050 return NULL;
27062588 result_s = PyString_AS_STRING(result);
2707n/a
27082588 start = self_s;
27092588 end = self_s + self_len;
271011208 while (count-- > 0) {
27116032 offset = stringlib_find(start, end-start,
2712n/a from_s, from_len,
2713n/a 0);
27146032 if (offset == -1)
27150 break;
27166032 next = start+offset;
27176032 if (next == start) {
2718n/a /* replace with the 'to' */
2719719 Py_MEMCPY(result_s, to_s, to_len);
2720719 result_s += to_len;
2721719 start += from_len;
2722n/a } else {
2723n/a /* copy the unchanged old then the 'to' */
27245313 Py_MEMCPY(result_s, start, next-start);
27255313 result_s += (next-start);
27265313 Py_MEMCPY(result_s, to_s, to_len);
27275313 result_s += to_len;
27285313 start = next+from_len;
2729n/a }
2730n/a }
2731n/a /* Copy the remainder of the remaining string */
27322588 Py_MEMCPY(result_s, start, end-start);
2733n/a
27342588 return result;
2735n/a}
2736n/a
2737n/a
2738n/aPy_LOCAL(PyStringObject *)
2739n/areplace(PyStringObject *self,
2740n/a const char *from_s, Py_ssize_t from_len,
2741n/a const char *to_s, Py_ssize_t to_len,
2742n/a Py_ssize_t maxcount)
27431765790{
27441765790 if (maxcount < 0) {
27451765489 maxcount = PY_SSIZE_T_MAX;
2746301 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2747n/a /* nothing to do; return the original string */
274878 return return_self(self);
2749n/a }
2750n/a
27511765712 if (maxcount == 0 ||
2752n/a (from_len == 0 && to_len == 0)) {
2753n/a /* nothing to do; return the original string */
275418 return return_self(self);
2755n/a }
2756n/a
2757n/a /* Handle zero-length special cases */
2758n/a
27591765694 if (from_len == 0) {
2760n/a /* insert the 'to' string everywhere. */
2761n/a /* >>> "Python".replace("", ".") */
2762n/a /* '.P.y.t.h.o.n.' */
276365 return replace_interleave(self, to_s, to_len, maxcount);
2764n/a }
2765n/a
2766n/a /* Except for "".replace("", "A") == "A" there is no way beyond this */
2767n/a /* point for an empty self string to generate a non-empty string */
2768n/a /* Special case so the remaining code always gets a non-empty string */
27691765629 if (PyString_GET_SIZE(self) == 0) {
277050787 return return_self(self);
2771n/a }
2772n/a
27731714842 if (to_len == 0) {
2774n/a /* delete all occurances of 'from' string */
2775894523 if (from_len == 1) {
2776454497 return replace_delete_single_character(
2777n/a self, from_s[0], maxcount);
2778n/a } else {
2779440026 return replace_delete_substring(self, from_s, from_len, maxcount);
2780n/a }
2781n/a }
2782n/a
2783n/a /* Handle special case where both strings have the same length */
2784n/a
2785820319 if (from_len == to_len) {
278619049 if (from_len == 1) {
278718117 return replace_single_character_in_place(
2788n/a self,
2789n/a from_s[0],
2790n/a to_s[0],
2791n/a maxcount);
2792n/a } else {
2793932 return replace_substring_in_place(
2794n/a self, from_s, from_len, to_s, to_len, maxcount);
2795n/a }
2796n/a }
2797n/a
2798n/a /* Otherwise use the more generic algorithms */
2799801270 if (from_len == 1) {
28007756 return replace_single_character(self, from_s[0],
2801n/a to_s, to_len, maxcount);
2802n/a } else {
2803n/a /* len('from')>=2, len('to')>=1 */
2804793514 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2805n/a }
2806n/a}
2807n/a
2808n/aPyDoc_STRVAR(replace__doc__,
2809n/a"S.replace(old, new[, count]) -> string\n\
2810n/a\n\
2811n/aReturn a copy of string S with all occurrences of substring\n\
2812n/aold replaced by new. If the optional argument count is\n\
2813n/agiven, only the first count occurrences are replaced.");
2814n/a
2815n/astatic PyObject *
2816n/astring_replace(PyStringObject *self, PyObject *args)
28171765806{
28181765806 Py_ssize_t count = -1;
2819n/a PyObject *from, *to;
2820n/a const char *from_s, *to_s;
2821n/a Py_ssize_t from_len, to_len;
2822n/a
28231765806 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
28244 return NULL;
2825n/a
28261765802 if (PyString_Check(from)) {
28271765785 from_s = PyString_AS_STRING(from);
28281765785 from_len = PyString_GET_SIZE(from);
2829n/a }
2830n/a#ifdef Py_USING_UNICODE
28311765802 if (PyUnicode_Check(from))
28322 return PyUnicode_Replace((PyObject *)self,
2833n/a from, to, count);
2834n/a#endif
28351765800 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
28365 return NULL;
2837n/a
28381765795 if (PyString_Check(to)) {
28391765780 to_s = PyString_AS_STRING(to);
28401765780 to_len = PyString_GET_SIZE(to);
2841n/a }
2842n/a#ifdef Py_USING_UNICODE
284315 else if (PyUnicode_Check(to))
28440 return PyUnicode_Replace((PyObject *)self,
2845n/a from, to, count);
2846n/a#endif
284715 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
28485 return NULL;
2849n/a
28501765790 return (PyObject *)replace((PyStringObject *) self,
2851n/a from_s, from_len,
2852n/a to_s, to_len, count);
2853n/a}
2854n/a
2855n/a/** End DALKE **/
2856n/a
2857n/a/* Matches the end (direction >= 0) or start (direction < 0) of self
2858n/a * against substr, using the start and end arguments. Returns
2859n/a * -1 on error, 0 if not found and 1 if found.
2860n/a */
2861n/aPy_LOCAL(int)
2862n/a_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2863n/a Py_ssize_t end, int direction)
28642076527{
28652076527 Py_ssize_t len = PyString_GET_SIZE(self);
2866n/a Py_ssize_t slen;
2867n/a const char* sub;
2868n/a const char* str;
2869n/a
28702076527 if (PyString_Check(substr)) {
28712076458 sub = PyString_AS_STRING(substr);
28722076458 slen = PyString_GET_SIZE(substr);
2873n/a }
2874n/a#ifdef Py_USING_UNICODE
287569 else if (PyUnicode_Check(substr))
287657 return PyUnicode_Tailmatch((PyObject *)self,
2877n/a substr, start, end, direction);
2878n/a#endif
287912 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
288012 return -1;
28812076458 str = PyString_AS_STRING(self);
2882n/a
28832076458 ADJUST_INDICES(start, end, len);
2884n/a
28852076458 if (direction < 0) {
2886n/a /* startswith */
28871599797 if (start+slen > len)
288830518 return 0;
2889n/a } else {
2890n/a /* endswith */
2891476661 if (end-start < slen || start > len)
28923160 return 0;
2893n/a
2894473501 if (end-slen > start)
2895329457 start = end - slen;
2896n/a }
28972042780 if (end-start >= slen)
28982042768 return ! memcmp(str+start, sub, slen);
289912 return 0;
2900n/a}
2901n/a
2902n/a
2903n/aPyDoc_STRVAR(startswith__doc__,
2904n/a"S.startswith(prefix[, start[, end]]) -> bool\n\
2905n/a\n\
2906n/aReturn True if S starts with the specified prefix, False otherwise.\n\
2907n/aWith optional start, test S beginning at that position.\n\
2908n/aWith optional end, stop comparing S at that position.\n\
2909n/aprefix can also be a tuple of strings to try.");
2910n/a
2911n/astatic PyObject *
2912n/astring_startswith(PyStringObject *self, PyObject *args)
29131598751{
29141598751 Py_ssize_t start = 0;
29151598751 Py_ssize_t end = PY_SSIZE_T_MAX;
2916n/a PyObject *subobj;
2917n/a int result;
2918n/a
29191598751 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2920n/a _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
29211 return NULL;
29221598750 if (PyTuple_Check(subobj)) {
2923n/a Py_ssize_t i;
29242599 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
29251887 result = _string_tailmatch(self,
2926n/a PyTuple_GET_ITEM(subobj, i),
2927n/a start, end, -1);
29281887 if (result == -1)
29293 return NULL;
29301884 else if (result) {
293162 Py_RETURN_TRUE;
2932n/a }
2933n/a }
2934712 Py_RETURN_FALSE;
2935n/a }
29361597973 result = _string_tailmatch(self, subobj, start, end, -1);
29371597973 if (result == -1)
29383 return NULL;
2939n/a else
29401597970 return PyBool_FromLong(result);
2941n/a}
2942n/a
2943n/a
2944n/aPyDoc_STRVAR(endswith__doc__,
2945n/a"S.endswith(suffix[, start[, end]]) -> bool\n\
2946n/a\n\
2947n/aReturn True if S ends with the specified suffix, False otherwise.\n\
2948n/aWith optional start, test S beginning at that position.\n\
2949n/aWith optional end, stop comparing S at that position.\n\
2950n/asuffix can also be a tuple of strings to try.");
2951n/a
2952n/astatic PyObject *
2953n/astring_endswith(PyStringObject *self, PyObject *args)
2954475387{
2955475387 Py_ssize_t start = 0;
2956475387 Py_ssize_t end = PY_SSIZE_T_MAX;
2957n/a PyObject *subobj;
2958n/a int result;
2959n/a
2960475387 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2961n/a _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
29621 return NULL;
2963475386 if (PyTuple_Check(subobj)) {
2964n/a Py_ssize_t i;
29653849 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
29662574 result = _string_tailmatch(self,
2967n/a PyTuple_GET_ITEM(subobj, i),
2968n/a start, end, +1);
29692574 if (result == -1)
29703 return NULL;
29712571 else if (result) {
297215 Py_RETURN_TRUE;
2973n/a }
2974n/a }
29751275 Py_RETURN_FALSE;
2976n/a }
2977474093 result = _string_tailmatch(self, subobj, start, end, +1);
2978474093 if (result == -1)
29793 return NULL;
2980n/a else
2981474090 return PyBool_FromLong(result);
2982n/a}
2983n/a
2984n/a
2985n/aPyDoc_STRVAR(encode__doc__,
2986n/a"S.encode([encoding[,errors]]) -> object\n\
2987n/a\n\
2988n/aEncodes S using the codec registered for encoding. encoding defaults\n\
2989n/ato the default encoding. errors may be given to set a different error\n\
2990n/ahandling scheme. Default is 'strict' meaning that encoding errors raise\n\
2991n/aa UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2992n/a'xmlcharrefreplace' as well as any other name registered with\n\
2993n/acodecs.register_error that is able to handle UnicodeEncodeErrors.");
2994n/a
2995n/astatic PyObject *
2996n/astring_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
29974749{
2998n/a static char *kwlist[] = {"encoding", "errors", 0};
29994749 char *encoding = NULL;
30004749 char *errors = NULL;
3001n/a PyObject *v;
3002n/a
30034749 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3004n/a kwlist, &encoding, &errors))
30053 return NULL;
30064746 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
30074746 if (v == NULL)
30082 goto onError;
30094744 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
30100 PyErr_Format(PyExc_TypeError,
3011n/a "encoder did not return a string/unicode object "
3012n/a "(type=%.400s)",
3013n/a Py_TYPE(v)->tp_name);
30140 Py_DECREF(v);
30150 return NULL;
3016n/a }
30174744 return v;
3018n/a
30192 onError:
30202 return NULL;
3021n/a}
3022n/a
3023n/a
3024n/aPyDoc_STRVAR(decode__doc__,
3025n/a"S.decode([encoding[,errors]]) -> object\n\
3026n/a\n\
3027n/aDecodes S using the codec registered for encoding. encoding defaults\n\
3028n/ato the default encoding. errors may be given to set a different error\n\
3029n/ahandling scheme. Default is 'strict' meaning that encoding errors raise\n\
3030n/aa UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3031n/aas well as any other name registered with codecs.register_error that is\n\
3032n/aable to handle UnicodeDecodeErrors.");
3033n/a
3034n/astatic PyObject *
3035n/astring_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
30361313284{
3037n/a static char *kwlist[] = {"encoding", "errors", 0};
30381313284 char *encoding = NULL;
30391313284 char *errors = NULL;
3040n/a PyObject *v;
3041n/a
30421313284 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3043n/a kwlist, &encoding, &errors))
30443 return NULL;
30451313281 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
30461313281 if (v == NULL)
3047550 goto onError;
30481312731 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
30490 PyErr_Format(PyExc_TypeError,
3050n/a "decoder did not return a string/unicode object "
3051n/a "(type=%.400s)",
3052n/a Py_TYPE(v)->tp_name);
30530 Py_DECREF(v);
30540 return NULL;
3055n/a }
30561312731 return v;
3057n/a
3058550 onError:
3059550 return NULL;
3060n/a}
3061n/a
3062n/a
3063n/aPyDoc_STRVAR(expandtabs__doc__,
3064n/a"S.expandtabs([tabsize]) -> string\n\
3065n/a\n\
3066n/aReturn a copy of S where all tab characters are expanded using spaces.\n\
3067n/aIf tabsize is not given, a tab size of 8 characters is assumed.");
3068n/a
3069n/astatic PyObject*
3070n/astring_expandtabs(PyStringObject *self, PyObject *args)
3071391893{
3072n/a const char *e, *p, *qe;
3073n/a char *q;
3074n/a Py_ssize_t i, j, incr;
3075n/a PyObject *u;
3076391893 int tabsize = 8;
3077n/a
3078391893 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
30791 return NULL;
3080n/a
3081n/a /* First pass: determine size of output string */
3082391892 i = 0; /* chars up to and including most recent \n or \r */
3083391892 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3084391892 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
308515822006 for (p = PyString_AS_STRING(self); p < e; p++)
308615430114 if (*p == '\t') {
3087722 if (tabsize > 0) {
3088722 incr = tabsize - (j % tabsize);
3089722 if (j > PY_SSIZE_T_MAX - incr)
30900 goto overflow1;
3091722 j += incr;
3092n/a }
3093n/a }
3094n/a else {
309515429392 if (j > PY_SSIZE_T_MAX - 1)
30960 goto overflow1;
309715429392 j++;
309815429392 if (*p == '\n' || *p == '\r') {
3099420301 if (i > PY_SSIZE_T_MAX - j)
31000 goto overflow1;
3101420301 i += j;
3102420301 j = 0;
3103n/a }
3104n/a }
3105n/a
3106391892 if (i > PY_SSIZE_T_MAX - j)
31070 goto overflow1;
3108n/a
3109n/a /* Second pass: create output string and fill it */
3110391892 u = PyString_FromStringAndSize(NULL, i + j);
3111391892 if (!u)
31120 return NULL;
3113n/a
3114391892 j = 0; /* same as in first pass */
3115391892 q = PyString_AS_STRING(u); /* next output char */
3116391892 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3117n/a
311815822006 for (p = PyString_AS_STRING(self); p < e; p++)
311915430114 if (*p == '\t') {
3120722 if (tabsize > 0) {
3121722 i = tabsize - (j % tabsize);
3122722 j += i;
31232443 while (i--) {
3124999 if (q >= qe)
31250 goto overflow2;
3126999 *q++ = ' ';
3127n/a }
3128n/a }
3129n/a }
3130n/a else {
313115429392 if (q >= qe)
31320 goto overflow2;
313315429392 *q++ = *p;
313415429392 j++;
313515429392 if (*p == '\n' || *p == '\r')
3136420301 j = 0;
3137n/a }
3138n/a
3139391892 return u;
3140n/a
31410 overflow2:
31420 Py_DECREF(u);
31430 overflow1:
31440 PyErr_SetString(PyExc_OverflowError, "new string is too long");
31450 return NULL;
3146n/a}
3147n/a
3148n/aPy_LOCAL_INLINE(PyObject *)
3149n/apad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
31508842{
3151n/a PyObject *u;
3152n/a
31538842 if (left < 0)
31541 left = 0;
31558842 if (right < 0)
31562 right = 0;
3157n/a
31588842 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
31590 Py_INCREF(self);
31600 return (PyObject *)self;
3161n/a }
3162n/a
31638842 u = PyString_FromStringAndSize(NULL,
3164n/a left + PyString_GET_SIZE(self) + right);
31658842 if (u) {
31668842 if (left)
31675405 memset(PyString_AS_STRING(u), fill, left);
31688842 Py_MEMCPY(PyString_AS_STRING(u) + left,
3169n/a PyString_AS_STRING(self),
3170n/a PyString_GET_SIZE(self));
31718842 if (right)
31723611 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3173n/a fill, right);
3174n/a }
3175n/a
31768842 return u;
3177n/a}
3178n/a
3179n/aPyDoc_STRVAR(ljust__doc__,
3180n/a"S.ljust(width[, fillchar]) -> string\n"
3181n/a"\n"
3182n/a"Return S left-justified in a string of length width. Padding is\n"
3183n/a"done using the specified fill character (default is a space).");
3184n/a
3185n/astatic PyObject *
3186n/astring_ljust(PyStringObject *self, PyObject *args)
31873486{
3188n/a Py_ssize_t width;
31893486 char fillchar = ' ';
3190n/a
31913486 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
31922 return NULL;
3193n/a
31943484 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
319555 Py_INCREF(self);
319655 return (PyObject*) self;
3197n/a }
3198n/a
31993429 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3200n/a}
3201n/a
3202n/a
3203n/aPyDoc_STRVAR(rjust__doc__,
3204n/a"S.rjust(width[, fillchar]) -> string\n"
3205n/a"\n"
3206n/a"Return S right-justified in a string of length width. Padding is\n"
3207n/a"done using the specified fill character (default is a space)");
3208n/a
3209n/astatic PyObject *
3210n/astring_rjust(PyStringObject *self, PyObject *args)
32115209{
3212n/a Py_ssize_t width;
32135209 char fillchar = ' ';
3214n/a
32155209 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
32162 return NULL;
3217n/a
32185207 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
321910 Py_INCREF(self);
322010 return (PyObject*) self;
3221n/a }
3222n/a
32235197 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3224n/a}
3225n/a
3226n/a
3227n/aPyDoc_STRVAR(center__doc__,
3228n/a"S.center(width[, fillchar]) -> string\n"
3229n/a"\n"
3230n/a"Return S centered in a string of length width. Padding is\n"
3231n/a"done using the specified fill character (default is a space)");
3232n/a
3233n/astatic PyObject *
3234n/astring_center(PyStringObject *self, PyObject *args)
32351120{
3236n/a Py_ssize_t marg, left;
3237n/a Py_ssize_t width;
32381120 char fillchar = ' ';
3239n/a
32401120 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
32412 return NULL;
3242n/a
32431118 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3244928 Py_INCREF(self);
3245928 return (PyObject*) self;
3246n/a }
3247n/a
3248190 marg = width - PyString_GET_SIZE(self);
3249190 left = marg / 2 + (marg & width & 1);
3250n/a
3251190 return pad(self, left, marg - left, fillchar);
3252n/a}
3253n/a
3254n/aPyDoc_STRVAR(zfill__doc__,
3255n/a"S.zfill(width) -> string\n"
3256n/a"\n"
3257n/a"Pad a numeric string S with zeros on the left, to fill a field\n"
3258n/a"of the specified width. The string S is never truncated.");
3259n/a
3260n/astatic PyObject *
3261n/astring_zfill(PyStringObject *self, PyObject *args)
326270{
3263n/a Py_ssize_t fill;
3264n/a PyObject *s;
3265n/a char *p;
3266n/a Py_ssize_t width;
3267n/a
326870 if (!PyArg_ParseTuple(args, "n:zfill", &width))
32692 return NULL;
3270n/a
327168 if (PyString_GET_SIZE(self) >= width) {
327242 if (PyString_CheckExact(self)) {
327335 Py_INCREF(self);
327435 return (PyObject*) self;
3275n/a }
3276n/a else
32777 return PyString_FromStringAndSize(
3278n/a PyString_AS_STRING(self),
3279n/a PyString_GET_SIZE(self)
3280n/a );
3281n/a }
3282n/a
328326 fill = width - PyString_GET_SIZE(self);
3284n/a
328526 s = pad(self, fill, 0, '0');
3286n/a
328726 if (s == NULL)
32880 return NULL;
3289n/a
329026 p = PyString_AS_STRING(s);
329126 if (p[fill] == '+' || p[fill] == '-') {
3292n/a /* move sign to beginning of string */
329311 p[0] = p[fill];
329411 p[fill] = '0';
3295n/a }
3296n/a
329726 return (PyObject*) s;
3298n/a}
3299n/a
3300n/aPyDoc_STRVAR(isspace__doc__,
3301n/a"S.isspace() -> bool\n\
3302n/a\n\
3303n/aReturn True if all characters in S are whitespace\n\
3304n/aand there is at least one character in S, False otherwise.");
3305n/a
3306n/astatic PyObject*
3307n/astring_isspace(PyStringObject *self)
330810153{
3309n/a register const unsigned char *p
331010153 = (unsigned char *) PyString_AS_STRING(self);
3311n/a register const unsigned char *e;
3312n/a
3313n/a /* Shortcut for single character strings */
331410153 if (PyString_GET_SIZE(self) == 1 &&
3315n/a isspace(*p))
33167097 return PyBool_FromLong(1);
3317n/a
3318n/a /* Special case for empty strings */
33193056 if (PyString_GET_SIZE(self) == 0)
33205 return PyBool_FromLong(0);
3321n/a
33223051 e = p + PyString_GET_SIZE(self);
332313383 for (; p < e; p++) {
332413379 if (!isspace(*p))
33253047 return PyBool_FromLong(0);
3326n/a }
33274 return PyBool_FromLong(1);
3328n/a}
3329n/a
3330n/a
3331n/aPyDoc_STRVAR(isalpha__doc__,
3332n/a"S.isalpha() -> bool\n\
3333n/a\n\
3334n/aReturn True if all characters in S are alphabetic\n\
3335n/aand there is at least one character in S, False otherwise.");
3336n/a
3337n/astatic PyObject*
3338n/astring_isalpha(PyStringObject *self)
333956902{
3340n/a register const unsigned char *p
334156902 = (unsigned char *) PyString_AS_STRING(self);
3342n/a register const unsigned char *e;
3343n/a
3344n/a /* Shortcut for single character strings */
334556902 if (PyString_GET_SIZE(self) == 1 &&
3346n/a isalpha(*p))
334754573 return PyBool_FromLong(1);
3348n/a
3349n/a /* Special case for empty strings */
33502329 if (PyString_GET_SIZE(self) == 0)
33513 return PyBool_FromLong(0);
3352n/a
33532326 e = p + PyString_GET_SIZE(self);
335412664 for (; p < e; p++) {
335512659 if (!isalpha(*p))
33562321 return PyBool_FromLong(0);
3357n/a }
33585 return PyBool_FromLong(1);
3359n/a}
3360n/a
3361n/a
3362n/aPyDoc_STRVAR(isalnum__doc__,
3363n/a"S.isalnum() -> bool\n\
3364n/a\n\
3365n/aReturn True if all characters in S are alphanumeric\n\
3366n/aand there is at least one character in S, False otherwise.");
3367n/a
3368n/astatic PyObject*
3369n/astring_isalnum(PyStringObject *self)
337053252{
3371n/a register const unsigned char *p
337253252 = (unsigned char *) PyString_AS_STRING(self);
3373n/a register const unsigned char *e;
3374n/a
3375n/a /* Shortcut for single character strings */
337653252 if (PyString_GET_SIZE(self) == 1 &&
3377n/a isalnum(*p))
337853186 return PyBool_FromLong(1);
3379n/a
3380n/a /* Special case for empty strings */
338166 if (PyString_GET_SIZE(self) == 0)
33823 return PyBool_FromLong(0);
3383n/a
338463 e = p + PyString_GET_SIZE(self);
338510445 for (; p < e; p++) {
338610437 if (!isalnum(*p))
338755 return PyBool_FromLong(0);
3388n/a }
33898 return PyBool_FromLong(1);
3390n/a}
3391n/a
3392n/a
3393n/aPyDoc_STRVAR(isdigit__doc__,
3394n/a"S.isdigit() -> bool\n\
3395n/a\n\
3396n/aReturn True if all characters in S are digits\n\
3397n/aand there is at least one character in S, False otherwise.");
3398n/a
3399n/astatic PyObject*
3400n/astring_isdigit(PyStringObject *self)
34019315{
3402n/a register const unsigned char *p
34039315 = (unsigned char *) PyString_AS_STRING(self);
3404n/a register const unsigned char *e;
3405n/a
3406n/a /* Shortcut for single character strings */
34079315 if (PyString_GET_SIZE(self) == 1 &&
3408n/a isdigit(*p))
34092816 return PyBool_FromLong(1);
3410n/a
3411n/a /* Special case for empty strings */
34126499 if (PyString_GET_SIZE(self) == 0)
34133 return PyBool_FromLong(0);
3414n/a
34156496 e = p + PyString_GET_SIZE(self);
341617088 for (; p < e; p++) {
341716984 if (!isdigit(*p))
34186392 return PyBool_FromLong(0);
3419n/a }
3420104 return PyBool_FromLong(1);
3421n/a}
3422n/a
3423n/a
3424n/aPyDoc_STRVAR(islower__doc__,
3425n/a"S.islower() -> bool\n\
3426n/a\n\
3427n/aReturn True if all cased characters in S are lowercase and there is\n\
3428n/aat least one cased character in S, False otherwise.");
3429n/a
3430n/astatic PyObject*
3431n/astring_islower(PyStringObject *self)
3432521{
3433n/a register const unsigned char *p
3434521 = (unsigned char *) PyString_AS_STRING(self);
3435n/a register const unsigned char *e;
3436n/a int cased;
3437n/a
3438n/a /* Shortcut for single character strings */
3439521 if (PyString_GET_SIZE(self) == 1)
3440373 return PyBool_FromLong(islower(*p) != 0);
3441n/a
3442n/a /* Special case for empty strings */
3443148 if (PyString_GET_SIZE(self) == 0)
34443 return PyBool_FromLong(0);
3445n/a
3446145 e = p + PyString_GET_SIZE(self);
3447145 cased = 0;
344811547 for (; p < e; p++) {
344911413 if (isupper(*p))
345011 return PyBool_FromLong(0);
345111402 else if (!cased && islower(*p))
345282 cased = 1;
3453n/a }
3454134 return PyBool_FromLong(cased);
3455n/a}
3456n/a
3457n/a
3458n/aPyDoc_STRVAR(isupper__doc__,
3459n/a"S.isupper() -> bool\n\
3460n/a\n\
3461n/aReturn True if all cased characters in S are uppercase and there is\n\
3462n/aat least one cased character in S, False otherwise.");
3463n/a
3464n/astatic PyObject*
3465n/astring_isupper(PyStringObject *self)
34668709{
3467n/a register const unsigned char *p
34688709 = (unsigned char *) PyString_AS_STRING(self);
3469n/a register const unsigned char *e;
3470n/a int cased;
3471n/a
3472n/a /* Shortcut for single character strings */
34738709 if (PyString_GET_SIZE(self) == 1)
3474271 return PyBool_FromLong(isupper(*p) != 0);
3475n/a
3476n/a /* Special case for empty strings */
34778438 if (PyString_GET_SIZE(self) == 0)
34783 return PyBool_FromLong(0);
3479n/a
34808435 e = p + PyString_GET_SIZE(self);
34818435 cased = 0;
348221308 for (; p < e; p++) {
348320943 if (islower(*p))
34848070 return PyBool_FromLong(0);
348512873 else if (!cased && isupper(*p))
3486369 cased = 1;
3487n/a }
3488365 return PyBool_FromLong(cased);
3489n/a}
3490n/a
3491n/a
3492n/aPyDoc_STRVAR(istitle__doc__,
3493n/a"S.istitle() -> bool\n\
3494n/a\n\
3495n/aReturn True if S is a titlecased string and there is at least one\n\
3496n/acharacter in S, i.e. uppercase characters may only follow uncased\n\
3497n/acharacters and lowercase characters only cased ones. Return False\n\
3498n/aotherwise.");
3499n/a
3500n/astatic PyObject*
3501n/astring_istitle(PyStringObject *self, PyObject *uncased)
350238{
3503n/a register const unsigned char *p
350438 = (unsigned char *) PyString_AS_STRING(self);
3505n/a register const unsigned char *e;
3506n/a int cased, previous_is_cased;
3507n/a
3508n/a /* Shortcut for single character strings */
350938 if (PyString_GET_SIZE(self) == 1)
351011 return PyBool_FromLong(isupper(*p) != 0);
3511n/a
3512n/a /* Special case for empty strings */
351327 if (PyString_GET_SIZE(self) == 0)
35143 return PyBool_FromLong(0);
3515n/a
351624 e = p + PyString_GET_SIZE(self);
351724 cased = 0;
351824 previous_is_cased = 0;
351915687 for (; p < e; p++) {
352015676 register const unsigned char ch = *p;
3521n/a
352215676 if (isupper(ch)) {
352348 if (previous_is_cased)
35244 return PyBool_FromLong(0);
352544 previous_is_cased = 1;
352644 cased = 1;
3527n/a }
352815628 else if (islower(ch)) {
352915577 if (!previous_is_cased)
35309 return PyBool_FromLong(0);
353115568 previous_is_cased = 1;
353215568 cased = 1;
3533n/a }
3534n/a else
353551 previous_is_cased = 0;
3536n/a }
353711 return PyBool_FromLong(cased);
3538n/a}
3539n/a
3540n/a
3541n/aPyDoc_STRVAR(splitlines__doc__,
3542n/a"S.splitlines([keepends]) -> list of strings\n\
3543n/a\n\
3544n/aReturn a list of the lines in S, breaking at line boundaries.\n\
3545n/aLine breaks are not included in the resulting list unless keepends\n\
3546n/ais given and true.");
3547n/a
3548n/astatic PyObject*
3549n/astring_splitlines(PyStringObject *self, PyObject *args)
35501607{
35511607 int keepends = 0;
3552n/a
35531607 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
35541 return NULL;
3555n/a
35561606 return stringlib_splitlines(
3557n/a (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3558n/a keepends
3559n/a );
3560n/a}
3561n/a
3562n/aPyDoc_STRVAR(sizeof__doc__,
3563n/a"S.__sizeof__() -> size of S in memory, in bytes");
3564n/a
3565n/astatic PyObject *
3566n/astring_sizeof(PyStringObject *v)
35672{
3568n/a Py_ssize_t res;
35692 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
35702 return PyInt_FromSsize_t(res);
3571n/a}
3572n/a
3573n/astatic PyObject *
3574n/astring_getnewargs(PyStringObject *v)
35759{
35769 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3577n/a}
3578n/a
3579n/a
3580n/a#include "stringlib/string_format.h"
3581n/a
3582n/aPyDoc_STRVAR(format__doc__,
3583n/a"S.format(*args, **kwargs) -> unicode\n\
3584n/a\n\
3585n/a");
3586n/a
3587n/astatic PyObject *
3588n/astring__format__(PyObject* self, PyObject* args)
3589126{
3590n/a PyObject *format_spec;
3591126 PyObject *result = NULL;
3592126 PyObject *tmp = NULL;
3593n/a
3594n/a /* If 2.x, convert format_spec to the same type as value */
3595n/a /* This is to allow things like u''.format('') */
3596126 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
35970 goto done;
3598126 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
35990 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3600n/a "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
36010 goto done;
3602n/a }
3603126 tmp = PyObject_Str(format_spec);
3604126 if (tmp == NULL)
36050 goto done;
3606126 format_spec = tmp;
3607n/a
3608126 result = _PyBytes_FormatAdvanced(self,
3609n/a PyString_AS_STRING(format_spec),
3610n/a PyString_GET_SIZE(format_spec));
3611126done:
3612126 Py_XDECREF(tmp);
3613126 return result;
3614n/a}
3615n/a
3616n/aPyDoc_STRVAR(p_format__doc__,
3617n/a"S.__format__(format_spec) -> unicode\n\
3618n/a\n\
3619n/a");
3620n/a
3621n/a
3622n/astatic PyMethodDef
3623n/astring_methods[] = {
3624n/a /* Counterparts of the obsolete stropmodule functions; except
3625n/a string.maketrans(). */
3626n/a {"join", (PyCFunction)string_join, METH_O, join__doc__},
3627n/a {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3628n/a {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3629n/a {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3630n/a {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3631n/a {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3632n/a {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3633n/a {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3634n/a {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3635n/a {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3636n/a {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3637n/a {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3638n/a {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3639n/a capitalize__doc__},
3640n/a {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3641n/a {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3642n/a endswith__doc__},
3643n/a {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3644n/a {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3645n/a {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3646n/a {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3647n/a {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3648n/a {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3649n/a {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3650n/a {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3651n/a {"rpartition", (PyCFunction)string_rpartition, METH_O,
3652n/a rpartition__doc__},
3653n/a {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3654n/a startswith__doc__},
3655n/a {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3656n/a {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3657n/a swapcase__doc__},
3658n/a {"translate", (PyCFunction)string_translate, METH_VARARGS,
3659n/a translate__doc__},
3660n/a {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3661n/a {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3662n/a {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3663n/a {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3664n/a {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3665n/a {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3666n/a {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3667n/a {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3668n/a {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3669n/a {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3670n/a {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3671n/a {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3672n/a expandtabs__doc__},
3673n/a {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3674n/a splitlines__doc__},
3675n/a {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3676n/a sizeof__doc__},
3677n/a {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3678n/a {NULL, NULL} /* sentinel */
3679n/a};
3680n/a
3681n/astatic PyObject *
3682n/astr_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3683n/a
3684n/astatic PyObject *
3685n/astring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3686823148{
3687823148 PyObject *x = NULL;
3688n/a static char *kwlist[] = {"object", 0};
3689n/a
3690823148 if (type != &PyString_Type)
36911031 return str_subtype_new(type, args, kwds);
3692822117 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
36931 return NULL;
3694822116 if (x == NULL)
369510 return PyString_FromString("");
3696822106 return PyObject_Str(x);
3697n/a}
3698n/a
3699n/astatic PyObject *
3700n/astr_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
37011031{
3702n/a PyObject *tmp, *pnew;
3703n/a Py_ssize_t n;
3704n/a
37051031 assert(PyType_IsSubtype(type, &PyString_Type));
37061031 tmp = string_new(&PyString_Type, args, kwds);
37071031 if (tmp == NULL)
37080 return NULL;
37091031 assert(PyString_CheckExact(tmp));
37101031 n = PyString_GET_SIZE(tmp);
37111031 pnew = type->tp_alloc(type, n);
37121031 if (pnew != NULL) {
37131031 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
37141031 ((PyStringObject *)pnew)->ob_shash =
3715n/a ((PyStringObject *)tmp)->ob_shash;
37161031 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3717n/a }
37181031 Py_DECREF(tmp);
37191031 return pnew;
3720n/a}
3721n/a
3722n/astatic PyObject *
3723n/abasestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
37240{
37250 PyErr_SetString(PyExc_TypeError,
3726n/a "The basestring type cannot be instantiated");
37270 return NULL;
3728n/a}
3729n/a
3730n/astatic PyObject *
3731n/astring_mod(PyObject *v, PyObject *w)
37326095{
37336095 if (!PyString_Check(v)) {
37340 Py_INCREF(Py_NotImplemented);
37350 return Py_NotImplemented;
3736n/a }
37376095 return PyString_Format(v, w);
3738n/a}
3739n/a
3740n/aPyDoc_STRVAR(basestring_doc,
3741n/a"Type basestring cannot be instantiated; it is the base for str and unicode.");
3742n/a
3743n/astatic PyNumberMethods string_as_number = {
3744n/a 0, /*nb_add*/
3745n/a 0, /*nb_subtract*/
3746n/a 0, /*nb_multiply*/
3747n/a 0, /*nb_divide*/
3748n/a string_mod, /*nb_remainder*/
3749n/a};
3750n/a
3751n/a
3752n/aPyTypeObject PyBaseString_Type = {
3753n/a PyVarObject_HEAD_INIT(&PyType_Type, 0)
3754n/a "basestring",
3755n/a 0,
3756n/a 0,
3757n/a 0, /* tp_dealloc */
3758n/a 0, /* tp_print */
3759n/a 0, /* tp_getattr */
3760n/a 0, /* tp_setattr */
3761n/a 0, /* tp_compare */
3762n/a 0, /* tp_repr */
3763n/a 0, /* tp_as_number */
3764n/a 0, /* tp_as_sequence */
3765n/a 0, /* tp_as_mapping */
3766n/a 0, /* tp_hash */
3767n/a 0, /* tp_call */
3768n/a 0, /* tp_str */
3769n/a 0, /* tp_getattro */
3770n/a 0, /* tp_setattro */
3771n/a 0, /* tp_as_buffer */
3772n/a Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3773n/a basestring_doc, /* tp_doc */
3774n/a 0, /* tp_traverse */
3775n/a 0, /* tp_clear */
3776n/a 0, /* tp_richcompare */
3777n/a 0, /* tp_weaklistoffset */
3778n/a 0, /* tp_iter */
3779n/a 0, /* tp_iternext */
3780n/a 0, /* tp_methods */
3781n/a 0, /* tp_members */
3782n/a 0, /* tp_getset */
3783n/a &PyBaseObject_Type, /* tp_base */
3784n/a 0, /* tp_dict */
3785n/a 0, /* tp_descr_get */
3786n/a 0, /* tp_descr_set */
3787n/a 0, /* tp_dictoffset */
3788n/a 0, /* tp_init */
3789n/a 0, /* tp_alloc */
3790n/a basestring_new, /* tp_new */
3791n/a 0, /* tp_free */
3792n/a};
3793n/a
3794n/aPyDoc_STRVAR(string_doc,
3795n/a"str(object) -> string\n\
3796n/a\n\
3797n/aReturn a nice string representation of the object.\n\
3798n/aIf the argument is a string, the return value is the same object.");
3799n/a
3800n/aPyTypeObject PyString_Type = {
3801n/a PyVarObject_HEAD_INIT(&PyType_Type, 0)
3802n/a "str",
3803n/a PyStringObject_SIZE,
3804n/a sizeof(char),
3805n/a string_dealloc, /* tp_dealloc */
3806n/a (printfunc)string_print, /* tp_print */
3807n/a 0, /* tp_getattr */
3808n/a 0, /* tp_setattr */
3809n/a 0, /* tp_compare */
3810n/a string_repr, /* tp_repr */
3811n/a &string_as_number, /* tp_as_number */
3812n/a &string_as_sequence, /* tp_as_sequence */
3813n/a &string_as_mapping, /* tp_as_mapping */
3814n/a (hashfunc)string_hash, /* tp_hash */
3815n/a 0, /* tp_call */
3816n/a string_str, /* tp_str */
3817n/a PyObject_GenericGetAttr, /* tp_getattro */
3818n/a 0, /* tp_setattro */
3819n/a &string_as_buffer, /* tp_as_buffer */
3820n/a Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3821n/a Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3822n/a Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3823n/a string_doc, /* tp_doc */
3824n/a 0, /* tp_traverse */
3825n/a 0, /* tp_clear */
3826n/a (richcmpfunc)string_richcompare, /* tp_richcompare */
3827n/a 0, /* tp_weaklistoffset */
3828n/a 0, /* tp_iter */
3829n/a 0, /* tp_iternext */
3830n/a string_methods, /* tp_methods */
3831n/a 0, /* tp_members */
3832n/a 0, /* tp_getset */
3833n/a &PyBaseString_Type, /* tp_base */
3834n/a 0, /* tp_dict */
3835n/a 0, /* tp_descr_get */
3836n/a 0, /* tp_descr_set */
3837n/a 0, /* tp_dictoffset */
3838n/a 0, /* tp_init */
3839n/a 0, /* tp_alloc */
3840n/a string_new, /* tp_new */
3841n/a PyObject_Del, /* tp_free */
3842n/a};
3843n/a
3844n/avoid
3845n/aPyString_Concat(register PyObject **pv, register PyObject *w)
38462462356{
3847n/a register PyObject *v;
38482462356 if (*pv == NULL)
38490 return;
38502462356 if (w == NULL || !PyString_Check(*pv)) {
38512 Py_DECREF(*pv);
38522 *pv = NULL;
38532 return;
3854n/a }
38552462354 v = string_concat((PyStringObject *) *pv, w);
38562462354 Py_DECREF(*pv);
38572462354 *pv = v;
3858n/a}
3859n/a
3860n/avoid
3861n/aPyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3862126361{
3863126361 PyString_Concat(pv, w);
3864126361 Py_XDECREF(w);
3865126361}
3866n/a
3867n/a
3868n/a/* The following function breaks the notion that strings are immutable:
3869n/a it changes the size of a string. We get away with this only if there
3870n/a is only one module referencing the object. You can also think of it
3871n/a as creating a new string object and destroying the old one, only
3872n/a more efficiently. In any case, don't use this if the string may
3873n/a already be known to some other part of the code...
3874n/a Note that if there's not enough memory to resize the string, the original
3875n/a string object at *pv is deallocated, *pv is set to NULL, an "out of
3876n/a memory" exception is set, and -1 is returned. Else (on success) 0 is
3877n/a returned, and the value in *pv may or may not be the same as on input.
3878n/a As always, an extra byte is allocated for a trailing \0 byte (newsize
3879n/a does *not* include that), and a trailing \0 byte is stored.
3880n/a*/
3881n/a
3882n/aint
3883n/a_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
38847925742{
3885n/a register PyObject *v;
3886n/a register PyStringObject *sv;
38877925742 v = *pv;
38887925742 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3889n/a PyString_CHECK_INTERNED(v)) {
38900 *pv = 0;
38910 Py_DECREF(v);
38920 PyErr_BadInternalCall();
38930 return -1;
3894n/a }
3895n/a /* XXX UNREF/NEWREF interface should be more symmetrical */
38967925742 _Py_DEC_REFTOTAL;
38977925742 _Py_ForgetReference(v);
38987925742 *pv = (PyObject *)
3899n/a PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
39007925742 if (*pv == NULL) {
39010 PyObject_Del(v);
39020 PyErr_NoMemory();
39030 return -1;
3904n/a }
39057925742 _Py_NewReference(*pv);
39067925742 sv = (PyStringObject *) *pv;
39077925742 Py_SIZE(sv) = newsize;
39087925742 sv->ob_sval[newsize] = '\0';
39097925742 sv->ob_shash = -1; /* invalidate cached hash value */
39107925742 return 0;
3911n/a}
3912n/a
3913n/a/* Helpers for formatstring */
3914n/a
3915n/aPy_LOCAL_INLINE(PyObject *)
3916n/agetnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
39171135861{
39181135861 Py_ssize_t argidx = *p_argidx;
39191135861 if (argidx < arglen) {
39201135856 (*p_argidx)++;
39211135856 if (arglen < 0)
3922682661 return args;
3923n/a else
3924453195 return PyTuple_GetItem(args, argidx);
3925n/a }
39265 PyErr_SetString(PyExc_TypeError,
3927n/a "not enough arguments for format string");
39285 return NULL;
3929n/a}
3930n/a
3931n/a/* Format codes
3932n/a * F_LJUST '-'
3933n/a * F_SIGN '+'
3934n/a * F_BLANK ' '
3935n/a * F_ALT '#'
3936n/a * F_ZERO '0'
3937n/a */
3938n/a#define F_LJUST (1<<0)
3939n/a#define F_SIGN (1<<1)
3940n/a#define F_BLANK (1<<2)
3941n/a#define F_ALT (1<<3)
3942n/a#define F_ZERO (1<<4)
3943n/a
3944n/a/* Returns a new reference to a PyString object, or NULL on failure. */
3945n/a
3946n/astatic PyObject *
3947n/aformatfloat(PyObject *v, int flags, int prec, int type)
394850112{
3949n/a char *p;
3950n/a PyObject *result;
3951n/a double x;
3952n/a
395350112 x = PyFloat_AsDouble(v);
395450112 if (x == -1.0 && PyErr_Occurred()) {
39551 PyErr_Format(PyExc_TypeError, "float argument required, "
3956n/a "not %.200s", Py_TYPE(v)->tp_name);
39571 return NULL;
3958n/a }
3959n/a
396050111 if (prec < 0)
396129404 prec = 6;
3962n/a
396350111 p = PyOS_double_to_string(x, type, prec,
3964n/a (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3965n/a
396650111 if (p == NULL)
39670 return NULL;
396850111 result = PyString_FromStringAndSize(p, strlen(p));
396950111 PyMem_Free(p);
397050111 return result;
3971n/a}
3972n/a
3973n/a/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3974n/a * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3975n/a * Python's regular ints.
3976n/a * Return value: a new PyString*, or NULL if error.
3977n/a * . *pbuf is set to point into it,
3978n/a * *plen set to the # of chars following that.
3979n/a * Caller must decref it when done using pbuf.
3980n/a * The string starting at *pbuf is of the form
3981n/a * "-"? ("0x" | "0X")? digit+
3982n/a * "0x"/"0X" are present only for x and X conversions, with F_ALT
3983n/a * set in flags. The case of hex digits will be correct,
3984n/a * There will be at least prec digits, zero-filled on the left if
3985n/a * necessary to get that many.
3986n/a * val object to be converted
3987n/a * flags bitmask of format flags; only F_ALT is looked at
3988n/a * prec minimum number of digits; 0-fill on left if needed
3989n/a * type a character in [duoxX]; u acts the same as d
3990n/a *
3991n/a * CAUTION: o, x and X conversions on regular ints can never
3992n/a * produce a '-' sign, but can for Python's unbounded ints.
3993n/a */
3994n/aPyObject*
3995n/a_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3996n/a char **pbuf, int *plen)
399736405{
399836405 PyObject *result = NULL;
3999n/a char *buf;
4000n/a Py_ssize_t i;
4001n/a int sign; /* 1 if '-', else 0 */
4002n/a int len; /* number of characters */
4003n/a Py_ssize_t llen;
4004n/a int numdigits; /* len == numnondigits + numdigits */
400536405 int numnondigits = 0;
4006n/a
400736405 switch (type) {
4008n/a case 'd':
4009n/a case 'u':
40101190 result = Py_TYPE(val)->tp_str(val);
40111190 break;
4012n/a case 'o':
4013178 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4014178 break;
4015n/a case 'x':
4016n/a case 'X':
401735037 numnondigits = 2;
401835037 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
401935037 break;
4020n/a default:
40210 assert(!"'type' not in [duoxX]");
4022n/a }
402336405 if (!result)
40240 return NULL;
4025n/a
402636405 buf = PyString_AsString(result);
402736405 if (!buf) {
40281 Py_DECREF(result);
40291 return NULL;
4030n/a }
4031n/a
4032n/a /* To modify the string in-place, there can only be one reference. */
403336404 if (Py_REFCNT(result) != 1) {
40340 PyErr_BadInternalCall();
40350 return NULL;
4036n/a }
403736404 llen = PyString_Size(result);
403836404 if (llen > INT_MAX) {
40390 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
40400 return NULL;
4041n/a }
404236404 len = (int)llen;
404336404 if (buf[len-1] == 'L') {
404435214 --len;
404535214 buf[len] = '\0';
4046n/a }
404736404 sign = buf[0] == '-';
404836404 numnondigits += sign;
404936404 numdigits = len - numnondigits;
405036404 assert(numdigits > 0);
4051n/a
4052n/a /* Get rid of base marker unless F_ALT */
405336404 if ((flags & F_ALT) == 0) {
4054n/a /* Need to skip 0x, 0X or 0. */
405536342 int skipped = 0;
405636342 switch (type) {
4057n/a case 'o':
4058149 assert(buf[sign] == '0');
4059n/a /* If 0 is only digit, leave it alone. */
4060149 if (numdigits > 1) {
406152 skipped = 1;
406252 --numdigits;
4063n/a }
4064149 break;
4065n/a case 'x':
4066n/a case 'X':
406735003 assert(buf[sign] == '0');
406835003 assert(buf[sign + 1] == 'x');
406935003 skipped = 2;
407035003 numnondigits -= 2;
4071n/a break;
4072n/a }
407336342 if (skipped) {
407435055 buf += skipped;
407535055 len -= skipped;
407635055 if (sign)
407736 buf[0] = '-';
4078n/a }
407936342 assert(len == numnondigits + numdigits);
408036342 assert(numdigits > 0);
4081n/a }
4082n/a
4083n/a /* Fill with leading zeroes to meet minimum width. */
408436404 if (prec > numdigits) {
4085n/a PyObject *r1 = PyString_FromStringAndSize(NULL,
408654 numnondigits + prec);
4087n/a char *b1;
408854 if (!r1) {
40890 Py_DECREF(result);
40900 return NULL;
4091n/a }
409254 b1 = PyString_AS_STRING(r1);
409398 for (i = 0; i < numnondigits; ++i)
409444 *b1++ = *buf++;
4095128 for (i = 0; i < prec - numdigits; i++)
409674 *b1++ = '0';
40971502 for (i = 0; i < numdigits; i++)
40981448 *b1++ = *buf++;
409954 *b1 = '\0';
410054 Py_DECREF(result);
410154 result = r1;
410254 buf = PyString_AS_STRING(result);
410354 len = numnondigits + prec;
4104n/a }
4105n/a
4106n/a /* Fix up case for hex conversions. */
410736404 if (type == 'X') {
4108n/a /* Need to convert all lower case letters to upper case.
4109n/a and need to convert 0x to 0X (and -0x to -0X). */
4110472 for (i = 0; i < len; i++)
4111450 if (buf[i] >= 'a' && buf[i] <= 'x')
4112128 buf[i] -= 'a'-'A';
4113n/a }
411436404 *pbuf = buf;
411536404 *plen = len;
411636404 return result;
4117n/a}
4118n/a
4119n/aPy_LOCAL_INLINE(int)
4120n/aformatint(char *buf, size_t buflen, int flags,
4121n/a int prec, int type, PyObject *v)
4122689802{
4123n/a /* fmt = '%#.' + `prec` + 'l' + `type`
4124n/a worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4125n/a + 1 + 1 = 24 */
4126n/a char fmt[64]; /* plenty big enough! */
4127n/a char *sign;
4128n/a long x;
4129n/a
4130689802 x = PyInt_AsLong(v);
4131689802 if (x == -1 && PyErr_Occurred()) {
41320 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4133n/a Py_TYPE(v)->tp_name);
41340 return -1;
4135n/a }
4136689802 if (x < 0 && type == 'u') {
41370 type = 'd';
4138n/a }
4139689804 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
41402 sign = "-";
4141n/a else
4142689800 sign = "";
4143689802 if (prec < 0)
4144624261 prec = 1;
4145n/a
4146689813 if ((flags & F_ALT) &&
4147n/a (type == 'x' || type == 'X')) {
4148n/a /* When converting under %#x or %#X, there are a number
4149n/a * of issues that cause pain:
4150n/a * - when 0 is being converted, the C standard leaves off
4151n/a * the '0x' or '0X', which is inconsistent with other
4152n/a * %#x/%#X conversions and inconsistent with Python's
4153n/a * hex() function
4154n/a * - there are platforms that violate the standard and
4155n/a * convert 0 with the '0x' or '0X'
4156n/a * (Metrowerks, Compaq Tru64)
4157n/a * - there are platforms that give '0x' when converting
4158n/a * under %#X, but convert 0 in accordance with the
4159n/a * standard (OS/2 EMX)
4160n/a *
4161n/a * We can achieve the desired consistency by inserting our
4162n/a * own '0x' or '0X' prefix, and substituting %x/%X in place
4163n/a * of %#x/%#X.
4164n/a *
4165n/a * Note that this is the same approach as used in
4166n/a * formatint() in unicodeobject.c
4167n/a */
416811 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4169n/a sign, type, prec, type);
4170n/a }
4171n/a else {
4172689791 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4173n/a sign, (flags&F_ALT) ? "#" : "",
4174n/a prec, type);
4175n/a }
4176n/a
4177n/a /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4178n/a * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4179n/a */
4180689802 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
41812 PyErr_SetString(PyExc_OverflowError,
4182n/a "formatted integer is too long (precision too large?)");
41832 return -1;
4184n/a }
4185689800 if (sign[0])
41862 PyOS_snprintf(buf, buflen, fmt, -x);
4187n/a else
4188689798 PyOS_snprintf(buf, buflen, fmt, x);
4189689800 return (int)strlen(buf);
4190n/a}
4191n/a
4192n/aPy_LOCAL_INLINE(int)
4193n/aformatchar(char *buf, size_t buflen, PyObject *v)
4194109473{
4195n/a /* presume that the buffer is at least 2 characters long */
4196109473 if (PyString_Check(v)) {
419736532 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
41980 return -1;
4199n/a }
4200n/a else {
420172941 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
420210 return -1;
4203n/a }
4204109463 buf[1] = '\0';
4205109463 return 1;
4206n/a}
4207n/a
4208n/a/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4209n/a
4210n/a FORMATBUFLEN is the length of the buffer in which the ints &
4211n/a chars are formatted. XXX This is a magic number. Each formatting
4212n/a routine does bounds checking to ensure no overflow, but a better
4213n/a solution may be to malloc a buffer of appropriate size for each
4214n/a format. For now, the current solution is sufficient.
4215n/a*/
4216n/a#define FORMATBUFLEN (size_t)120
4217n/a
4218n/aPyObject *
4219n/aPyString_Format(PyObject *format, PyObject *args)
4220872100{
4221n/a char *fmt, *res;
4222n/a Py_ssize_t arglen, argidx;
4223n/a Py_ssize_t reslen, rescnt, fmtcnt;
4224872100 int args_owned = 0;
4225n/a PyObject *result, *orig_args;
4226n/a#ifdef Py_USING_UNICODE
4227n/a PyObject *v, *w;
4228n/a#endif
4229872100 PyObject *dict = NULL;
4230872100 if (format == NULL || !PyString_Check(format) || args == NULL) {
42310 PyErr_BadInternalCall();
42320 return NULL;
4233n/a }
4234872100 orig_args = args;
4235872100 fmt = PyString_AS_STRING(format);
4236872100 fmtcnt = PyString_GET_SIZE(format);
4237872100 reslen = rescnt = fmtcnt + 100;
4238872100 result = PyString_FromStringAndSize((char *)NULL, reslen);
4239872100 if (result == NULL)
42400 return NULL;
4241872100 res = PyString_AsString(result);
4242872100 if (PyTuple_Check(args)) {
4243191358 arglen = PyTuple_GET_SIZE(args);
4244191358 argidx = 0;
4245n/a }
4246n/a else {
4247680742 arglen = -1;
4248680742 argidx = -2;
4249n/a }
4250872100 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4251n/a !PyObject_TypeCheck(args, &PyBaseString_Type))
42527680 dict = args;
42539731912 while (--fmtcnt >= 0) {
42547990334 if (*fmt != '%') {
42556857230 if (--rescnt < 0) {
4256182 rescnt = fmtcnt + 100;
4257182 reslen += rescnt;
4258182 if (_PyString_Resize(&result, reslen))
42590 return NULL;
4260182 res = PyString_AS_STRING(result)
4261n/a + reslen - rescnt;
4262182 --rescnt;
4263n/a }
42646857230 *res++ = *fmt++;
4265n/a }
4266n/a else {
4267n/a /* Got a format specifier */
42681133104 int flags = 0;
42691133104 Py_ssize_t width = -1;
42701133104 int prec = -1;
42711133104 int c = '\0';
4272n/a int fill;
4273n/a int isnumok;
42741133104 PyObject *v = NULL;
42751133104 PyObject *temp = NULL;
4276n/a char *pbuf;
4277n/a int sign;
4278n/a Py_ssize_t len;
4279n/a char formatbuf[FORMATBUFLEN];
4280n/a /* For format{int,char}() */
4281n/a#ifdef Py_USING_UNICODE
42821133104 char *fmt_start = fmt;
42831133104 Py_ssize_t argidx_start = argidx;
4284n/a#endif
4285n/a
42861133104 fmt++;
42871133104 if (*fmt == '(') {
4288n/a char *keystart;
4289n/a Py_ssize_t keylen;
4290n/a PyObject *key;
42919567 int pcount = 1;
4292n/a
42939567 if (dict == NULL) {
42949 PyErr_SetString(PyExc_TypeError,
4295n/a "format requires a mapping");
42969 goto error;
4297n/a }
42989558 ++fmt;
42999558 --fmtcnt;
43009558 keystart = fmt;
4301n/a /* Skip over balanced parentheses */
430284524 while (pcount > 0 && --fmtcnt >= 0) {
430365408 if (*fmt == ')')
43049556 --pcount;
430555852 else if (*fmt == '(')
43063 ++pcount;
430765408 fmt++;
4308n/a }
43099558 keylen = fmt - keystart - 1;
43109558 if (fmtcnt < 0 || pcount > 0) {
43115 PyErr_SetString(PyExc_ValueError,
4312n/a "incomplete format key");
43135 goto error;
4314n/a }
43159553 key = PyString_FromStringAndSize(keystart,
4316n/a keylen);
43179553 if (key == NULL)
43180 goto error;
43199553 if (args_owned) {
43203811 Py_DECREF(args);
43213811 args_owned = 0;
4322n/a }
43239553 args = PyObject_GetItem(dict, key);
43249553 Py_DECREF(key);
43259553 if (args == NULL) {
43261 goto error;
4327n/a }
43289552 args_owned = 1;
43299552 arglen = -1;
43309552 argidx = -2;
4331n/a }
43322483941 while (--fmtcnt >= 0) {
43331350847 switch (c = *fmt++) {
43342903 case '-': flags |= F_LJUST; continue;
433512953 case '+': flags |= F_SIGN; continue;
433650 case ' ': flags |= F_BLANK; continue;
4337270 case '#': flags |= F_ALT; continue;
4338201587 case '0': flags |= F_ZERO; continue;
4339n/a }
43401133084 break;
4341n/a }
43421133089 if (c == '*') {
434310135 v = getnextarg(args, arglen, &argidx);
434410135 if (v == NULL)
43450 goto error;
434610135 if (!PyInt_Check(v)) {
43477 PyErr_SetString(PyExc_TypeError,
4348n/a "* wants int");
43497 goto error;
4350n/a }
435110128 width = PyInt_AsLong(v);
435210128 if (width < 0) {
43531 flags |= F_LJUST;
43541 width = -width;
4355n/a }
435610128 if (--fmtcnt >= 0)
435710128 c = *fmt++;
4358n/a }
43591122954 else if (c >= 0 && isdigit(c)) {
4360390420 width = c - '0';
4361782234 while (--fmtcnt >= 0) {
4362391811 c = Py_CHARMASK(*fmt++);
4363391811 if (!isdigit(c))
4364390417 break;
43651394 if ((width*10) / 10 != width) {
43660 PyErr_SetString(
4367n/a PyExc_ValueError,
4368n/a "width too big");
43690 goto error;
4370n/a }
43711394 width = width*10 + (c - '0');
4372n/a }
4373n/a }
43741133082 if (c == '.') {
437587501 prec = 0;
437687501 if (--fmtcnt >= 0)
437787501 c = *fmt++;
437887501 if (c == '*') {
4379362 v = getnextarg(args, arglen, &argidx);
4380362 if (v == NULL)
43810 goto error;
4382362 if (!PyInt_Check(v)) {
43833 PyErr_SetString(
4384n/a PyExc_TypeError,
4385n/a "* wants int");
43863 goto error;
4387n/a }
4388359 prec = PyInt_AsLong(v);
4389359 if (prec < 0)
43901 prec = 0;
4391359 if (--fmtcnt >= 0)
4392359 c = *fmt++;
4393n/a }
439487139 else if (c >= 0 && isdigit(c)) {
439587127 prec = c - '0';
4396191195 while (--fmtcnt >= 0) {
4397104068 c = Py_CHARMASK(*fmt++);
4398104068 if (!isdigit(c))
439987127 break;
440016941 if ((prec*10) / 10 != prec) {
44010 PyErr_SetString(
4402n/a PyExc_ValueError,
4403n/a "prec too big");
44040 goto error;
4405n/a }
440616941 prec = prec*10 + (c - '0');
4407n/a }
4408n/a }
4409n/a } /* prec */
44101133079 if (fmtcnt >= 0) {
44111133071 if (c == 'h' || c == 'l' || c == 'L') {
44123 if (--fmtcnt >= 0)
44133 c = *fmt++;
4414n/a }
4415n/a }
44161133079 if (fmtcnt < 0) {
44178 PyErr_SetString(PyExc_ValueError,
4418n/a "incomplete format");
44198 goto error;
4420n/a }
44211133071 if (c != '%') {
44221125364 v = getnextarg(args, arglen, &argidx);
44231125364 if (v == NULL)
44245 goto error;
4425n/a }
44261133066 sign = 0;
44271133066 fill = ' ';
44281133066 switch (c) {
4429n/a case '%':
44307707 pbuf = "%";
44317707 len = 1;
44327707 break;
4433n/a case 's':
4434n/a#ifdef Py_USING_UNICODE
4435204389 if (PyUnicode_Check(v)) {
44362461 fmt = fmt_start;
44372461 argidx = argidx_start;
44382461 goto unicode;
4439n/a }
4440n/a#endif
4441201928 temp = _PyObject_Str(v);
4442n/a#ifdef Py_USING_UNICODE
4443201928 if (temp != NULL && PyUnicode_Check(temp)) {
44441 Py_DECREF(temp);
44451 fmt = fmt_start;
44461 argidx = argidx_start;
44471 goto unicode;
4448n/a }
4449n/a#endif
4450n/a /* Fall through */
4451n/a case 'r':
4452237105 if (c == 'r')
445335178 temp = PyObject_Repr(v);
4454237105 if (temp == NULL)
44550 goto error;
4456237105 if (!PyString_Check(temp)) {
44570 PyErr_SetString(PyExc_TypeError,
4458n/a "%s argument has non-string str()");
44590 Py_DECREF(temp);
44600 goto error;
4461n/a }
4462237105 pbuf = PyString_AS_STRING(temp);
4463237105 len = PyString_GET_SIZE(temp);
4464237105 if (prec >= 0 && len > prec)
4465588 len = prec;
4466237105 break;
4467n/a case 'i':
4468n/a case 'd':
4469n/a case 'u':
4470n/a case 'o':
4471n/a case 'x':
4472n/a case 'X':
4473726204 if (c == 'i')
44741922 c = 'd';
4475726204 isnumok = 0;
4476726204 if (PyNumber_Check(v)) {
4477726102 PyObject *iobj=NULL;
4478n/a
44791451937 if (PyInt_Check(v) || (PyLong_Check(v))) {
4480725835 iobj = v;
4481725835 Py_INCREF(iobj);
4482n/a }
4483n/a else {
4484267 iobj = PyNumber_Int(v);
4485267 if (iobj==NULL) iobj = PyNumber_Long(v);
4486n/a }
4487726102 if (iobj!=NULL) {
4488726099 if (PyInt_Check(iobj)) {
4489689802 isnumok = 1;
4490689802 pbuf = formatbuf;
4491689802 len = formatint(pbuf,
4492n/a sizeof(formatbuf),
4493n/a flags, prec, c, iobj);
4494689802 Py_DECREF(iobj);
4495689802 if (len < 0)
44962 goto error;
4497689800 sign = 1;
4498n/a }
449936297 else if (PyLong_Check(iobj)) {
4500n/a int ilen;
4501n/a
450236297 isnumok = 1;
450336297 temp = _PyString_FormatLong(iobj, flags,
4504n/a prec, c, &pbuf, &ilen);
450536297 Py_DECREF(iobj);
450636297 len = ilen;
450736297 if (!temp)
45081 goto error;
450936296 sign = 1;
4510n/a }
4511n/a else {
45120 Py_DECREF(iobj);
4513n/a }
4514n/a }
4515n/a }
4516726201 if (!isnumok) {
4517105 PyErr_Format(PyExc_TypeError,
4518n/a "%%%c format: a number is required, "
4519n/a "not %.200s", c, Py_TYPE(v)->tp_name);
4520105 goto error;
4521n/a }
4522726096 if (flags & F_ZERO)
4523200581 fill = '0';
4524726096 break;
4525n/a case 'e':
4526n/a case 'E':
4527n/a case 'f':
4528n/a case 'F':
4529n/a case 'g':
4530n/a case 'G':
453150112 temp = formatfloat(v, flags, prec, c);
453250112 if (temp == NULL)
45331 goto error;
453450111 pbuf = PyString_AS_STRING(temp);
453550111 len = PyString_GET_SIZE(temp);
453650111 sign = 1;
453750111 if (flags & F_ZERO)
45381006 fill = '0';
453950111 break;
4540n/a case 'c':
4541n/a#ifdef Py_USING_UNICODE
4542109475 if (PyUnicode_Check(v)) {
45432 fmt = fmt_start;
45442 argidx = argidx_start;
45452 goto unicode;
4546n/a }
4547n/a#endif
4548109473 pbuf = formatbuf;
4549109473 len = formatchar(pbuf, sizeof(formatbuf), v);
4550109473 if (len < 0)
455110 goto error;
4552109463 break;
4553n/a default:
45541 PyErr_Format(PyExc_ValueError,
4555n/a "unsupported format character '%c' (0x%x) "
4556n/a "at index %zd",
4557n/a c, c,
4558n/a (Py_ssize_t)(fmt - 1 -
4559n/a PyString_AsString(format)));
45601 goto error;
4561n/a }
45621130482 if (sign) {
4563783895 if (*pbuf == '-' || *pbuf == '+') {
45647688 sign = *pbuf++;
45657688 len--;
4566n/a }
4567768519 else if (flags & F_SIGN)
45686403 sign = '+';
4569762116 else if (flags & F_BLANK)
457038 sign = ' ';
4571n/a else
4572762078 sign = 0;
4573n/a }
45741130482 if (width < len)
4575731333 width = len;
45761130482 if (rescnt - (sign != 0) < width) {
45776650 reslen -= rescnt;
45786650 rescnt = width + fmtcnt + 100;
45796650 reslen += rescnt;
45806650 if (reslen < 0) {
45810 Py_DECREF(result);
45820 Py_XDECREF(temp);
45830 return PyErr_NoMemory();
4584n/a }
45856650 if (_PyString_Resize(&result, reslen)) {
45860 Py_XDECREF(temp);
45870 return NULL;
4588n/a }
45896650 res = PyString_AS_STRING(result)
4590n/a + reslen - rescnt;
4591n/a }
45921130482 if (sign) {
459314129 if (fill != ' ')
4594550 *res++ = sign;
459514129 rescnt--;
459614129 if (width > len)
4597151 width--;
4598n/a }
45991130482 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
460026 assert(pbuf[0] == '0');
460126 assert(pbuf[1] == c);
460226 if (fill != ' ') {
46031 *res++ = *pbuf++;
46041 *res++ = *pbuf++;
4605n/a }
460626 rescnt -= 2;
460726 width -= 2;
460826 if (width < 0)
46090 width = 0;
461026 len -= 2;
4611n/a }
46121130482 if (width > len && !(flags & F_LJUST)) {
4613n/a do {
46141329517 --rescnt;
46151329517 *res++ = fill;
46161329517 } while (--width > len);
4617n/a }
46181130482 if (fill == ' ') {
4619928895 if (sign)
462013579 *res++ = sign;
4621928895 if ((flags & F_ALT) &&
4622n/a (c == 'x' || c == 'X')) {
462325 assert(pbuf[0] == '0');
462425 assert(pbuf[1] == c);
462525 *res++ = *pbuf++;
462625 *res++ = *pbuf++;
4627n/a }
4628n/a }
46291130482 Py_MEMCPY(res, pbuf, len);
46301130482 res += len;
46311130482 rescnt -= len;
46322276858 while (--width >= len) {
463315894 --rescnt;
463415894 *res++ = ' ';
4635n/a }
46361130482 if (dict && (argidx < arglen) && c != '%') {
46370 PyErr_SetString(PyExc_TypeError,
4638n/a "not all arguments converted during string formatting");
46390 Py_XDECREF(temp);
46400 goto error;
4641n/a }
46421130482 Py_XDECREF(temp);
4643n/a } /* '%' */
4644n/a } /* until end */
4645869478 if (argidx < arglen && !dict) {
46463 PyErr_SetString(PyExc_TypeError,
4647n/a "not all arguments converted during string formatting");
46483 goto error;
4649n/a }
4650869475 if (args_owned) {
46515731 Py_DECREF(args);
4652n/a }
4653869475 if (_PyString_Resize(&result, reslen - rescnt))
46540 return NULL;
4655869475 return result;
4656n/a
4657n/a#ifdef Py_USING_UNICODE
46582464 unicode:
46592464 if (args_owned) {
46608 Py_DECREF(args);
46618 args_owned = 0;
4662n/a }
4663n/a /* Fiddle args right (remove the first argidx arguments) */
46642584 if (PyTuple_Check(orig_args) && argidx > 0) {
4665n/a PyObject *v;
4666120 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4667120 v = PyTuple_New(n);
4668120 if (v == NULL)
46690 goto error;
4670377 while (--n >= 0) {
4671137 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4672137 Py_INCREF(w);
4673137 PyTuple_SET_ITEM(v, n, w);
4674n/a }
4675120 args = v;
4676n/a } else {
46772344 Py_INCREF(orig_args);
46782344 args = orig_args;
4679n/a }
46802464 args_owned = 1;
4681n/a /* Take what we have of the result and let the Unicode formatting
4682n/a function format the rest of the input. */
46832464 rescnt = res - PyString_AS_STRING(result);
46842464 if (_PyString_Resize(&result, rescnt))
46850 goto error;
46862464 fmtcnt = PyString_GET_SIZE(format) - \
4687n/a (fmt - PyString_AS_STRING(format));
46882464 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
46892464 if (format == NULL)
46900 goto error;
46912464 v = PyUnicode_Format(format, args);
46922464 Py_DECREF(format);
46932464 if (v == NULL)
46941 goto error;
4695n/a /* Paste what we have (result) to what the Unicode formatting
4696n/a function returned (v) and return the result (or error) */
46972463 w = PyUnicode_Concat(result, v);
46982463 Py_DECREF(result);
46992463 Py_DECREF(v);
47002463 Py_DECREF(args);
47012463 return w;
4702n/a#endif /* Py_USING_UNICODE */
4703n/a
4704162 error:
4705162 Py_DECREF(result);
4706162 if (args_owned) {
47073 Py_DECREF(args);
4708n/a }
4709162 return NULL;
4710n/a}
4711n/a
4712n/avoid
4713n/aPyString_InternInPlace(PyObject **p)
471425398699{
471525398699 register PyStringObject *s = (PyStringObject *)(*p);
4716n/a PyObject *t;
471725398699 if (s == NULL || !PyString_Check(s))
47180 Py_FatalError("PyString_InternInPlace: strings only please!");
4719n/a /* If it's a string subclass, we don't really know what putting
4720n/a it in the interned dict might do. */
472125398699 if (!PyString_CheckExact(s))
47221 return;
472325398698 if (PyString_CHECK_INTERNED(s))
472413371891 return;
472512026807 if (interned == NULL) {
4726293 interned = PyDict_New();
4727293 if (interned == NULL) {
47280 PyErr_Clear(); /* Don't leave an exception */
47290 return;
4730n/a }
4731n/a }
473212026807 t = PyDict_GetItem(interned, (PyObject *)s);
473312026807 if (t) {
473411022097 Py_INCREF(t);
473511022097 Py_DECREF(*p);
473611022097 *p = t;
473711022097 return;
4738n/a }
4739n/a
47401004710 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
47410 PyErr_Clear();
47420 return;
4743n/a }
4744n/a /* The two references in interned are not counted by refcnt.
4745n/a The string deallocator will take care of this */
47461004710 Py_REFCNT(s) -= 2;
47471004710 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4748n/a}
4749n/a
4750n/avoid
4751n/aPyString_InternImmortal(PyObject **p)
47520{
47530 PyString_InternInPlace(p);
47540 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
47550 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
47560 Py_INCREF(*p);
4757n/a }
47580}
4759n/a
4760n/a
4761n/aPyObject *
4762n/aPyString_InternFromString(const char *cp)
47633674460{
47643674460 PyObject *s = PyString_FromString(cp);
47653674460 if (s == NULL)
47660 return NULL;
47673674460 PyString_InternInPlace(&s);
47683674460 return s;
4769n/a}
4770n/a
4771n/avoid
4772n/aPyString_Fini(void)
4773293{
4774n/a int i;
477575301 for (i = 0; i < UCHAR_MAX + 1; i++) {
477675008 Py_XDECREF(characters[i]);
477775008 characters[i] = NULL;
4778n/a }
4779293 Py_XDECREF(nullstring);
4780293 nullstring = NULL;
4781293}
4782n/a
4783n/avoid _Py_ReleaseInternedStrings(void)
47840{
4785n/a PyObject *keys;
4786n/a PyStringObject *s;
4787n/a Py_ssize_t i, n;
47880 Py_ssize_t immortal_size = 0, mortal_size = 0;
4789n/a
47900 if (interned == NULL || !PyDict_Check(interned))
47910 return;
47920 keys = PyDict_Keys(interned);
47930 if (keys == NULL || !PyList_Check(keys)) {
47940 PyErr_Clear();
47950 return;
4796n/a }
4797n/a
4798n/a /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4799n/a detector, interned strings are not forcibly deallocated; rather, we
4800n/a give them their stolen references back, and then clear and DECREF
4801n/a the interned dict. */
4802n/a
48030 n = PyList_GET_SIZE(keys);
48040 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4805n/a n);
48060 for (i = 0; i < n; i++) {
48070 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
48080 switch (s->ob_sstate) {
4809n/a case SSTATE_NOT_INTERNED:
4810n/a /* XXX Shouldn't happen */
48110 break;
4812n/a case SSTATE_INTERNED_IMMORTAL:
48130 Py_REFCNT(s) += 1;
48140 immortal_size += Py_SIZE(s);
48150 break;
4816n/a case SSTATE_INTERNED_MORTAL:
48170 Py_REFCNT(s) += 2;
48180 mortal_size += Py_SIZE(s);
48190 break;
4820n/a default:
48210 Py_FatalError("Inconsistent interned string state.");
4822n/a }
48230 s->ob_sstate = SSTATE_NOT_INTERNED;
4824n/a }
48250 fprintf(stderr, "total size of all interned strings: "
4826n/a "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4827n/a "mortal/immortal\n", mortal_size, immortal_size);
48280 Py_DECREF(keys);
48290 PyDict_Clear(interned);
48300 Py_DECREF(interned);
48310 interned = NULL;
4832n/a}