» Core Development > Code coverage > Objects/bytesobject.c

Python code coverage for Objects/bytesobject.c

# count content
1n/a/* bytes object implementation */
2n/a
3n/a#define PY_SSIZE_T_CLEAN
4n/a
5n/a#include "Python.h"
6n/a
7n/a#include "bytes_methods.h"
8n/a#include "pystrhex.h"
9n/a#include <stddef.h>
10n/a
11n/a/*[clinic input]
12n/aclass bytes "PyBytesObject *" "&PyBytes_Type"
13n/a[clinic start generated code]*/
14n/a/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
15n/a
16n/a#include "clinic/bytesobject.c.h"
17n/a
18n/a#ifdef COUNT_ALLOCS
19n/aPy_ssize_t null_strings, one_strings;
20n/a#endif
21n/a
22n/astatic PyBytesObject *characters[UCHAR_MAX + 1];
23n/astatic PyBytesObject *nullstring;
24n/a
25n/a/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
26n/a for a string of length n should request PyBytesObject_SIZE + n bytes.
27n/a
28n/a Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29n/a 3 bytes per string allocation on a typical system.
30n/a*/
31n/a#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32n/a
33n/a/* Forward declaration */
34n/aPy_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35n/a char *str);
36n/a
37n/a/*
38n/a For PyBytes_FromString(), the parameter `str' points to a null-terminated
39n/a string containing exactly `size' bytes.
40n/a
41n/a For PyBytes_FromStringAndSize(), the parameter `str' is
42n/a either NULL or else points to a string containing at least `size' bytes.
43n/a For PyBytes_FromStringAndSize(), the string in the `str' parameter does
44n/a not have to be null-terminated. (Therefore it is safe to construct a
45n/a substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
46n/a If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
47n/a bytes (setting the last byte to the null terminating character) and you can
48n/a fill in the data yourself. If `str' is non-NULL then the resulting
49n/a PyBytes object must be treated as immutable and you must not fill in nor
50n/a alter the data yourself, since the strings may be shared.
51n/a
52n/a The PyObject member `op->ob_size', which denotes the number of "extra
53n/a items" in a variable-size object, will contain the number of bytes
54n/a allocated for string data, not counting the null terminating character.
55n/a It is therefore equal to the `size' parameter (for
56n/a PyBytes_FromStringAndSize()) or the length of the string in the `str'
57n/a parameter (for PyBytes_FromString()).
58n/a*/
59n/astatic PyObject *
60n/a_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
61n/a{
62n/a PyBytesObject *op;
63n/a assert(size >= 0);
64n/a
65n/a if (size == 0 && (op = nullstring) != NULL) {
66n/a#ifdef COUNT_ALLOCS
67n/a null_strings++;
68n/a#endif
69n/a Py_INCREF(op);
70n/a return (PyObject *)op;
71n/a }
72n/a
73n/a if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
74n/a PyErr_SetString(PyExc_OverflowError,
75n/a "byte string is too large");
76n/a return NULL;
77n/a }
78n/a
79n/a /* Inline PyObject_NewVar */
80n/a if (use_calloc)
81n/a op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82n/a else
83n/a op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
84n/a if (op == NULL)
85n/a return PyErr_NoMemory();
86n/a (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
87n/a op->ob_shash = -1;
88n/a if (!use_calloc)
89n/a op->ob_sval[size] = '\0';
90n/a /* empty byte string singleton */
91n/a if (size == 0) {
92n/a nullstring = op;
93n/a Py_INCREF(op);
94n/a }
95n/a return (PyObject *) op;
96n/a}
97n/a
98n/aPyObject *
99n/aPyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100n/a{
101n/a PyBytesObject *op;
102n/a if (size < 0) {
103n/a PyErr_SetString(PyExc_SystemError,
104n/a "Negative size passed to PyBytes_FromStringAndSize");
105n/a return NULL;
106n/a }
107n/a if (size == 1 && str != NULL &&
108n/a (op = characters[*str & UCHAR_MAX]) != NULL)
109n/a {
110n/a#ifdef COUNT_ALLOCS
111n/a one_strings++;
112n/a#endif
113n/a Py_INCREF(op);
114n/a return (PyObject *)op;
115n/a }
116n/a
117n/a op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118n/a if (op == NULL)
119n/a return NULL;
120n/a if (str == NULL)
121n/a return (PyObject *) op;
122n/a
123n/a memcpy(op->ob_sval, str, size);
124n/a /* share short strings */
125n/a if (size == 1) {
126n/a characters[*str & UCHAR_MAX] = op;
127n/a Py_INCREF(op);
128n/a }
129n/a return (PyObject *) op;
130n/a}
131n/a
132n/aPyObject *
133n/aPyBytes_FromString(const char *str)
134n/a{
135n/a size_t size;
136n/a PyBytesObject *op;
137n/a
138n/a assert(str != NULL);
139n/a size = strlen(str);
140n/a if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141n/a PyErr_SetString(PyExc_OverflowError,
142n/a "byte string is too long");
143n/a return NULL;
144n/a }
145n/a if (size == 0 && (op = nullstring) != NULL) {
146n/a#ifdef COUNT_ALLOCS
147n/a null_strings++;
148n/a#endif
149n/a Py_INCREF(op);
150n/a return (PyObject *)op;
151n/a }
152n/a if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
153n/a#ifdef COUNT_ALLOCS
154n/a one_strings++;
155n/a#endif
156n/a Py_INCREF(op);
157n/a return (PyObject *)op;
158n/a }
159n/a
160n/a /* Inline PyObject_NewVar */
161n/a op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162n/a if (op == NULL)
163n/a return PyErr_NoMemory();
164n/a (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
165n/a op->ob_shash = -1;
166n/a memcpy(op->ob_sval, str, size+1);
167n/a /* share short strings */
168n/a if (size == 0) {
169n/a nullstring = op;
170n/a Py_INCREF(op);
171n/a } else if (size == 1) {
172n/a characters[*str & UCHAR_MAX] = op;
173n/a Py_INCREF(op);
174n/a }
175n/a return (PyObject *) op;
176n/a}
177n/a
178n/aPyObject *
179n/aPyBytes_FromFormatV(const char *format, va_list vargs)
180n/a{
181n/a char *s;
182n/a const char *f;
183n/a const char *p;
184n/a Py_ssize_t prec;
185n/a int longflag;
186n/a int size_tflag;
187n/a /* Longest 64-bit formatted numbers:
188n/a - "18446744073709551615\0" (21 bytes)
189n/a - "-9223372036854775808\0" (21 bytes)
190n/a Decimal takes the most space (it isn't enough for octal.)
191n/a
192n/a Longest 64-bit pointer representation:
193n/a "0xffffffffffffffff\0" (19 bytes). */
194n/a char buffer[21];
195n/a _PyBytesWriter writer;
196n/a
197n/a _PyBytesWriter_Init(&writer);
198n/a
199n/a s = _PyBytesWriter_Alloc(&writer, strlen(format));
200n/a if (s == NULL)
201n/a return NULL;
202n/a writer.overallocate = 1;
203n/a
204n/a#define WRITE_BYTES(str) \
205n/a do { \
206n/a s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207n/a if (s == NULL) \
208n/a goto error; \
209n/a } while (0)
210n/a
211n/a for (f = format; *f; f++) {
212n/a if (*f != '%') {
213n/a *s++ = *f;
214n/a continue;
215n/a }
216n/a
217n/a p = f++;
218n/a
219n/a /* ignore the width (ex: 10 in "%10s") */
220n/a while (Py_ISDIGIT(*f))
221n/a f++;
222n/a
223n/a /* parse the precision (ex: 10 in "%.10s") */
224n/a prec = 0;
225n/a if (*f == '.') {
226n/a f++;
227n/a for (; Py_ISDIGIT(*f); f++) {
228n/a prec = (prec * 10) + (*f - '0');
229n/a }
230n/a }
231n/a
232n/a while (*f && *f != '%' && !Py_ISALPHA(*f))
233n/a f++;
234n/a
235n/a /* handle the long flag ('l'), but only for %ld and %lu.
236n/a others can be added when necessary. */
237n/a longflag = 0;
238n/a if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239n/a longflag = 1;
240n/a ++f;
241n/a }
242n/a
243n/a /* handle the size_t flag ('z'). */
244n/a size_tflag = 0;
245n/a if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246n/a size_tflag = 1;
247n/a ++f;
248n/a }
249n/a
250n/a /* subtract bytes preallocated for the format string
251n/a (ex: 2 for "%s") */
252n/a writer.min_size -= (f - p + 1);
253n/a
254n/a switch (*f) {
255n/a case 'c':
256n/a {
257n/a int c = va_arg(vargs, int);
258n/a if (c < 0 || c > 255) {
259n/a PyErr_SetString(PyExc_OverflowError,
260n/a "PyBytes_FromFormatV(): %c format "
261n/a "expects an integer in range [0; 255]");
262n/a goto error;
263n/a }
264n/a writer.min_size++;
265n/a *s++ = (unsigned char)c;
266n/a break;
267n/a }
268n/a
269n/a case 'd':
270n/a if (longflag)
271n/a sprintf(buffer, "%ld", va_arg(vargs, long));
272n/a else if (size_tflag)
273n/a sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274n/a va_arg(vargs, Py_ssize_t));
275n/a else
276n/a sprintf(buffer, "%d", va_arg(vargs, int));
277n/a assert(strlen(buffer) < sizeof(buffer));
278n/a WRITE_BYTES(buffer);
279n/a break;
280n/a
281n/a case 'u':
282n/a if (longflag)
283n/a sprintf(buffer, "%lu",
284n/a va_arg(vargs, unsigned long));
285n/a else if (size_tflag)
286n/a sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287n/a va_arg(vargs, size_t));
288n/a else
289n/a sprintf(buffer, "%u",
290n/a va_arg(vargs, unsigned int));
291n/a assert(strlen(buffer) < sizeof(buffer));
292n/a WRITE_BYTES(buffer);
293n/a break;
294n/a
295n/a case 'i':
296n/a sprintf(buffer, "%i", va_arg(vargs, int));
297n/a assert(strlen(buffer) < sizeof(buffer));
298n/a WRITE_BYTES(buffer);
299n/a break;
300n/a
301n/a case 'x':
302n/a sprintf(buffer, "%x", va_arg(vargs, int));
303n/a assert(strlen(buffer) < sizeof(buffer));
304n/a WRITE_BYTES(buffer);
305n/a break;
306n/a
307n/a case 's':
308n/a {
309n/a Py_ssize_t i;
310n/a
311n/a p = va_arg(vargs, const char*);
312n/a i = strlen(p);
313n/a if (prec > 0 && i > prec)
314n/a i = prec;
315n/a s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316n/a if (s == NULL)
317n/a goto error;
318n/a break;
319n/a }
320n/a
321n/a case 'p':
322n/a sprintf(buffer, "%p", va_arg(vargs, void*));
323n/a assert(strlen(buffer) < sizeof(buffer));
324n/a /* %p is ill-defined: ensure leading 0x. */
325n/a if (buffer[1] == 'X')
326n/a buffer[1] = 'x';
327n/a else if (buffer[1] != 'x') {
328n/a memmove(buffer+2, buffer, strlen(buffer)+1);
329n/a buffer[0] = '0';
330n/a buffer[1] = 'x';
331n/a }
332n/a WRITE_BYTES(buffer);
333n/a break;
334n/a
335n/a case '%':
336n/a writer.min_size++;
337n/a *s++ = '%';
338n/a break;
339n/a
340n/a default:
341n/a if (*f == 0) {
342n/a /* fix min_size if we reached the end of the format string */
343n/a writer.min_size++;
344n/a }
345n/a
346n/a /* invalid format string: copy unformatted string and exit */
347n/a WRITE_BYTES(p);
348n/a return _PyBytesWriter_Finish(&writer, s);
349n/a }
350n/a }
351n/a
352n/a#undef WRITE_BYTES
353n/a
354n/a return _PyBytesWriter_Finish(&writer, s);
355n/a
356n/a error:
357n/a _PyBytesWriter_Dealloc(&writer);
358n/a return NULL;
359n/a}
360n/a
361n/aPyObject *
362n/aPyBytes_FromFormat(const char *format, ...)
363n/a{
364n/a PyObject* ret;
365n/a va_list vargs;
366n/a
367n/a#ifdef HAVE_STDARG_PROTOTYPES
368n/a va_start(vargs, format);
369n/a#else
370n/a va_start(vargs);
371n/a#endif
372n/a ret = PyBytes_FromFormatV(format, vargs);
373n/a va_end(vargs);
374n/a return ret;
375n/a}
376n/a
377n/a/* Helpers for formatstring */
378n/a
379n/aPy_LOCAL_INLINE(PyObject *)
380n/agetnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381n/a{
382n/a Py_ssize_t argidx = *p_argidx;
383n/a if (argidx < arglen) {
384n/a (*p_argidx)++;
385n/a if (arglen < 0)
386n/a return args;
387n/a else
388n/a return PyTuple_GetItem(args, argidx);
389n/a }
390n/a PyErr_SetString(PyExc_TypeError,
391n/a "not enough arguments for format string");
392n/a return NULL;
393n/a}
394n/a
395n/a/* Format codes
396n/a * F_LJUST '-'
397n/a * F_SIGN '+'
398n/a * F_BLANK ' '
399n/a * F_ALT '#'
400n/a * F_ZERO '0'
401n/a */
402n/a#define F_LJUST (1<<0)
403n/a#define F_SIGN (1<<1)
404n/a#define F_BLANK (1<<2)
405n/a#define F_ALT (1<<3)
406n/a#define F_ZERO (1<<4)
407n/a
408n/a/* Returns a new reference to a PyBytes object, or NULL on failure. */
409n/a
410n/astatic char*
411n/aformatfloat(PyObject *v, int flags, int prec, int type,
412n/a PyObject **p_result, _PyBytesWriter *writer, char *str)
413n/a{
414n/a char *p;
415n/a PyObject *result;
416n/a double x;
417n/a size_t len;
418n/a
419n/a x = PyFloat_AsDouble(v);
420n/a if (x == -1.0 && PyErr_Occurred()) {
421n/a PyErr_Format(PyExc_TypeError, "float argument required, "
422n/a "not %.200s", Py_TYPE(v)->tp_name);
423n/a return NULL;
424n/a }
425n/a
426n/a if (prec < 0)
427n/a prec = 6;
428n/a
429n/a p = PyOS_double_to_string(x, type, prec,
430n/a (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431n/a
432n/a if (p == NULL)
433n/a return NULL;
434n/a
435n/a len = strlen(p);
436n/a if (writer != NULL) {
437n/a str = _PyBytesWriter_Prepare(writer, str, len);
438n/a if (str == NULL)
439n/a return NULL;
440n/a memcpy(str, p, len);
441n/a PyMem_Free(p);
442n/a str += len;
443n/a return str;
444n/a }
445n/a
446n/a result = PyBytes_FromStringAndSize(p, len);
447n/a PyMem_Free(p);
448n/a *p_result = result;
449n/a return str;
450n/a}
451n/a
452n/astatic PyObject *
453n/aformatlong(PyObject *v, int flags, int prec, int type)
454n/a{
455n/a PyObject *result, *iobj;
456n/a if (type == 'i')
457n/a type = 'd';
458n/a if (PyLong_Check(v))
459n/a return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
460n/a if (PyNumber_Check(v)) {
461n/a /* make sure number is a type of integer for o, x, and X */
462n/a if (type == 'o' || type == 'x' || type == 'X')
463n/a iobj = PyNumber_Index(v);
464n/a else
465n/a iobj = PyNumber_Long(v);
466n/a if (iobj == NULL) {
467n/a if (!PyErr_ExceptionMatches(PyExc_TypeError))
468n/a return NULL;
469n/a }
470n/a else if (!PyLong_Check(iobj))
471n/a Py_CLEAR(iobj);
472n/a if (iobj != NULL) {
473n/a result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474n/a Py_DECREF(iobj);
475n/a return result;
476n/a }
477n/a }
478n/a PyErr_Format(PyExc_TypeError,
479n/a "%%%c format: %s is required, not %.200s", type,
480n/a (type == 'o' || type == 'x' || type == 'X') ? "an integer"
481n/a : "a number",
482n/a Py_TYPE(v)->tp_name);
483n/a return NULL;
484n/a}
485n/a
486n/astatic int
487n/abyte_converter(PyObject *arg, char *p)
488n/a{
489n/a if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
490n/a *p = PyBytes_AS_STRING(arg)[0];
491n/a return 1;
492n/a }
493n/a else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
494n/a *p = PyByteArray_AS_STRING(arg)[0];
495n/a return 1;
496n/a }
497n/a else {
498n/a PyObject *iobj;
499n/a long ival;
500n/a int overflow;
501n/a /* make sure number is a type of integer */
502n/a if (PyLong_Check(arg)) {
503n/a ival = PyLong_AsLongAndOverflow(arg, &overflow);
504n/a }
505n/a else {
506n/a iobj = PyNumber_Index(arg);
507n/a if (iobj == NULL) {
508n/a if (!PyErr_ExceptionMatches(PyExc_TypeError))
509n/a return 0;
510n/a goto onError;
511n/a }
512n/a ival = PyLong_AsLongAndOverflow(iobj, &overflow);
513n/a Py_DECREF(iobj);
514n/a }
515n/a if (!overflow && ival == -1 && PyErr_Occurred())
516n/a goto onError;
517n/a if (overflow || !(0 <= ival && ival <= 255)) {
518n/a PyErr_SetString(PyExc_OverflowError,
519n/a "%c arg not in range(256)");
520n/a return 0;
521n/a }
522n/a *p = (char)ival;
523n/a return 1;
524n/a }
525n/a onError:
526n/a PyErr_SetString(PyExc_TypeError,
527n/a "%c requires an integer in range(256) or a single byte");
528n/a return 0;
529n/a}
530n/a
531n/astatic PyObject *
532n/aformat_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
533n/a{
534n/a PyObject *func, *result;
535n/a _Py_IDENTIFIER(__bytes__);
536n/a /* is it a bytes object? */
537n/a if (PyBytes_Check(v)) {
538n/a *pbuf = PyBytes_AS_STRING(v);
539n/a *plen = PyBytes_GET_SIZE(v);
540n/a Py_INCREF(v);
541n/a return v;
542n/a }
543n/a if (PyByteArray_Check(v)) {
544n/a *pbuf = PyByteArray_AS_STRING(v);
545n/a *plen = PyByteArray_GET_SIZE(v);
546n/a Py_INCREF(v);
547n/a return v;
548n/a }
549n/a /* does it support __bytes__? */
550n/a func = _PyObject_LookupSpecial(v, &PyId___bytes__);
551n/a if (func != NULL) {
552n/a result = _PyObject_CallNoArg(func);
553n/a Py_DECREF(func);
554n/a if (result == NULL)
555n/a return NULL;
556n/a if (!PyBytes_Check(result)) {
557n/a PyErr_Format(PyExc_TypeError,
558n/a "__bytes__ returned non-bytes (type %.200s)",
559n/a Py_TYPE(result)->tp_name);
560n/a Py_DECREF(result);
561n/a return NULL;
562n/a }
563n/a *pbuf = PyBytes_AS_STRING(result);
564n/a *plen = PyBytes_GET_SIZE(result);
565n/a return result;
566n/a }
567n/a PyErr_Format(PyExc_TypeError,
568n/a "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
569n/a Py_TYPE(v)->tp_name);
570n/a return NULL;
571n/a}
572n/a
573n/a/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
574n/a
575n/aPyObject *
576n/a_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
577n/a PyObject *args, int use_bytearray)
578n/a{
579n/a const char *fmt;
580n/a char *res;
581n/a Py_ssize_t arglen, argidx;
582n/a Py_ssize_t fmtcnt;
583n/a int args_owned = 0;
584n/a PyObject *dict = NULL;
585n/a _PyBytesWriter writer;
586n/a
587n/a if (args == NULL) {
588n/a PyErr_BadInternalCall();
589n/a return NULL;
590n/a }
591n/a fmt = format;
592n/a fmtcnt = format_len;
593n/a
594n/a _PyBytesWriter_Init(&writer);
595n/a writer.use_bytearray = use_bytearray;
596n/a
597n/a res = _PyBytesWriter_Alloc(&writer, fmtcnt);
598n/a if (res == NULL)
599n/a return NULL;
600n/a if (!use_bytearray)
601n/a writer.overallocate = 1;
602n/a
603n/a if (PyTuple_Check(args)) {
604n/a arglen = PyTuple_GET_SIZE(args);
605n/a argidx = 0;
606n/a }
607n/a else {
608n/a arglen = -1;
609n/a argidx = -2;
610n/a }
611n/a if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
612n/a !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
613n/a !PyByteArray_Check(args)) {
614n/a dict = args;
615n/a }
616n/a
617n/a while (--fmtcnt >= 0) {
618n/a if (*fmt != '%') {
619n/a Py_ssize_t len;
620n/a char *pos;
621n/a
622n/a pos = strchr(fmt + 1, '%');
623n/a if (pos != NULL)
624n/a len = pos - fmt;
625n/a else
626n/a len = format_len - (fmt - format);
627n/a assert(len != 0);
628n/a
629n/a memcpy(res, fmt, len);
630n/a res += len;
631n/a fmt += len;
632n/a fmtcnt -= (len - 1);
633n/a }
634n/a else {
635n/a /* Got a format specifier */
636n/a int flags = 0;
637n/a Py_ssize_t width = -1;
638n/a int prec = -1;
639n/a int c = '\0';
640n/a int fill;
641n/a PyObject *v = NULL;
642n/a PyObject *temp = NULL;
643n/a const char *pbuf = NULL;
644n/a int sign;
645n/a Py_ssize_t len = 0;
646n/a char onechar; /* For byte_converter() */
647n/a Py_ssize_t alloc;
648n/a#ifdef Py_DEBUG
649n/a char *before;
650n/a#endif
651n/a
652n/a fmt++;
653n/a if (*fmt == '(') {
654n/a const char *keystart;
655n/a Py_ssize_t keylen;
656n/a PyObject *key;
657n/a int pcount = 1;
658n/a
659n/a if (dict == NULL) {
660n/a PyErr_SetString(PyExc_TypeError,
661n/a "format requires a mapping");
662n/a goto error;
663n/a }
664n/a ++fmt;
665n/a --fmtcnt;
666n/a keystart = fmt;
667n/a /* Skip over balanced parentheses */
668n/a while (pcount > 0 && --fmtcnt >= 0) {
669n/a if (*fmt == ')')
670n/a --pcount;
671n/a else if (*fmt == '(')
672n/a ++pcount;
673n/a fmt++;
674n/a }
675n/a keylen = fmt - keystart - 1;
676n/a if (fmtcnt < 0 || pcount > 0) {
677n/a PyErr_SetString(PyExc_ValueError,
678n/a "incomplete format key");
679n/a goto error;
680n/a }
681n/a key = PyBytes_FromStringAndSize(keystart,
682n/a keylen);
683n/a if (key == NULL)
684n/a goto error;
685n/a if (args_owned) {
686n/a Py_DECREF(args);
687n/a args_owned = 0;
688n/a }
689n/a args = PyObject_GetItem(dict, key);
690n/a Py_DECREF(key);
691n/a if (args == NULL) {
692n/a goto error;
693n/a }
694n/a args_owned = 1;
695n/a arglen = -1;
696n/a argidx = -2;
697n/a }
698n/a
699n/a /* Parse flags. Example: "%+i" => flags=F_SIGN. */
700n/a while (--fmtcnt >= 0) {
701n/a switch (c = *fmt++) {
702n/a case '-': flags |= F_LJUST; continue;
703n/a case '+': flags |= F_SIGN; continue;
704n/a case ' ': flags |= F_BLANK; continue;
705n/a case '#': flags |= F_ALT; continue;
706n/a case '0': flags |= F_ZERO; continue;
707n/a }
708n/a break;
709n/a }
710n/a
711n/a /* Parse width. Example: "%10s" => width=10 */
712n/a if (c == '*') {
713n/a v = getnextarg(args, arglen, &argidx);
714n/a if (v == NULL)
715n/a goto error;
716n/a if (!PyLong_Check(v)) {
717n/a PyErr_SetString(PyExc_TypeError,
718n/a "* wants int");
719n/a goto error;
720n/a }
721n/a width = PyLong_AsSsize_t(v);
722n/a if (width == -1 && PyErr_Occurred())
723n/a goto error;
724n/a if (width < 0) {
725n/a flags |= F_LJUST;
726n/a width = -width;
727n/a }
728n/a if (--fmtcnt >= 0)
729n/a c = *fmt++;
730n/a }
731n/a else if (c >= 0 && isdigit(c)) {
732n/a width = c - '0';
733n/a while (--fmtcnt >= 0) {
734n/a c = Py_CHARMASK(*fmt++);
735n/a if (!isdigit(c))
736n/a break;
737n/a if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
738n/a PyErr_SetString(
739n/a PyExc_ValueError,
740n/a "width too big");
741n/a goto error;
742n/a }
743n/a width = width*10 + (c - '0');
744n/a }
745n/a }
746n/a
747n/a /* Parse precision. Example: "%.3f" => prec=3 */
748n/a if (c == '.') {
749n/a prec = 0;
750n/a if (--fmtcnt >= 0)
751n/a c = *fmt++;
752n/a if (c == '*') {
753n/a v = getnextarg(args, arglen, &argidx);
754n/a if (v == NULL)
755n/a goto error;
756n/a if (!PyLong_Check(v)) {
757n/a PyErr_SetString(
758n/a PyExc_TypeError,
759n/a "* wants int");
760n/a goto error;
761n/a }
762n/a prec = _PyLong_AsInt(v);
763n/a if (prec == -1 && PyErr_Occurred())
764n/a goto error;
765n/a if (prec < 0)
766n/a prec = 0;
767n/a if (--fmtcnt >= 0)
768n/a c = *fmt++;
769n/a }
770n/a else if (c >= 0 && isdigit(c)) {
771n/a prec = c - '0';
772n/a while (--fmtcnt >= 0) {
773n/a c = Py_CHARMASK(*fmt++);
774n/a if (!isdigit(c))
775n/a break;
776n/a if (prec > (INT_MAX - ((int)c - '0')) / 10) {
777n/a PyErr_SetString(
778n/a PyExc_ValueError,
779n/a "prec too big");
780n/a goto error;
781n/a }
782n/a prec = prec*10 + (c - '0');
783n/a }
784n/a }
785n/a } /* prec */
786n/a if (fmtcnt >= 0) {
787n/a if (c == 'h' || c == 'l' || c == 'L') {
788n/a if (--fmtcnt >= 0)
789n/a c = *fmt++;
790n/a }
791n/a }
792n/a if (fmtcnt < 0) {
793n/a PyErr_SetString(PyExc_ValueError,
794n/a "incomplete format");
795n/a goto error;
796n/a }
797n/a if (c != '%') {
798n/a v = getnextarg(args, arglen, &argidx);
799n/a if (v == NULL)
800n/a goto error;
801n/a }
802n/a
803n/a if (fmtcnt < 0) {
804n/a /* last writer: disable writer overallocation */
805n/a writer.overallocate = 0;
806n/a }
807n/a
808n/a sign = 0;
809n/a fill = ' ';
810n/a switch (c) {
811n/a case '%':
812n/a *res++ = '%';
813n/a continue;
814n/a
815n/a case 'r':
816n/a // %r is only for 2/3 code; 3 only code should use %a
817n/a case 'a':
818n/a temp = PyObject_ASCII(v);
819n/a if (temp == NULL)
820n/a goto error;
821n/a assert(PyUnicode_IS_ASCII(temp));
822n/a pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
823n/a len = PyUnicode_GET_LENGTH(temp);
824n/a if (prec >= 0 && len > prec)
825n/a len = prec;
826n/a break;
827n/a
828n/a case 's':
829n/a // %s is only for 2/3 code; 3 only code should use %b
830n/a case 'b':
831n/a temp = format_obj(v, &pbuf, &len);
832n/a if (temp == NULL)
833n/a goto error;
834n/a if (prec >= 0 && len > prec)
835n/a len = prec;
836n/a break;
837n/a
838n/a case 'i':
839n/a case 'd':
840n/a case 'u':
841n/a case 'o':
842n/a case 'x':
843n/a case 'X':
844n/a if (PyLong_CheckExact(v)
845n/a && width == -1 && prec == -1
846n/a && !(flags & (F_SIGN | F_BLANK))
847n/a && c != 'X')
848n/a {
849n/a /* Fast path */
850n/a int alternate = flags & F_ALT;
851n/a int base;
852n/a
853n/a switch(c)
854n/a {
855n/a default:
856n/a assert(0 && "'type' not in [diuoxX]");
857n/a case 'd':
858n/a case 'i':
859n/a case 'u':
860n/a base = 10;
861n/a break;
862n/a case 'o':
863n/a base = 8;
864n/a break;
865n/a case 'x':
866n/a case 'X':
867n/a base = 16;
868n/a break;
869n/a }
870n/a
871n/a /* Fast path */
872n/a writer.min_size -= 2; /* size preallocated for "%d" */
873n/a res = _PyLong_FormatBytesWriter(&writer, res,
874n/a v, base, alternate);
875n/a if (res == NULL)
876n/a goto error;
877n/a continue;
878n/a }
879n/a
880n/a temp = formatlong(v, flags, prec, c);
881n/a if (!temp)
882n/a goto error;
883n/a assert(PyUnicode_IS_ASCII(temp));
884n/a pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
885n/a len = PyUnicode_GET_LENGTH(temp);
886n/a sign = 1;
887n/a if (flags & F_ZERO)
888n/a fill = '0';
889n/a break;
890n/a
891n/a case 'e':
892n/a case 'E':
893n/a case 'f':
894n/a case 'F':
895n/a case 'g':
896n/a case 'G':
897n/a if (width == -1 && prec == -1
898n/a && !(flags & (F_SIGN | F_BLANK)))
899n/a {
900n/a /* Fast path */
901n/a writer.min_size -= 2; /* size preallocated for "%f" */
902n/a res = formatfloat(v, flags, prec, c, NULL, &writer, res);
903n/a if (res == NULL)
904n/a goto error;
905n/a continue;
906n/a }
907n/a
908n/a if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
909n/a goto error;
910n/a pbuf = PyBytes_AS_STRING(temp);
911n/a len = PyBytes_GET_SIZE(temp);
912n/a sign = 1;
913n/a if (flags & F_ZERO)
914n/a fill = '0';
915n/a break;
916n/a
917n/a case 'c':
918n/a pbuf = &onechar;
919n/a len = byte_converter(v, &onechar);
920n/a if (!len)
921n/a goto error;
922n/a if (width == -1) {
923n/a /* Fast path */
924n/a *res++ = onechar;
925n/a continue;
926n/a }
927n/a break;
928n/a
929n/a default:
930n/a PyErr_Format(PyExc_ValueError,
931n/a "unsupported format character '%c' (0x%x) "
932n/a "at index %zd",
933n/a c, c,
934n/a (Py_ssize_t)(fmt - 1 - format));
935n/a goto error;
936n/a }
937n/a
938n/a if (sign) {
939n/a if (*pbuf == '-' || *pbuf == '+') {
940n/a sign = *pbuf++;
941n/a len--;
942n/a }
943n/a else if (flags & F_SIGN)
944n/a sign = '+';
945n/a else if (flags & F_BLANK)
946n/a sign = ' ';
947n/a else
948n/a sign = 0;
949n/a }
950n/a if (width < len)
951n/a width = len;
952n/a
953n/a alloc = width;
954n/a if (sign != 0 && len == width)
955n/a alloc++;
956n/a /* 2: size preallocated for %s */
957n/a if (alloc > 2) {
958n/a res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
959n/a if (res == NULL)
960n/a goto error;
961n/a }
962n/a#ifdef Py_DEBUG
963n/a before = res;
964n/a#endif
965n/a
966n/a /* Write the sign if needed */
967n/a if (sign) {
968n/a if (fill != ' ')
969n/a *res++ = sign;
970n/a if (width > len)
971n/a width--;
972n/a }
973n/a
974n/a /* Write the numeric prefix for "x", "X" and "o" formats
975n/a if the alternate form is used.
976n/a For example, write "0x" for the "%#x" format. */
977n/a if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
978n/a assert(pbuf[0] == '0');
979n/a assert(pbuf[1] == c);
980n/a if (fill != ' ') {
981n/a *res++ = *pbuf++;
982n/a *res++ = *pbuf++;
983n/a }
984n/a width -= 2;
985n/a if (width < 0)
986n/a width = 0;
987n/a len -= 2;
988n/a }
989n/a
990n/a /* Pad left with the fill character if needed */
991n/a if (width > len && !(flags & F_LJUST)) {
992n/a memset(res, fill, width - len);
993n/a res += (width - len);
994n/a width = len;
995n/a }
996n/a
997n/a /* If padding with spaces: write sign if needed and/or numeric
998n/a prefix if the alternate form is used */
999n/a if (fill == ' ') {
1000n/a if (sign)
1001n/a *res++ = sign;
1002n/a if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1003n/a assert(pbuf[0] == '0');
1004n/a assert(pbuf[1] == c);
1005n/a *res++ = *pbuf++;
1006n/a *res++ = *pbuf++;
1007n/a }
1008n/a }
1009n/a
1010n/a /* Copy bytes */
1011n/a memcpy(res, pbuf, len);
1012n/a res += len;
1013n/a
1014n/a /* Pad right with the fill character if needed */
1015n/a if (width > len) {
1016n/a memset(res, ' ', width - len);
1017n/a res += (width - len);
1018n/a }
1019n/a
1020n/a if (dict && (argidx < arglen) && c != '%') {
1021n/a PyErr_SetString(PyExc_TypeError,
1022n/a "not all arguments converted during bytes formatting");
1023n/a Py_XDECREF(temp);
1024n/a goto error;
1025n/a }
1026n/a Py_XDECREF(temp);
1027n/a
1028n/a#ifdef Py_DEBUG
1029n/a /* check that we computed the exact size for this write */
1030n/a assert((res - before) == alloc);
1031n/a#endif
1032n/a } /* '%' */
1033n/a
1034n/a /* If overallocation was disabled, ensure that it was the last
1035n/a write. Otherwise, we missed an optimization */
1036n/a assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
1037n/a } /* until end */
1038n/a
1039n/a if (argidx < arglen && !dict) {
1040n/a PyErr_SetString(PyExc_TypeError,
1041n/a "not all arguments converted during bytes formatting");
1042n/a goto error;
1043n/a }
1044n/a
1045n/a if (args_owned) {
1046n/a Py_DECREF(args);
1047n/a }
1048n/a return _PyBytesWriter_Finish(&writer, res);
1049n/a
1050n/a error:
1051n/a _PyBytesWriter_Dealloc(&writer);
1052n/a if (args_owned) {
1053n/a Py_DECREF(args);
1054n/a }
1055n/a return NULL;
1056n/a}
1057n/a
1058n/a/* =-= */
1059n/a
1060n/astatic void
1061n/abytes_dealloc(PyObject *op)
1062n/a{
1063n/a Py_TYPE(op)->tp_free(op);
1064n/a}
1065n/a
1066n/a/* Unescape a backslash-escaped string. If unicode is non-zero,
1067n/a the string is a u-literal. If recode_encoding is non-zero,
1068n/a the string is UTF-8 encoded and should be re-encoded in the
1069n/a specified encoding. */
1070n/a
1071n/astatic char *
1072n/a_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1073n/a const char *errors, const char *recode_encoding,
1074n/a _PyBytesWriter *writer, char *p)
1075n/a{
1076n/a PyObject *u, *w;
1077n/a const char* t;
1078n/a
1079n/a t = *s;
1080n/a /* Decode non-ASCII bytes as UTF-8. */
1081n/a while (t < end && (*t & 0x80))
1082n/a t++;
1083n/a u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1084n/a if (u == NULL)
1085n/a return NULL;
1086n/a
1087n/a /* Recode them in target encoding. */
1088n/a w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1089n/a Py_DECREF(u);
1090n/a if (w == NULL)
1091n/a return NULL;
1092n/a assert(PyBytes_Check(w));
1093n/a
1094n/a /* Append bytes to output buffer. */
1095n/a writer->min_size--; /* subtract 1 preallocated byte */
1096n/a p = _PyBytesWriter_WriteBytes(writer, p,
1097n/a PyBytes_AS_STRING(w),
1098n/a PyBytes_GET_SIZE(w));
1099n/a Py_DECREF(w);
1100n/a if (p == NULL)
1101n/a return NULL;
1102n/a
1103n/a *s = t;
1104n/a return p;
1105n/a}
1106n/a
1107n/aPyObject *_PyBytes_DecodeEscape(const char *s,
1108n/a Py_ssize_t len,
1109n/a const char *errors,
1110n/a Py_ssize_t unicode,
1111n/a const char *recode_encoding,
1112n/a const char **first_invalid_escape)
1113n/a{
1114n/a int c;
1115n/a char *p;
1116n/a const char *end;
1117n/a _PyBytesWriter writer;
1118n/a
1119n/a _PyBytesWriter_Init(&writer);
1120n/a
1121n/a p = _PyBytesWriter_Alloc(&writer, len);
1122n/a if (p == NULL)
1123n/a return NULL;
1124n/a writer.overallocate = 1;
1125n/a
1126n/a *first_invalid_escape = NULL;
1127n/a
1128n/a end = s + len;
1129n/a while (s < end) {
1130n/a if (*s != '\\') {
1131n/a non_esc:
1132n/a if (!(recode_encoding && (*s & 0x80))) {
1133n/a *p++ = *s++;
1134n/a }
1135n/a else {
1136n/a /* non-ASCII character and need to recode */
1137n/a p = _PyBytes_DecodeEscapeRecode(&s, end,
1138n/a errors, recode_encoding,
1139n/a &writer, p);
1140n/a if (p == NULL)
1141n/a goto failed;
1142n/a }
1143n/a continue;
1144n/a }
1145n/a
1146n/a s++;
1147n/a if (s == end) {
1148n/a PyErr_SetString(PyExc_ValueError,
1149n/a "Trailing \\ in string");
1150n/a goto failed;
1151n/a }
1152n/a
1153n/a switch (*s++) {
1154n/a /* XXX This assumes ASCII! */
1155n/a case '\n': break;
1156n/a case '\\': *p++ = '\\'; break;
1157n/a case '\'': *p++ = '\''; break;
1158n/a case '\"': *p++ = '\"'; break;
1159n/a case 'b': *p++ = '\b'; break;
1160n/a case 'f': *p++ = '\014'; break; /* FF */
1161n/a case 't': *p++ = '\t'; break;
1162n/a case 'n': *p++ = '\n'; break;
1163n/a case 'r': *p++ = '\r'; break;
1164n/a case 'v': *p++ = '\013'; break; /* VT */
1165n/a case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1166n/a case '0': case '1': case '2': case '3':
1167n/a case '4': case '5': case '6': case '7':
1168n/a c = s[-1] - '0';
1169n/a if (s < end && '0' <= *s && *s <= '7') {
1170n/a c = (c<<3) + *s++ - '0';
1171n/a if (s < end && '0' <= *s && *s <= '7')
1172n/a c = (c<<3) + *s++ - '0';
1173n/a }
1174n/a *p++ = c;
1175n/a break;
1176n/a case 'x':
1177n/a if (s+1 < end) {
1178n/a int digit1, digit2;
1179n/a digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1180n/a digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1181n/a if (digit1 < 16 && digit2 < 16) {
1182n/a *p++ = (unsigned char)((digit1 << 4) + digit2);
1183n/a s += 2;
1184n/a break;
1185n/a }
1186n/a }
1187n/a /* invalid hexadecimal digits */
1188n/a
1189n/a if (!errors || strcmp(errors, "strict") == 0) {
1190n/a PyErr_Format(PyExc_ValueError,
1191n/a "invalid \\x escape at position %d",
1192n/a s - 2 - (end - len));
1193n/a goto failed;
1194n/a }
1195n/a if (strcmp(errors, "replace") == 0) {
1196n/a *p++ = '?';
1197n/a } else if (strcmp(errors, "ignore") == 0)
1198n/a /* do nothing */;
1199n/a else {
1200n/a PyErr_Format(PyExc_ValueError,
1201n/a "decoding error; unknown "
1202n/a "error handling code: %.400s",
1203n/a errors);
1204n/a goto failed;
1205n/a }
1206n/a /* skip \x */
1207n/a if (s < end && Py_ISXDIGIT(s[0]))
1208n/a s++; /* and a hexdigit */
1209n/a break;
1210n/a
1211n/a default:
1212n/a if (*first_invalid_escape == NULL) {
1213n/a *first_invalid_escape = s-1; /* Back up one char, since we've
1214n/a already incremented s. */
1215n/a }
1216n/a *p++ = '\\';
1217n/a s--;
1218n/a goto non_esc; /* an arbitrary number of unescaped
1219n/a UTF-8 bytes may follow. */
1220n/a }
1221n/a }
1222n/a
1223n/a return _PyBytesWriter_Finish(&writer, p);
1224n/a
1225n/a failed:
1226n/a _PyBytesWriter_Dealloc(&writer);
1227n/a return NULL;
1228n/a}
1229n/a
1230n/aPyObject *PyBytes_DecodeEscape(const char *s,
1231n/a Py_ssize_t len,
1232n/a const char *errors,
1233n/a Py_ssize_t unicode,
1234n/a const char *recode_encoding)
1235n/a{
1236n/a const char* first_invalid_escape;
1237n/a PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1238n/a recode_encoding,
1239n/a &first_invalid_escape);
1240n/a if (result == NULL)
1241n/a return NULL;
1242n/a if (first_invalid_escape != NULL) {
1243n/a if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1244n/a "invalid escape sequence '\\%c'",
1245n/a *first_invalid_escape) < 0) {
1246n/a Py_DECREF(result);
1247n/a return NULL;
1248n/a }
1249n/a }
1250n/a return result;
1251n/a
1252n/a}
1253n/a/* -------------------------------------------------------------------- */
1254n/a/* object api */
1255n/a
1256n/aPy_ssize_t
1257n/aPyBytes_Size(PyObject *op)
1258n/a{
1259n/a if (!PyBytes_Check(op)) {
1260n/a PyErr_Format(PyExc_TypeError,
1261n/a "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1262n/a return -1;
1263n/a }
1264n/a return Py_SIZE(op);
1265n/a}
1266n/a
1267n/achar *
1268n/aPyBytes_AsString(PyObject *op)
1269n/a{
1270n/a if (!PyBytes_Check(op)) {
1271n/a PyErr_Format(PyExc_TypeError,
1272n/a "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1273n/a return NULL;
1274n/a }
1275n/a return ((PyBytesObject *)op)->ob_sval;
1276n/a}
1277n/a
1278n/aint
1279n/aPyBytes_AsStringAndSize(PyObject *obj,
1280n/a char **s,
1281n/a Py_ssize_t *len)
1282n/a{
1283n/a if (s == NULL) {
1284n/a PyErr_BadInternalCall();
1285n/a return -1;
1286n/a }
1287n/a
1288n/a if (!PyBytes_Check(obj)) {
1289n/a PyErr_Format(PyExc_TypeError,
1290n/a "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1291n/a return -1;
1292n/a }
1293n/a
1294n/a *s = PyBytes_AS_STRING(obj);
1295n/a if (len != NULL)
1296n/a *len = PyBytes_GET_SIZE(obj);
1297n/a else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1298n/a PyErr_SetString(PyExc_ValueError,
1299n/a "embedded null byte");
1300n/a return -1;
1301n/a }
1302n/a return 0;
1303n/a}
1304n/a
1305n/a/* -------------------------------------------------------------------- */
1306n/a/* Methods */
1307n/a
1308n/a#include "stringlib/stringdefs.h"
1309n/a
1310n/a#include "stringlib/fastsearch.h"
1311n/a#include "stringlib/count.h"
1312n/a#include "stringlib/find.h"
1313n/a#include "stringlib/join.h"
1314n/a#include "stringlib/partition.h"
1315n/a#include "stringlib/split.h"
1316n/a#include "stringlib/ctype.h"
1317n/a
1318n/a#include "stringlib/transmogrify.h"
1319n/a
1320n/aPyObject *
1321n/aPyBytes_Repr(PyObject *obj, int smartquotes)
1322n/a{
1323n/a PyBytesObject* op = (PyBytesObject*) obj;
1324n/a Py_ssize_t i, length = Py_SIZE(op);
1325n/a Py_ssize_t newsize, squotes, dquotes;
1326n/a PyObject *v;
1327n/a unsigned char quote, *s, *p;
1328n/a
1329n/a /* Compute size of output string */
1330n/a squotes = dquotes = 0;
1331n/a newsize = 3; /* b'' */
1332n/a s = (unsigned char*)op->ob_sval;
1333n/a for (i = 0; i < length; i++) {
1334n/a Py_ssize_t incr = 1;
1335n/a switch(s[i]) {
1336n/a case '\'': squotes++; break;
1337n/a case '"': dquotes++; break;
1338n/a case '\\': case '\t': case '\n': case '\r':
1339n/a incr = 2; break; /* \C */
1340n/a default:
1341n/a if (s[i] < ' ' || s[i] >= 0x7f)
1342n/a incr = 4; /* \xHH */
1343n/a }
1344n/a if (newsize > PY_SSIZE_T_MAX - incr)
1345n/a goto overflow;
1346n/a newsize += incr;
1347n/a }
1348n/a quote = '\'';
1349n/a if (smartquotes && squotes && !dquotes)
1350n/a quote = '"';
1351n/a if (squotes && quote == '\'') {
1352n/a if (newsize > PY_SSIZE_T_MAX - squotes)
1353n/a goto overflow;
1354n/a newsize += squotes;
1355n/a }
1356n/a
1357n/a v = PyUnicode_New(newsize, 127);
1358n/a if (v == NULL) {
1359n/a return NULL;
1360n/a }
1361n/a p = PyUnicode_1BYTE_DATA(v);
1362n/a
1363n/a *p++ = 'b', *p++ = quote;
1364n/a for (i = 0; i < length; i++) {
1365n/a unsigned char c = op->ob_sval[i];
1366n/a if (c == quote || c == '\\')
1367n/a *p++ = '\\', *p++ = c;
1368n/a else if (c == '\t')
1369n/a *p++ = '\\', *p++ = 't';
1370n/a else if (c == '\n')
1371n/a *p++ = '\\', *p++ = 'n';
1372n/a else if (c == '\r')
1373n/a *p++ = '\\', *p++ = 'r';
1374n/a else if (c < ' ' || c >= 0x7f) {
1375n/a *p++ = '\\';
1376n/a *p++ = 'x';
1377n/a *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1378n/a *p++ = Py_hexdigits[c & 0xf];
1379n/a }
1380n/a else
1381n/a *p++ = c;
1382n/a }
1383n/a *p++ = quote;
1384n/a assert(_PyUnicode_CheckConsistency(v, 1));
1385n/a return v;
1386n/a
1387n/a overflow:
1388n/a PyErr_SetString(PyExc_OverflowError,
1389n/a "bytes object is too large to make repr");
1390n/a return NULL;
1391n/a}
1392n/a
1393n/astatic PyObject *
1394n/abytes_repr(PyObject *op)
1395n/a{
1396n/a return PyBytes_Repr(op, 1);
1397n/a}
1398n/a
1399n/astatic PyObject *
1400n/abytes_str(PyObject *op)
1401n/a{
1402n/a if (Py_BytesWarningFlag) {
1403n/a if (PyErr_WarnEx(PyExc_BytesWarning,
1404n/a "str() on a bytes instance", 1))
1405n/a return NULL;
1406n/a }
1407n/a return bytes_repr(op);
1408n/a}
1409n/a
1410n/astatic Py_ssize_t
1411n/abytes_length(PyBytesObject *a)
1412n/a{
1413n/a return Py_SIZE(a);
1414n/a}
1415n/a
1416n/a/* This is also used by PyBytes_Concat() */
1417n/astatic PyObject *
1418n/abytes_concat(PyObject *a, PyObject *b)
1419n/a{
1420n/a Py_buffer va, vb;
1421n/a PyObject *result = NULL;
1422n/a
1423n/a va.len = -1;
1424n/a vb.len = -1;
1425n/a if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1426n/a PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1427n/a PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1428n/a Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1429n/a goto done;
1430n/a }
1431n/a
1432n/a /* Optimize end cases */
1433n/a if (va.len == 0 && PyBytes_CheckExact(b)) {
1434n/a result = b;
1435n/a Py_INCREF(result);
1436n/a goto done;
1437n/a }
1438n/a if (vb.len == 0 && PyBytes_CheckExact(a)) {
1439n/a result = a;
1440n/a Py_INCREF(result);
1441n/a goto done;
1442n/a }
1443n/a
1444n/a if (va.len > PY_SSIZE_T_MAX - vb.len) {
1445n/a PyErr_NoMemory();
1446n/a goto done;
1447n/a }
1448n/a
1449n/a result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1450n/a if (result != NULL) {
1451n/a memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1452n/a memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1453n/a }
1454n/a
1455n/a done:
1456n/a if (va.len != -1)
1457n/a PyBuffer_Release(&va);
1458n/a if (vb.len != -1)
1459n/a PyBuffer_Release(&vb);
1460n/a return result;
1461n/a}
1462n/a
1463n/astatic PyObject *
1464n/abytes_repeat(PyBytesObject *a, Py_ssize_t n)
1465n/a{
1466n/a Py_ssize_t i;
1467n/a Py_ssize_t j;
1468n/a Py_ssize_t size;
1469n/a PyBytesObject *op;
1470n/a size_t nbytes;
1471n/a if (n < 0)
1472n/a n = 0;
1473n/a /* watch out for overflows: the size can overflow int,
1474n/a * and the # of bytes needed can overflow size_t
1475n/a */
1476n/a if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1477n/a PyErr_SetString(PyExc_OverflowError,
1478n/a "repeated bytes are too long");
1479n/a return NULL;
1480n/a }
1481n/a size = Py_SIZE(a) * n;
1482n/a if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1483n/a Py_INCREF(a);
1484n/a return (PyObject *)a;
1485n/a }
1486n/a nbytes = (size_t)size;
1487n/a if (nbytes + PyBytesObject_SIZE <= nbytes) {
1488n/a PyErr_SetString(PyExc_OverflowError,
1489n/a "repeated bytes are too long");
1490n/a return NULL;
1491n/a }
1492n/a op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1493n/a if (op == NULL)
1494n/a return PyErr_NoMemory();
1495n/a (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1496n/a op->ob_shash = -1;
1497n/a op->ob_sval[size] = '\0';
1498n/a if (Py_SIZE(a) == 1 && n > 0) {
1499n/a memset(op->ob_sval, a->ob_sval[0] , n);
1500n/a return (PyObject *) op;
1501n/a }
1502n/a i = 0;
1503n/a if (i < size) {
1504n/a memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1505n/a i = Py_SIZE(a);
1506n/a }
1507n/a while (i < size) {
1508n/a j = (i <= size-i) ? i : size-i;
1509n/a memcpy(op->ob_sval+i, op->ob_sval, j);
1510n/a i += j;
1511n/a }
1512n/a return (PyObject *) op;
1513n/a}
1514n/a
1515n/astatic int
1516n/abytes_contains(PyObject *self, PyObject *arg)
1517n/a{
1518n/a return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1519n/a}
1520n/a
1521n/astatic PyObject *
1522n/abytes_item(PyBytesObject *a, Py_ssize_t i)
1523n/a{
1524n/a if (i < 0 || i >= Py_SIZE(a)) {
1525n/a PyErr_SetString(PyExc_IndexError, "index out of range");
1526n/a return NULL;
1527n/a }
1528n/a return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1529n/a}
1530n/a
1531n/astatic int
1532n/abytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1533n/a{
1534n/a int cmp;
1535n/a Py_ssize_t len;
1536n/a
1537n/a len = Py_SIZE(a);
1538n/a if (Py_SIZE(b) != len)
1539n/a return 0;
1540n/a
1541n/a if (a->ob_sval[0] != b->ob_sval[0])
1542n/a return 0;
1543n/a
1544n/a cmp = memcmp(a->ob_sval, b->ob_sval, len);
1545n/a return (cmp == 0);
1546n/a}
1547n/a
1548n/astatic PyObject*
1549n/abytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1550n/a{
1551n/a int c;
1552n/a Py_ssize_t len_a, len_b;
1553n/a Py_ssize_t min_len;
1554n/a PyObject *result;
1555n/a int rc;
1556n/a
1557n/a /* Make sure both arguments are strings. */
1558n/a if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1559n/a if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1560n/a rc = PyObject_IsInstance((PyObject*)a,
1561n/a (PyObject*)&PyUnicode_Type);
1562n/a if (!rc)
1563n/a rc = PyObject_IsInstance((PyObject*)b,
1564n/a (PyObject*)&PyUnicode_Type);
1565n/a if (rc < 0)
1566n/a return NULL;
1567n/a if (rc) {
1568n/a if (PyErr_WarnEx(PyExc_BytesWarning,
1569n/a "Comparison between bytes and string", 1))
1570n/a return NULL;
1571n/a }
1572n/a else {
1573n/a rc = PyObject_IsInstance((PyObject*)a,
1574n/a (PyObject*)&PyLong_Type);
1575n/a if (!rc)
1576n/a rc = PyObject_IsInstance((PyObject*)b,
1577n/a (PyObject*)&PyLong_Type);
1578n/a if (rc < 0)
1579n/a return NULL;
1580n/a if (rc) {
1581n/a if (PyErr_WarnEx(PyExc_BytesWarning,
1582n/a "Comparison between bytes and int", 1))
1583n/a return NULL;
1584n/a }
1585n/a }
1586n/a }
1587n/a result = Py_NotImplemented;
1588n/a }
1589n/a else if (a == b) {
1590n/a switch (op) {
1591n/a case Py_EQ:
1592n/a case Py_LE:
1593n/a case Py_GE:
1594n/a /* a string is equal to itself */
1595n/a result = Py_True;
1596n/a break;
1597n/a case Py_NE:
1598n/a case Py_LT:
1599n/a case Py_GT:
1600n/a result = Py_False;
1601n/a break;
1602n/a default:
1603n/a PyErr_BadArgument();
1604n/a return NULL;
1605n/a }
1606n/a }
1607n/a else if (op == Py_EQ || op == Py_NE) {
1608n/a int eq = bytes_compare_eq(a, b);
1609n/a eq ^= (op == Py_NE);
1610n/a result = eq ? Py_True : Py_False;
1611n/a }
1612n/a else {
1613n/a len_a = Py_SIZE(a);
1614n/a len_b = Py_SIZE(b);
1615n/a min_len = Py_MIN(len_a, len_b);
1616n/a if (min_len > 0) {
1617n/a c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1618n/a if (c == 0)
1619n/a c = memcmp(a->ob_sval, b->ob_sval, min_len);
1620n/a }
1621n/a else
1622n/a c = 0;
1623n/a if (c == 0)
1624n/a c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1625n/a switch (op) {
1626n/a case Py_LT: c = c < 0; break;
1627n/a case Py_LE: c = c <= 0; break;
1628n/a case Py_GT: c = c > 0; break;
1629n/a case Py_GE: c = c >= 0; break;
1630n/a default:
1631n/a PyErr_BadArgument();
1632n/a return NULL;
1633n/a }
1634n/a result = c ? Py_True : Py_False;
1635n/a }
1636n/a
1637n/a Py_INCREF(result);
1638n/a return result;
1639n/a}
1640n/a
1641n/astatic Py_hash_t
1642n/abytes_hash(PyBytesObject *a)
1643n/a{
1644n/a if (a->ob_shash == -1) {
1645n/a /* Can't fail */
1646n/a a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1647n/a }
1648n/a return a->ob_shash;
1649n/a}
1650n/a
1651n/astatic PyObject*
1652n/abytes_subscript(PyBytesObject* self, PyObject* item)
1653n/a{
1654n/a if (PyIndex_Check(item)) {
1655n/a Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1656n/a if (i == -1 && PyErr_Occurred())
1657n/a return NULL;
1658n/a if (i < 0)
1659n/a i += PyBytes_GET_SIZE(self);
1660n/a if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1661n/a PyErr_SetString(PyExc_IndexError,
1662n/a "index out of range");
1663n/a return NULL;
1664n/a }
1665n/a return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1666n/a }
1667n/a else if (PySlice_Check(item)) {
1668n/a Py_ssize_t start, stop, step, slicelength, cur, i;
1669n/a char* source_buf;
1670n/a char* result_buf;
1671n/a PyObject* result;
1672n/a
1673n/a if (PySlice_GetIndicesEx(item,
1674n/a PyBytes_GET_SIZE(self),
1675n/a &start, &stop, &step, &slicelength) < 0) {
1676n/a return NULL;
1677n/a }
1678n/a
1679n/a if (slicelength <= 0) {
1680n/a return PyBytes_FromStringAndSize("", 0);
1681n/a }
1682n/a else if (start == 0 && step == 1 &&
1683n/a slicelength == PyBytes_GET_SIZE(self) &&
1684n/a PyBytes_CheckExact(self)) {
1685n/a Py_INCREF(self);
1686n/a return (PyObject *)self;
1687n/a }
1688n/a else if (step == 1) {
1689n/a return PyBytes_FromStringAndSize(
1690n/a PyBytes_AS_STRING(self) + start,
1691n/a slicelength);
1692n/a }
1693n/a else {
1694n/a source_buf = PyBytes_AS_STRING(self);
1695n/a result = PyBytes_FromStringAndSize(NULL, slicelength);
1696n/a if (result == NULL)
1697n/a return NULL;
1698n/a
1699n/a result_buf = PyBytes_AS_STRING(result);
1700n/a for (cur = start, i = 0; i < slicelength;
1701n/a cur += step, i++) {
1702n/a result_buf[i] = source_buf[cur];
1703n/a }
1704n/a
1705n/a return result;
1706n/a }
1707n/a }
1708n/a else {
1709n/a PyErr_Format(PyExc_TypeError,
1710n/a "byte indices must be integers or slices, not %.200s",
1711n/a Py_TYPE(item)->tp_name);
1712n/a return NULL;
1713n/a }
1714n/a}
1715n/a
1716n/astatic int
1717n/abytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1718n/a{
1719n/a return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1720n/a 1, flags);
1721n/a}
1722n/a
1723n/astatic PySequenceMethods bytes_as_sequence = {
1724n/a (lenfunc)bytes_length, /*sq_length*/
1725n/a (binaryfunc)bytes_concat, /*sq_concat*/
1726n/a (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1727n/a (ssizeargfunc)bytes_item, /*sq_item*/
1728n/a 0, /*sq_slice*/
1729n/a 0, /*sq_ass_item*/
1730n/a 0, /*sq_ass_slice*/
1731n/a (objobjproc)bytes_contains /*sq_contains*/
1732n/a};
1733n/a
1734n/astatic PyMappingMethods bytes_as_mapping = {
1735n/a (lenfunc)bytes_length,
1736n/a (binaryfunc)bytes_subscript,
1737n/a 0,
1738n/a};
1739n/a
1740n/astatic PyBufferProcs bytes_as_buffer = {
1741n/a (getbufferproc)bytes_buffer_getbuffer,
1742n/a NULL,
1743n/a};
1744n/a
1745n/a
/* Strip modes understood by do_strip(), do_xstrip() and do_argstrip(). */
#define LEFTSTRIP 0     /* strip the leading end only */
#define RIGHTSTRIP 1    /* strip the trailing end only */
#define BOTHSTRIP 2     /* strip both ends */
1749n/a
1750n/a/*[clinic input]
1751n/abytes.split
1752n/a
1753n/a sep: object = None
1754n/a The delimiter according which to split the bytes.
1755n/a None (the default value) means split on ASCII whitespace characters
1756n/a (space, tab, return, newline, formfeed, vertical tab).
1757n/a maxsplit: Py_ssize_t = -1
1758n/a Maximum number of splits to do.
1759n/a -1 (the default value) means no limit.
1760n/a
1761n/aReturn a list of the sections in the bytes, using sep as the delimiter.
1762n/a[clinic start generated code]*/
1763n/a
1764n/astatic PyObject *
1765n/abytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1766n/a/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1767n/a{
1768n/a Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1769n/a const char *s = PyBytes_AS_STRING(self), *sub;
1770n/a Py_buffer vsub;
1771n/a PyObject *list;
1772n/a
1773n/a if (maxsplit < 0)
1774n/a maxsplit = PY_SSIZE_T_MAX;
1775n/a if (sep == Py_None)
1776n/a return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1777n/a if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1778n/a return NULL;
1779n/a sub = vsub.buf;
1780n/a n = vsub.len;
1781n/a
1782n/a list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1783n/a PyBuffer_Release(&vsub);
1784n/a return list;
1785n/a}
1786n/a
1787n/a/*[clinic input]
1788n/abytes.partition
1789n/a
1790n/a sep: Py_buffer
1791n/a /
1792n/a
1793n/aPartition the bytes into three parts using the given separator.
1794n/a
1795n/aThis will search for the separator sep in the bytes. If the separator is found,
1796n/areturns a 3-tuple containing the part before the separator, the separator
1797n/aitself, and the part after it.
1798n/a
1799n/aIf the separator is not found, returns a 3-tuple containing the original bytes
1800n/aobject and two empty bytes objects.
1801n/a[clinic start generated code]*/
1802n/a
1803n/astatic PyObject *
1804n/abytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1805n/a/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1806n/a{
1807n/a return stringlib_partition(
1808n/a (PyObject*) self,
1809n/a PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1810n/a sep->obj, (const char *)sep->buf, sep->len
1811n/a );
1812n/a}
1813n/a
1814n/a/*[clinic input]
1815n/abytes.rpartition
1816n/a
1817n/a sep: Py_buffer
1818n/a /
1819n/a
1820n/aPartition the bytes into three parts using the given separator.
1821n/a
1822n/aThis will search for the separator sep in the bytes, starting at the end. If
1823n/athe separator is found, returns a 3-tuple containing the part before the
1824n/aseparator, the separator itself, and the part after it.
1825n/a
1826n/aIf the separator is not found, returns a 3-tuple containing two empty bytes
1827n/aobjects and the original bytes object.
1828n/a[clinic start generated code]*/
1829n/a
1830n/astatic PyObject *
1831n/abytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1832n/a/*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
1833n/a{
1834n/a return stringlib_rpartition(
1835n/a (PyObject*) self,
1836n/a PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1837n/a sep->obj, (const char *)sep->buf, sep->len
1838n/a );
1839n/a}
1840n/a
1841n/a/*[clinic input]
1842n/abytes.rsplit = bytes.split
1843n/a
1844n/aReturn a list of the sections in the bytes, using sep as the delimiter.
1845n/a
1846n/aSplitting is done starting at the end of the bytes and working to the front.
1847n/a[clinic start generated code]*/
1848n/a
1849n/astatic PyObject *
1850n/abytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1851n/a/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1852n/a{
1853n/a Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1854n/a const char *s = PyBytes_AS_STRING(self), *sub;
1855n/a Py_buffer vsub;
1856n/a PyObject *list;
1857n/a
1858n/a if (maxsplit < 0)
1859n/a maxsplit = PY_SSIZE_T_MAX;
1860n/a if (sep == Py_None)
1861n/a return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1862n/a if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1863n/a return NULL;
1864n/a sub = vsub.buf;
1865n/a n = vsub.len;
1866n/a
1867n/a list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1868n/a PyBuffer_Release(&vsub);
1869n/a return list;
1870n/a}
1871n/a
1872n/a
1873n/a/*[clinic input]
1874n/abytes.join
1875n/a
1876n/a iterable_of_bytes: object
1877n/a /
1878n/a
1879n/aConcatenate any number of bytes objects.
1880n/a
1881n/aThe bytes whose method is called is inserted in between each pair.
1882n/a
1883n/aThe result is returned as a new bytes object.
1884n/a
1885n/aExample: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1886n/a[clinic start generated code]*/
1887n/a
1888n/astatic PyObject *
1889n/abytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1890n/a/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1891n/a{
1892n/a return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1893n/a}
1894n/a
1895n/aPyObject *
1896n/a_PyBytes_Join(PyObject *sep, PyObject *x)
1897n/a{
1898n/a assert(sep != NULL && PyBytes_Check(sep));
1899n/a assert(x != NULL);
1900n/a return bytes_join((PyBytesObject*)sep, x);
1901n/a}
1902n/a
1903n/astatic PyObject *
1904n/abytes_find(PyBytesObject *self, PyObject *args)
1905n/a{
1906n/a return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1907n/a}
1908n/a
1909n/astatic PyObject *
1910n/abytes_index(PyBytesObject *self, PyObject *args)
1911n/a{
1912n/a return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1913n/a}
1914n/a
1915n/a
1916n/astatic PyObject *
1917n/abytes_rfind(PyBytesObject *self, PyObject *args)
1918n/a{
1919n/a return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1920n/a}
1921n/a
1922n/a
1923n/astatic PyObject *
1924n/abytes_rindex(PyBytesObject *self, PyObject *args)
1925n/a{
1926n/a return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1927n/a}
1928n/a
1929n/a
1930n/aPy_LOCAL_INLINE(PyObject *)
1931n/ado_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1932n/a{
1933n/a Py_buffer vsep;
1934n/a char *s = PyBytes_AS_STRING(self);
1935n/a Py_ssize_t len = PyBytes_GET_SIZE(self);
1936n/a char *sep;
1937n/a Py_ssize_t seplen;
1938n/a Py_ssize_t i, j;
1939n/a
1940n/a if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1941n/a return NULL;
1942n/a sep = vsep.buf;
1943n/a seplen = vsep.len;
1944n/a
1945n/a i = 0;
1946n/a if (striptype != RIGHTSTRIP) {
1947n/a while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1948n/a i++;
1949n/a }
1950n/a }
1951n/a
1952n/a j = len;
1953n/a if (striptype != LEFTSTRIP) {
1954n/a do {
1955n/a j--;
1956n/a } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1957n/a j++;
1958n/a }
1959n/a
1960n/a PyBuffer_Release(&vsep);
1961n/a
1962n/a if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1963n/a Py_INCREF(self);
1964n/a return (PyObject*)self;
1965n/a }
1966n/a else
1967n/a return PyBytes_FromStringAndSize(s+i, j-i);
1968n/a}
1969n/a
1970n/a
1971n/aPy_LOCAL_INLINE(PyObject *)
1972n/ado_strip(PyBytesObject *self, int striptype)
1973n/a{
1974n/a char *s = PyBytes_AS_STRING(self);
1975n/a Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1976n/a
1977n/a i = 0;
1978n/a if (striptype != RIGHTSTRIP) {
1979n/a while (i < len && Py_ISSPACE(s[i])) {
1980n/a i++;
1981n/a }
1982n/a }
1983n/a
1984n/a j = len;
1985n/a if (striptype != LEFTSTRIP) {
1986n/a do {
1987n/a j--;
1988n/a } while (j >= i && Py_ISSPACE(s[j]));
1989n/a j++;
1990n/a }
1991n/a
1992n/a if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1993n/a Py_INCREF(self);
1994n/a return (PyObject*)self;
1995n/a }
1996n/a else
1997n/a return PyBytes_FromStringAndSize(s+i, j-i);
1998n/a}
1999n/a
2000n/a
2001n/aPy_LOCAL_INLINE(PyObject *)
2002n/ado_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
2003n/a{
2004n/a if (bytes != NULL && bytes != Py_None) {
2005n/a return do_xstrip(self, striptype, bytes);
2006n/a }
2007n/a return do_strip(self, striptype);
2008n/a}
2009n/a
2010n/a/*[clinic input]
2011n/abytes.strip
2012n/a
2013n/a bytes: object = None
2014n/a /
2015n/a
2016n/aStrip leading and trailing bytes contained in the argument.
2017n/a
2018n/aIf the argument is omitted or None, strip leading and trailing ASCII whitespace.
2019n/a[clinic start generated code]*/
2020n/a
2021n/astatic PyObject *
2022n/abytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2023n/a/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
2024n/a{
2025n/a return do_argstrip(self, BOTHSTRIP, bytes);
2026n/a}
2027n/a
2028n/a/*[clinic input]
2029n/abytes.lstrip
2030n/a
2031n/a bytes: object = None
2032n/a /
2033n/a
2034n/aStrip leading bytes contained in the argument.
2035n/a
2036n/aIf the argument is omitted or None, strip leading ASCII whitespace.
2037n/a[clinic start generated code]*/
2038n/a
2039n/astatic PyObject *
2040n/abytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2041n/a/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2042n/a{
2043n/a return do_argstrip(self, LEFTSTRIP, bytes);
2044n/a}
2045n/a
2046n/a/*[clinic input]
2047n/abytes.rstrip
2048n/a
2049n/a bytes: object = None
2050n/a /
2051n/a
2052n/aStrip trailing bytes contained in the argument.
2053n/a
2054n/aIf the argument is omitted or None, strip trailing ASCII whitespace.
2055n/a[clinic start generated code]*/
2056n/a
2057n/astatic PyObject *
2058n/abytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2059n/a/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2060n/a{
2061n/a return do_argstrip(self, RIGHTSTRIP, bytes);
2062n/a}
2063n/a
2064n/a
2065n/astatic PyObject *
2066n/abytes_count(PyBytesObject *self, PyObject *args)
2067n/a{
2068n/a return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2069n/a}
2070n/a
2071n/a
2072n/a/*[clinic input]
2073n/abytes.translate
2074n/a
2075n/a table: object
2076n/a Translation table, which must be a bytes object of length 256.
2077n/a /
2078n/a delete as deletechars: object(c_default="NULL") = b''
2079n/a
2080n/aReturn a copy with each character mapped by the given translation table.
2081n/a
2082n/aAll characters occurring in the optional argument delete are removed.
2083n/aThe remaining characters are mapped through the given translation table.
2084n/a[clinic start generated code]*/
2085n/a
2086n/astatic PyObject *
2087n/abytes_translate_impl(PyBytesObject *self, PyObject *table,
2088n/a PyObject *deletechars)
2089n/a/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2090n/a{
2091n/a char *input, *output;
2092n/a Py_buffer table_view = {NULL, NULL};
2093n/a Py_buffer del_table_view = {NULL, NULL};
2094n/a const char *table_chars;
2095n/a Py_ssize_t i, c, changed = 0;
2096n/a PyObject *input_obj = (PyObject*)self;
2097n/a const char *output_start, *del_table_chars=NULL;
2098n/a Py_ssize_t inlen, tablen, dellen = 0;
2099n/a PyObject *result;
2100n/a int trans_table[256];
2101n/a
2102n/a if (PyBytes_Check(table)) {
2103n/a table_chars = PyBytes_AS_STRING(table);
2104n/a tablen = PyBytes_GET_SIZE(table);
2105n/a }
2106n/a else if (table == Py_None) {
2107n/a table_chars = NULL;
2108n/a tablen = 256;
2109n/a }
2110n/a else {
2111n/a if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2112n/a return NULL;
2113n/a table_chars = table_view.buf;
2114n/a tablen = table_view.len;
2115n/a }
2116n/a
2117n/a if (tablen != 256) {
2118n/a PyErr_SetString(PyExc_ValueError,
2119n/a "translation table must be 256 characters long");
2120n/a PyBuffer_Release(&table_view);
2121n/a return NULL;
2122n/a }
2123n/a
2124n/a if (deletechars != NULL) {
2125n/a if (PyBytes_Check(deletechars)) {
2126n/a del_table_chars = PyBytes_AS_STRING(deletechars);
2127n/a dellen = PyBytes_GET_SIZE(deletechars);
2128n/a }
2129n/a else {
2130n/a if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2131n/a PyBuffer_Release(&table_view);
2132n/a return NULL;
2133n/a }
2134n/a del_table_chars = del_table_view.buf;
2135n/a dellen = del_table_view.len;
2136n/a }
2137n/a }
2138n/a else {
2139n/a del_table_chars = NULL;
2140n/a dellen = 0;
2141n/a }
2142n/a
2143n/a inlen = PyBytes_GET_SIZE(input_obj);
2144n/a result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2145n/a if (result == NULL) {
2146n/a PyBuffer_Release(&del_table_view);
2147n/a PyBuffer_Release(&table_view);
2148n/a return NULL;
2149n/a }
2150n/a output_start = output = PyBytes_AS_STRING(result);
2151n/a input = PyBytes_AS_STRING(input_obj);
2152n/a
2153n/a if (dellen == 0 && table_chars != NULL) {
2154n/a /* If no deletions are required, use faster code */
2155n/a for (i = inlen; --i >= 0; ) {
2156n/a c = Py_CHARMASK(*input++);
2157n/a if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2158n/a changed = 1;
2159n/a }
2160n/a if (!changed && PyBytes_CheckExact(input_obj)) {
2161n/a Py_INCREF(input_obj);
2162n/a Py_DECREF(result);
2163n/a result = input_obj;
2164n/a }
2165n/a PyBuffer_Release(&del_table_view);
2166n/a PyBuffer_Release(&table_view);
2167n/a return result;
2168n/a }
2169n/a
2170n/a if (table_chars == NULL) {
2171n/a for (i = 0; i < 256; i++)
2172n/a trans_table[i] = Py_CHARMASK(i);
2173n/a } else {
2174n/a for (i = 0; i < 256; i++)
2175n/a trans_table[i] = Py_CHARMASK(table_chars[i]);
2176n/a }
2177n/a PyBuffer_Release(&table_view);
2178n/a
2179n/a for (i = 0; i < dellen; i++)
2180n/a trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2181n/a PyBuffer_Release(&del_table_view);
2182n/a
2183n/a for (i = inlen; --i >= 0; ) {
2184n/a c = Py_CHARMASK(*input++);
2185n/a if (trans_table[c] != -1)
2186n/a if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2187n/a continue;
2188n/a changed = 1;
2189n/a }
2190n/a if (!changed && PyBytes_CheckExact(input_obj)) {
2191n/a Py_DECREF(result);
2192n/a Py_INCREF(input_obj);
2193n/a return input_obj;
2194n/a }
2195n/a /* Fix the size of the resulting string */
2196n/a if (inlen > 0)
2197n/a _PyBytes_Resize(&result, output - output_start);
2198n/a return result;
2199n/a}
2200n/a
2201n/a
2202n/a/*[clinic input]
2203n/a
2204n/a@staticmethod
2205n/abytes.maketrans
2206n/a
2207n/a frm: Py_buffer
2208n/a to: Py_buffer
2209n/a /
2210n/a
2211n/aReturn a translation table useable for the bytes or bytearray translate method.
2212n/a
2213n/aThe returned table will be one where each byte in frm is mapped to the byte at
2214n/athe same position in to.
2215n/a
2216n/aThe bytes objects frm and to must be of the same length.
2217n/a[clinic start generated code]*/
2218n/a
2219n/astatic PyObject *
2220n/abytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2221n/a/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2222n/a{
2223n/a return _Py_bytes_maketrans(frm, to);
2224n/a}
2225n/a
2226n/a
2227n/a/*[clinic input]
2228n/abytes.replace
2229n/a
2230n/a old: Py_buffer
2231n/a new: Py_buffer
2232n/a count: Py_ssize_t = -1
2233n/a Maximum number of occurrences to replace.
2234n/a -1 (the default value) means replace all occurrences.
2235n/a /
2236n/a
2237n/aReturn a copy with all occurrences of substring old replaced by new.
2238n/a
2239n/aIf the optional argument count is given, only the first count occurrences are
2240n/areplaced.
2241n/a[clinic start generated code]*/
2242n/a
2243n/astatic PyObject *
2244n/abytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2245n/a Py_ssize_t count)
2246n/a/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2247n/a{
2248n/a return stringlib_replace((PyObject *)self,
2249n/a (const char *)old->buf, old->len,
2250n/a (const char *)new->buf, new->len, count);
2251n/a}
2252n/a
2253n/a/** End DALKE **/
2254n/a
2255n/a
2256n/astatic PyObject *
2257n/abytes_startswith(PyBytesObject *self, PyObject *args)
2258n/a{
2259n/a return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2260n/a}
2261n/a
2262n/astatic PyObject *
2263n/abytes_endswith(PyBytesObject *self, PyObject *args)
2264n/a{
2265n/a return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2266n/a}
2267n/a
2268n/a
2269n/a/*[clinic input]
2270n/abytes.decode
2271n/a
2272n/a encoding: str(c_default="NULL") = 'utf-8'
2273n/a The encoding with which to decode the bytes.
2274n/a errors: str(c_default="NULL") = 'strict'
2275n/a The error handling scheme to use for the handling of decoding errors.
2276n/a The default is 'strict' meaning that decoding errors raise a
2277n/a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2278n/a as well as any other name registered with codecs.register_error that
2279n/a can handle UnicodeDecodeErrors.
2280n/a
2281n/aDecode the bytes using the codec registered for encoding.
2282n/a[clinic start generated code]*/
2283n/a
2284n/astatic PyObject *
2285n/abytes_decode_impl(PyBytesObject *self, const char *encoding,
2286n/a const char *errors)
2287n/a/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2288n/a{
2289n/a return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2290n/a}
2291n/a
2292n/a
2293n/a/*[clinic input]
2294n/abytes.splitlines
2295n/a
2296n/a keepends: int(c_default="0") = False
2297n/a
2298n/aReturn a list of the lines in the bytes, breaking at line boundaries.
2299n/a
2300n/aLine breaks are not included in the resulting list unless keepends is given and
2301n/atrue.
2302n/a[clinic start generated code]*/
2303n/a
2304n/astatic PyObject *
2305n/abytes_splitlines_impl(PyBytesObject *self, int keepends)
2306n/a/*[clinic end generated code: output=3484149a5d880ffb input=7f4aac67144f9944]*/
2307n/a{
2308n/a return stringlib_splitlines(
2309n/a (PyObject*) self, PyBytes_AS_STRING(self),
2310n/a PyBytes_GET_SIZE(self), keepends
2311n/a );
2312n/a}
2313n/a
2314n/a/*[clinic input]
2315n/a@classmethod
2316n/abytes.fromhex
2317n/a
2318n/a string: unicode
2319n/a /
2320n/a
2321n/aCreate a bytes object from a string of hexadecimal numbers.
2322n/a
2323n/aSpaces between two numbers are accepted.
2324n/aExample: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2325n/a[clinic start generated code]*/
2326n/a
2327n/astatic PyObject *
2328n/abytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2329n/a/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2330n/a{
2331n/a PyObject *result = _PyBytes_FromHex(string, 0);
2332n/a if (type != &PyBytes_Type && result != NULL) {
2333n/a Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2334n/a result, NULL));
2335n/a }
2336n/a return result;
2337n/a}
2338n/a
2339n/aPyObject*
2340n/a_PyBytes_FromHex(PyObject *string, int use_bytearray)
2341n/a{
2342n/a char *buf;
2343n/a Py_ssize_t hexlen, invalid_char;
2344n/a unsigned int top, bot;
2345n/a Py_UCS1 *str, *end;
2346n/a _PyBytesWriter writer;
2347n/a
2348n/a _PyBytesWriter_Init(&writer);
2349n/a writer.use_bytearray = use_bytearray;
2350n/a
2351n/a assert(PyUnicode_Check(string));
2352n/a if (PyUnicode_READY(string))
2353n/a return NULL;
2354n/a hexlen = PyUnicode_GET_LENGTH(string);
2355n/a
2356n/a if (!PyUnicode_IS_ASCII(string)) {
2357n/a void *data = PyUnicode_DATA(string);
2358n/a unsigned int kind = PyUnicode_KIND(string);
2359n/a Py_ssize_t i;
2360n/a
2361n/a /* search for the first non-ASCII character */
2362n/a for (i = 0; i < hexlen; i++) {
2363n/a if (PyUnicode_READ(kind, data, i) >= 128)
2364n/a break;
2365n/a }
2366n/a invalid_char = i;
2367n/a goto error;
2368n/a }
2369n/a
2370n/a assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2371n/a str = PyUnicode_1BYTE_DATA(string);
2372n/a
2373n/a /* This overestimates if there are spaces */
2374n/a buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2375n/a if (buf == NULL)
2376n/a return NULL;
2377n/a
2378n/a end = str + hexlen;
2379n/a while (str < end) {
2380n/a /* skip over spaces in the input */
2381n/a if (Py_ISSPACE(*str)) {
2382n/a do {
2383n/a str++;
2384n/a } while (Py_ISSPACE(*str));
2385n/a if (str >= end)
2386n/a break;
2387n/a }
2388n/a
2389n/a top = _PyLong_DigitValue[*str];
2390n/a if (top >= 16) {
2391n/a invalid_char = str - PyUnicode_1BYTE_DATA(string);
2392n/a goto error;
2393n/a }
2394n/a str++;
2395n/a
2396n/a bot = _PyLong_DigitValue[*str];
2397n/a if (bot >= 16) {
2398n/a invalid_char = str - PyUnicode_1BYTE_DATA(string);
2399n/a goto error;
2400n/a }
2401n/a str++;
2402n/a
2403n/a *buf++ = (unsigned char)((top << 4) + bot);
2404n/a }
2405n/a
2406n/a return _PyBytesWriter_Finish(&writer, buf);
2407n/a
2408n/a error:
2409n/a PyErr_Format(PyExc_ValueError,
2410n/a "non-hexadecimal number found in "
2411n/a "fromhex() arg at position %zd", invalid_char);
2412n/a _PyBytesWriter_Dealloc(&writer);
2413n/a return NULL;
2414n/a}
2415n/a
2416n/aPyDoc_STRVAR(hex__doc__,
2417n/a"B.hex() -> string\n\
2418n/a\n\
2419n/aCreate a string of hexadecimal numbers from a bytes object.\n\
2420n/aExample: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2421n/a
2422n/astatic PyObject *
2423n/abytes_hex(PyBytesObject *self)
2424n/a{
2425n/a char* argbuf = PyBytes_AS_STRING(self);
2426n/a Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2427n/a return _Py_strhex(argbuf, arglen);
2428n/a}
2429n/a
2430n/astatic PyObject *
2431n/abytes_getnewargs(PyBytesObject *v)
2432n/a{
2433n/a return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2434n/a}
2435n/a
2436n/a
2437n/astatic PyMethodDef
2438n/abytes_methods[] = {
2439n/a {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2440n/a {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2441n/a _Py_capitalize__doc__},
2442n/a {"center", (PyCFunction)stringlib_center, METH_VARARGS,
2443n/a _Py_center__doc__},
2444n/a {"count", (PyCFunction)bytes_count, METH_VARARGS,
2445n/a _Py_count__doc__},
2446n/a BYTES_DECODE_METHODDEF
2447n/a {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2448n/a _Py_endswith__doc__},
2449n/a {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
2450n/a _Py_expandtabs__doc__},
2451n/a {"find", (PyCFunction)bytes_find, METH_VARARGS,
2452n/a _Py_find__doc__},
2453n/a BYTES_FROMHEX_METHODDEF
2454n/a {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
2455n/a {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2456n/a {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2457n/a _Py_isalnum__doc__},
2458n/a {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2459n/a _Py_isalpha__doc__},
2460n/a {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2461n/a _Py_isdigit__doc__},
2462n/a {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2463n/a _Py_islower__doc__},
2464n/a {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2465n/a _Py_isspace__doc__},
2466n/a {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2467n/a _Py_istitle__doc__},
2468n/a {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2469n/a _Py_isupper__doc__},
2470n/a BYTES_JOIN_METHODDEF
2471n/a {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
2472n/a {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2473n/a BYTES_LSTRIP_METHODDEF
2474n/a BYTES_MAKETRANS_METHODDEF
2475n/a BYTES_PARTITION_METHODDEF
2476n/a BYTES_REPLACE_METHODDEF
2477n/a {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2478n/a {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2479n/a {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
2480n/a BYTES_RPARTITION_METHODDEF
2481n/a BYTES_RSPLIT_METHODDEF
2482n/a BYTES_RSTRIP_METHODDEF
2483n/a BYTES_SPLIT_METHODDEF
2484n/a BYTES_SPLITLINES_METHODDEF
2485n/a {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2486n/a _Py_startswith__doc__},
2487n/a BYTES_STRIP_METHODDEF
2488n/a {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2489n/a _Py_swapcase__doc__},
2490n/a {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2491n/a BYTES_TRANSLATE_METHODDEF
2492n/a {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2493n/a {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
2494n/a {NULL, NULL} /* sentinel */
2495n/a};
2496n/a
2497n/astatic PyObject *
2498n/abytes_mod(PyObject *self, PyObject *arg)
2499n/a{
2500n/a if (!PyBytes_Check(self)) {
2501n/a Py_RETURN_NOTIMPLEMENTED;
2502n/a }
2503n/a return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2504n/a arg, 0);
2505n/a}
2506n/a
2507n/astatic PyNumberMethods bytes_as_number = {
2508n/a 0, /*nb_add*/
2509n/a 0, /*nb_subtract*/
2510n/a 0, /*nb_multiply*/
2511n/a bytes_mod, /*nb_remainder*/
2512n/a};
2513n/a
2514n/astatic PyObject *
2515n/abytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2516n/a
2517n/astatic PyObject *
2518n/abytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2519n/a{
2520n/a PyObject *x = NULL;
2521n/a const char *encoding = NULL;
2522n/a const char *errors = NULL;
2523n/a PyObject *new = NULL;
2524n/a PyObject *func;
2525n/a Py_ssize_t size;
2526n/a static char *kwlist[] = {"source", "encoding", "errors", 0};
2527n/a _Py_IDENTIFIER(__bytes__);
2528n/a
2529n/a if (type != &PyBytes_Type)
2530n/a return bytes_subtype_new(type, args, kwds);
2531n/a if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2532n/a &encoding, &errors))
2533n/a return NULL;
2534n/a if (x == NULL) {
2535n/a if (encoding != NULL || errors != NULL) {
2536n/a PyErr_SetString(PyExc_TypeError,
2537n/a "encoding or errors without sequence "
2538n/a "argument");
2539n/a return NULL;
2540n/a }
2541n/a return PyBytes_FromStringAndSize(NULL, 0);
2542n/a }
2543n/a
2544n/a if (encoding != NULL) {
2545n/a /* Encode via the codec registry */
2546n/a if (!PyUnicode_Check(x)) {
2547n/a PyErr_SetString(PyExc_TypeError,
2548n/a "encoding without a string argument");
2549n/a return NULL;
2550n/a }
2551n/a new = PyUnicode_AsEncodedString(x, encoding, errors);
2552n/a if (new == NULL)
2553n/a return NULL;
2554n/a assert(PyBytes_Check(new));
2555n/a return new;
2556n/a }
2557n/a
2558n/a if (errors != NULL) {
2559n/a PyErr_SetString(PyExc_TypeError,
2560n/a PyUnicode_Check(x) ?
2561n/a "string argument without an encoding" :
2562n/a "errors without a string argument");
2563n/a return NULL;
2564n/a }
2565n/a
2566n/a /* We'd like to call PyObject_Bytes here, but we need to check for an
2567n/a integer argument before deferring to PyBytes_FromObject, something
2568n/a PyObject_Bytes doesn't do. */
2569n/a func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2570n/a if (func != NULL) {
2571n/a new = _PyObject_CallNoArg(func);
2572n/a Py_DECREF(func);
2573n/a if (new == NULL)
2574n/a return NULL;
2575n/a if (!PyBytes_Check(new)) {
2576n/a PyErr_Format(PyExc_TypeError,
2577n/a "__bytes__ returned non-bytes (type %.200s)",
2578n/a Py_TYPE(new)->tp_name);
2579n/a Py_DECREF(new);
2580n/a return NULL;
2581n/a }
2582n/a return new;
2583n/a }
2584n/a else if (PyErr_Occurred())
2585n/a return NULL;
2586n/a
2587n/a if (PyUnicode_Check(x)) {
2588n/a PyErr_SetString(PyExc_TypeError,
2589n/a "string argument without an encoding");
2590n/a return NULL;
2591n/a }
2592n/a /* Is it an integer? */
2593n/a if (PyIndex_Check(x)) {
2594n/a size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2595n/a if (size == -1 && PyErr_Occurred()) {
2596n/a if (PyErr_ExceptionMatches(PyExc_OverflowError))
2597n/a return NULL;
2598n/a PyErr_Clear(); /* fall through */
2599n/a }
2600n/a else {
2601n/a if (size < 0) {
2602n/a PyErr_SetString(PyExc_ValueError, "negative count");
2603n/a return NULL;
2604n/a }
2605n/a new = _PyBytes_FromSize(size, 1);
2606n/a if (new == NULL)
2607n/a return NULL;
2608n/a return new;
2609n/a }
2610n/a }
2611n/a
2612n/a return PyBytes_FromObject(x);
2613n/a}
2614n/a
2615n/astatic PyObject*
2616n/a_PyBytes_FromBuffer(PyObject *x)
2617n/a{
2618n/a PyObject *new;
2619n/a Py_buffer view;
2620n/a
2621n/a if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2622n/a return NULL;
2623n/a
2624n/a new = PyBytes_FromStringAndSize(NULL, view.len);
2625n/a if (!new)
2626n/a goto fail;
2627n/a if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2628n/a &view, view.len, 'C') < 0)
2629n/a goto fail;
2630n/a PyBuffer_Release(&view);
2631n/a return new;
2632n/a
2633n/afail:
2634n/a Py_XDECREF(new);
2635n/a PyBuffer_Release(&view);
2636n/a return NULL;
2637n/a}
2638n/a
/* Function body shared by _PyBytes_FromList() and _PyBytes_FromTuple().

   x is the sequence and GET_ITEM the matching borrowed-reference accessor
   (PyList_GET_ITEM or PyTuple_GET_ITEM).  Each item is converted with
   PyNumber_AsSsize_t() and must lie in range(0, 256).  The macro returns
   from the enclosing function: a new bytes object on success, NULL with an
   exception set on failure.

   PyNumber_AsSsize_t() may run arbitrary code through __index__, which can
   mutate x.  Therefore:
     - a strong reference to the item is held across the call, so the item
       cannot be freed while it is in use;
     - the result is enlarged if the sequence has grown beyond the size it
       was allocated for, so no write ever lands outside the buffer;
     - the result is trimmed if the sequence ended up shorter, so no
       uninitialized bytes are returned.
   _PyBytes_Resize() sets `bytes` to NULL and releases it on failure, so
   those paths return directly without another Py_DECREF. */
#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM)                                \
    do {                                                                    \
        PyObject *bytes;                                                    \
        PyObject *item;                                                     \
        Py_ssize_t i;                                                       \
        Py_ssize_t size = Py_SIZE(x);                                       \
        Py_ssize_t value;                                                   \
                                                                            \
        bytes = PyBytes_FromStringAndSize(NULL, size);                      \
        if (bytes == NULL)                                                  \
            return NULL;                                                    \
                                                                            \
        for (i = 0; i < Py_SIZE(x); i++) {                                  \
            if (i >= size) {                                                \
                /* x grew during an earlier __index__ call.  No Python */   \
                /* code has run since the loop test, so Py_SIZE(x) > i. */  \
                size = Py_SIZE(x);                                          \
                if (_PyBytes_Resize(&bytes, size) < 0)                      \
                    return NULL;                                            \
            }                                                               \
            item = GET_ITEM((x), i);                                        \
            Py_INCREF(item);                                                \
            value = PyNumber_AsSsize_t(item, NULL);                         \
            Py_DECREF(item);                                                \
            if (value == -1 && PyErr_Occurred())                            \
                goto error;                                                 \
                                                                            \
            if (value < 0 || value >= 256) {                                \
                PyErr_SetString(PyExc_ValueError,                           \
                                "bytes must be in range(0, 256)");          \
                goto error;                                                 \
            }                                                               \
            /* Re-read the data pointer: a resize may have moved it. */     \
            PyBytes_AS_STRING(bytes)[i] = (char) value;                     \
        }                                                                   \
        /* x shrank while being read: drop the unused tail. */              \
        if (i < size && _PyBytes_Resize(&bytes, i) < 0)                     \
            return NULL;                                                    \
        return bytes;                                                       \
                                                                            \
      error:                                                                \
        Py_DECREF(bytes);                                                   \
        return NULL;                                                        \
    } while (0)
2671n/a
/* Build a bytes object from the items of the list x.  The whole body is the
   expansion of _PyBytes_FROM_LIST_BODY with PyList_GET_ITEM as accessor:
   every item is converted with PyNumber_AsSsize_t() and must be in
   range(0, 256).  Returns a new reference, or NULL with an exception set. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
}
2677n/a
/* Build a bytes object from the items of the tuple x.  The whole body is
   the expansion of _PyBytes_FROM_LIST_BODY with PyTuple_GET_ITEM as
   accessor: every item is converted with PyNumber_AsSsize_t() and must be
   in range(0, 256).  Returns a new reference, or NULL with an exception
   set. */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
}
2683n/a
2684n/astatic PyObject *
2685n/a_PyBytes_FromIterator(PyObject *it, PyObject *x)
2686n/a{
2687n/a char *str;
2688n/a Py_ssize_t i, size;
2689n/a _PyBytesWriter writer;
2690n/a
2691n/a /* For iterator version, create a string object and resize as needed */
2692n/a size = PyObject_LengthHint(x, 64);
2693n/a if (size == -1 && PyErr_Occurred())
2694n/a return NULL;
2695n/a
2696n/a _PyBytesWriter_Init(&writer);
2697n/a str = _PyBytesWriter_Alloc(&writer, size);
2698n/a if (str == NULL)
2699n/a return NULL;
2700n/a writer.overallocate = 1;
2701n/a size = writer.allocated;
2702n/a
2703n/a /* Run the iterator to exhaustion */
2704n/a for (i = 0; ; i++) {
2705n/a PyObject *item;
2706n/a Py_ssize_t value;
2707n/a
2708n/a /* Get the next item */
2709n/a item = PyIter_Next(it);
2710n/a if (item == NULL) {
2711n/a if (PyErr_Occurred())
2712n/a goto error;
2713n/a break;
2714n/a }
2715n/a
2716n/a /* Interpret it as an int (__index__) */
2717n/a value = PyNumber_AsSsize_t(item, NULL);
2718n/a Py_DECREF(item);
2719n/a if (value == -1 && PyErr_Occurred())
2720n/a goto error;
2721n/a
2722n/a /* Range check */
2723n/a if (value < 0 || value >= 256) {
2724n/a PyErr_SetString(PyExc_ValueError,
2725n/a "bytes must be in range(0, 256)");
2726n/a goto error;
2727n/a }
2728n/a
2729n/a /* Append the byte */
2730n/a if (i >= size) {
2731n/a str = _PyBytesWriter_Resize(&writer, str, size+1);
2732n/a if (str == NULL)
2733n/a return NULL;
2734n/a size = writer.allocated;
2735n/a }
2736n/a *str++ = (char) value;
2737n/a }
2738n/a
2739n/a return _PyBytesWriter_Finish(&writer, str);
2740n/a
2741n/a error:
2742n/a _PyBytesWriter_Dealloc(&writer);
2743n/a return NULL;
2744n/a}
2745n/a
2746n/aPyObject *
2747n/aPyBytes_FromObject(PyObject *x)
2748n/a{
2749n/a PyObject *it, *result;
2750n/a
2751n/a if (x == NULL) {
2752n/a PyErr_BadInternalCall();
2753n/a return NULL;
2754n/a }
2755n/a
2756n/a if (PyBytes_CheckExact(x)) {
2757n/a Py_INCREF(x);
2758n/a return x;
2759n/a }
2760n/a
2761n/a /* Use the modern buffer interface */
2762n/a if (PyObject_CheckBuffer(x))
2763n/a return _PyBytes_FromBuffer(x);
2764n/a
2765n/a if (PyList_CheckExact(x))
2766n/a return _PyBytes_FromList(x);
2767n/a
2768n/a if (PyTuple_CheckExact(x))
2769n/a return _PyBytes_FromTuple(x);
2770n/a
2771n/a if (!PyUnicode_Check(x)) {
2772n/a it = PyObject_GetIter(x);
2773n/a if (it != NULL) {
2774n/a result = _PyBytes_FromIterator(it, x);
2775n/a Py_DECREF(it);
2776n/a return result;
2777n/a }
2778n/a }
2779n/a
2780n/a PyErr_Format(PyExc_TypeError,
2781n/a "cannot convert '%.200s' object to bytes",
2782n/a x->ob_type->tp_name);
2783n/a return NULL;
2784n/a}
2785n/a
2786n/astatic PyObject *
2787n/abytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2788n/a{
2789n/a PyObject *tmp, *pnew;
2790n/a Py_ssize_t n;
2791n/a
2792n/a assert(PyType_IsSubtype(type, &PyBytes_Type));
2793n/a tmp = bytes_new(&PyBytes_Type, args, kwds);
2794n/a if (tmp == NULL)
2795n/a return NULL;
2796n/a assert(PyBytes_Check(tmp));
2797n/a n = PyBytes_GET_SIZE(tmp);
2798n/a pnew = type->tp_alloc(type, n);
2799n/a if (pnew != NULL) {
2800n/a memcpy(PyBytes_AS_STRING(pnew),
2801n/a PyBytes_AS_STRING(tmp), n+1);
2802n/a ((PyBytesObject *)pnew)->ob_shash =
2803n/a ((PyBytesObject *)tmp)->ob_shash;
2804n/a }
2805n/a Py_DECREF(tmp);
2806n/a return pnew;
2807n/a}
2808n/a
/* Docstring of the bytes type (used as tp_doc of PyBytes_Type).  The text
   relies on backslash line continuations, so any whitespace at the start of
   a continued line is part of the string itself. */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
 - an iterable yielding integers in range(256)\n\
 - a text string encoded using the specified encoding\n\
 - any object implementing the buffer API.\n\
 - an integer");
2821n/a
2822n/astatic PyObject *bytes_iter(PyObject *seq);
2823n/a
2824n/aPyTypeObject PyBytes_Type = {
2825n/a PyVarObject_HEAD_INIT(&PyType_Type, 0)
2826n/a "bytes",
2827n/a PyBytesObject_SIZE,
2828n/a sizeof(char),
2829n/a bytes_dealloc, /* tp_dealloc */
2830n/a 0, /* tp_print */
2831n/a 0, /* tp_getattr */
2832n/a 0, /* tp_setattr */
2833n/a 0, /* tp_reserved */
2834n/a (reprfunc)bytes_repr, /* tp_repr */
2835n/a &bytes_as_number, /* tp_as_number */
2836n/a &bytes_as_sequence, /* tp_as_sequence */
2837n/a &bytes_as_mapping, /* tp_as_mapping */
2838n/a (hashfunc)bytes_hash, /* tp_hash */
2839n/a 0, /* tp_call */
2840n/a bytes_str, /* tp_str */
2841n/a PyObject_GenericGetAttr, /* tp_getattro */
2842n/a 0, /* tp_setattro */
2843n/a &bytes_as_buffer, /* tp_as_buffer */
2844n/a Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2845n/a Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2846n/a bytes_doc, /* tp_doc */
2847n/a 0, /* tp_traverse */
2848n/a 0, /* tp_clear */
2849n/a (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2850n/a 0, /* tp_weaklistoffset */
2851n/a bytes_iter, /* tp_iter */
2852n/a 0, /* tp_iternext */
2853n/a bytes_methods, /* tp_methods */
2854n/a 0, /* tp_members */
2855n/a 0, /* tp_getset */
2856n/a &PyBaseObject_Type, /* tp_base */
2857n/a 0, /* tp_dict */
2858n/a 0, /* tp_descr_get */
2859n/a 0, /* tp_descr_set */
2860n/a 0, /* tp_dictoffset */
2861n/a 0, /* tp_init */
2862n/a 0, /* tp_alloc */
2863n/a bytes_new, /* tp_new */
2864n/a PyObject_Del, /* tp_free */
2865n/a};
2866n/a
2867n/avoid
2868n/aPyBytes_Concat(PyObject **pv, PyObject *w)
2869n/a{
2870n/a assert(pv != NULL);
2871n/a if (*pv == NULL)
2872n/a return;
2873n/a if (w == NULL) {
2874n/a Py_CLEAR(*pv);
2875n/a return;
2876n/a }
2877n/a
2878n/a if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2879n/a /* Only one reference, so we can resize in place */
2880n/a Py_ssize_t oldsize;
2881n/a Py_buffer wb;
2882n/a
2883n/a wb.len = -1;
2884n/a if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2885n/a PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2886n/a Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2887n/a Py_CLEAR(*pv);
2888n/a return;
2889n/a }
2890n/a
2891n/a oldsize = PyBytes_GET_SIZE(*pv);
2892n/a if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2893n/a PyErr_NoMemory();
2894n/a goto error;
2895n/a }
2896n/a if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2897n/a goto error;
2898n/a
2899n/a memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2900n/a PyBuffer_Release(&wb);
2901n/a return;
2902n/a
2903n/a error:
2904n/a PyBuffer_Release(&wb);
2905n/a Py_CLEAR(*pv);
2906n/a return;
2907n/a }
2908n/a
2909n/a else {
2910n/a /* Multiple references, need to create new object */
2911n/a PyObject *v;
2912n/a v = bytes_concat(*pv, w);
2913n/a Py_SETREF(*pv, v);
2914n/a }
2915n/a}
2916n/a
2917n/avoid
2918n/aPyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2919n/a{
2920n/a PyBytes_Concat(pv, w);
2921n/a Py_XDECREF(w);
2922n/a}
2923n/a
2924n/a
/* The following function breaks the notion that bytes are immutable:
   it changes the size of a bytes object.  We get away with this only if
   there is only one reference to the object.  You can also think of it
   as creating a new bytes object and destroying the old one, only
   more efficiently.  In any case, don't use this if the bytes object may
   already be known to some other part of the code...

   Return 0 on success; the value in *pv may or may not be the same as on
   input.  As always, an extra byte is allocated for a trailing \0 byte
   (newsize does *not* include that), and a trailing \0 byte is stored.
   The cached hash is invalidated.

   Return -1 on failure, with *pv set to NULL and the original object
   released:
     - if there is not enough memory, an "out of memory" exception is set;
     - if *pv is not a bytes object, newsize is negative, or the size would
       change while the reference count is not 1, PyErr_BadInternalCall()
       is invoked.
*/

int
_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
{
    PyObject *v;
    PyBytesObject *sv;
    v = *pv;
    if (!PyBytes_Check(v) || newsize < 0) {
        goto error;
    }
    if (Py_SIZE(v) == newsize) {
        /* Nothing to do: this check comes before the reference count test,
           so a shared object may be "resized" to its current size. */
        return 0;
    }
    if (Py_REFCNT(v) != 1) {
        goto error;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* Take the object out of the reference bookkeeping before the block is
       reallocated; it is registered again below at its new address. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
    if (*pv == NULL) {
        /* The old block is still allocated after the failed realloc, so it
           is freed explicitly here. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyBytesObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
error:
    *pv = 0;
    Py_DECREF(v);
    PyErr_BadInternalCall();
    return -1;
}
2977n/a
2978n/avoid
2979n/aPyBytes_Fini(void)
2980n/a{
2981n/a int i;
2982n/a for (i = 0; i < UCHAR_MAX + 1; i++)
2983n/a Py_CLEAR(characters[i]);
2984n/a Py_CLEAR(nullstring);
2985n/a}
2986n/a
2987n/a/*********************** Bytes Iterator ****************************/
2988n/a
/* Instance layout of the bytes iterator ("bytes_iterator"). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;        /* Index of the next byte to return */
    PyBytesObject *it_seq;      /* Set to NULL when iterator is exhausted */
} striterobject;
2994n/a
/* tp_dealloc of the bytes iterator: stop GC tracking, drop the reference to
   the iterated bytes object (it_seq may already be NULL), then free the
   iterator itself. */
static void
striter_dealloc(striterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
3002n/a
/* tp_traverse of the bytes iterator: the only object reference held is
   it_seq.  Py_VISIT uses the `visit` and `arg` parameters by name. */
static int
striter_traverse(striterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
3009n/a
3010n/astatic PyObject *
3011n/astriter_next(striterobject *it)
3012n/a{
3013n/a PyBytesObject *seq;
3014n/a PyObject *item;
3015n/a
3016n/a assert(it != NULL);
3017n/a seq = it->it_seq;
3018n/a if (seq == NULL)
3019n/a return NULL;
3020n/a assert(PyBytes_Check(seq));
3021n/a
3022n/a if (it->it_index < PyBytes_GET_SIZE(seq)) {
3023n/a item = PyLong_FromLong(
3024n/a (unsigned char)seq->ob_sval[it->it_index]);
3025n/a if (item != NULL)
3026n/a ++it->it_index;
3027n/a return item;
3028n/a }
3029n/a
3030n/a it->it_seq = NULL;
3031n/a Py_DECREF(seq);
3032n/a return NULL;
3033n/a}
3034n/a
3035n/astatic PyObject *
3036n/astriter_len(striterobject *it)
3037n/a{
3038n/a Py_ssize_t len = 0;
3039n/a if (it->it_seq)
3040n/a len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3041n/a return PyLong_FromSsize_t(len);
3042n/a}
3043n/a
3044n/aPyDoc_STRVAR(length_hint_doc,
3045n/a "Private method returning an estimate of len(list(it)).");
3046n/a
3047n/astatic PyObject *
3048n/astriter_reduce(striterobject *it)
3049n/a{
3050n/a if (it->it_seq != NULL) {
3051n/a return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
3052n/a it->it_seq, it->it_index);
3053n/a } else {
3054n/a return Py_BuildValue("N(())", _PyObject_GetBuiltin("iter"));
3055n/a }
3056n/a}
3057n/a
3058n/aPyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3059n/a
3060n/astatic PyObject *
3061n/astriter_setstate(striterobject *it, PyObject *state)
3062n/a{
3063n/a Py_ssize_t index = PyLong_AsSsize_t(state);
3064n/a if (index == -1 && PyErr_Occurred())
3065n/a return NULL;
3066n/a if (it->it_seq != NULL) {
3067n/a if (index < 0)
3068n/a index = 0;
3069n/a else if (index > PyBytes_GET_SIZE(it->it_seq))
3070n/a index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3071n/a it->it_index = index;
3072n/a }
3073n/a Py_RETURN_NONE;
3074n/a}
3075n/a
3076n/aPyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3077n/a
3078n/astatic PyMethodDef striter_methods[] = {
3079n/a {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3080n/a length_hint_doc},
3081n/a {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3082n/a reduce_doc},
3083n/a {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3084n/a setstate_doc},
3085n/a {NULL, NULL} /* sentinel */
3086n/a};
3087n/a
3088n/aPyTypeObject PyBytesIter_Type = {
3089n/a PyVarObject_HEAD_INIT(&PyType_Type, 0)
3090n/a "bytes_iterator", /* tp_name */
3091n/a sizeof(striterobject), /* tp_basicsize */
3092n/a 0, /* tp_itemsize */
3093n/a /* methods */
3094n/a (destructor)striter_dealloc, /* tp_dealloc */
3095n/a 0, /* tp_print */
3096n/a 0, /* tp_getattr */
3097n/a 0, /* tp_setattr */
3098n/a 0, /* tp_reserved */
3099n/a 0, /* tp_repr */
3100n/a 0, /* tp_as_number */
3101n/a 0, /* tp_as_sequence */
3102n/a 0, /* tp_as_mapping */
3103n/a 0, /* tp_hash */
3104n/a 0, /* tp_call */
3105n/a 0, /* tp_str */
3106n/a PyObject_GenericGetAttr, /* tp_getattro */
3107n/a 0, /* tp_setattro */
3108n/a 0, /* tp_as_buffer */
3109n/a Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3110n/a 0, /* tp_doc */
3111n/a (traverseproc)striter_traverse, /* tp_traverse */
3112n/a 0, /* tp_clear */
3113n/a 0, /* tp_richcompare */
3114n/a 0, /* tp_weaklistoffset */
3115n/a PyObject_SelfIter, /* tp_iter */
3116n/a (iternextfunc)striter_next, /* tp_iternext */
3117n/a striter_methods, /* tp_methods */
3118n/a 0,
3119n/a};
3120n/a
3121n/astatic PyObject *
3122n/abytes_iter(PyObject *seq)
3123n/a{
3124n/a striterobject *it;
3125n/a
3126n/a if (!PyBytes_Check(seq)) {
3127n/a PyErr_BadInternalCall();
3128n/a return NULL;
3129n/a }
3130n/a it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3131n/a if (it == NULL)
3132n/a return NULL;
3133n/a it->it_index = 0;
3134n/a Py_INCREF(seq);
3135n/a it->it_seq = (PyBytesObject *)seq;
3136n/a _PyObject_GC_TRACK(it);
3137n/a return (PyObject *)it;
3138n/a}
3139n/a
3140n/a
3141n/a/* _PyBytesWriter API */
3142n/a
/* The writer over-allocates by size / OVERALLOCATE_FACTOR. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3150n/a
3151n/avoid
3152n/a_PyBytesWriter_Init(_PyBytesWriter *writer)
3153n/a{
3154n/a /* Set all attributes before small_buffer to 0 */
3155n/a memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3156n/a#ifdef Py_DEBUG
3157n/a memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
3158n/a#endif
3159n/a}
3160n/a
/* Release the object held in writer->buffer, if any.  Py_CLEAR also resets
   the field to NULL, so calling this on a writer without a buffer is
   harmless. */
void
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
{
    Py_CLEAR(writer->buffer);
}
3166n/a
3167n/aPy_LOCAL_INLINE(char*)
3168n/a_PyBytesWriter_AsString(_PyBytesWriter *writer)
3169n/a{
3170n/a if (writer->use_small_buffer) {
3171n/a assert(writer->buffer == NULL);
3172n/a return writer->small_buffer;
3173n/a }
3174n/a else if (writer->use_bytearray) {
3175n/a assert(writer->buffer != NULL);
3176n/a return PyByteArray_AS_STRING(writer->buffer);
3177n/a }
3178n/a else {
3179n/a assert(writer->buffer != NULL);
3180n/a return PyBytes_AS_STRING(writer->buffer);
3181n/a }
3182n/a}
3183n/a
3184n/aPy_LOCAL_INLINE(Py_ssize_t)
3185n/a_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3186n/a{
3187n/a char *start = _PyBytesWriter_AsString(writer);
3188n/a assert(str != NULL);
3189n/a assert(str >= start);
3190n/a assert(str - start <= writer->allocated);
3191n/a return str - start;
3192n/a}
3193n/a
3194n/aPy_LOCAL_INLINE(void)
3195n/a_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3196n/a{
3197n/a#ifdef Py_DEBUG
3198n/a char *start, *end;
3199n/a
3200n/a if (writer->use_small_buffer) {
3201n/a assert(writer->buffer == NULL);
3202n/a }
3203n/a else {
3204n/a assert(writer->buffer != NULL);
3205n/a if (writer->use_bytearray)
3206n/a assert(PyByteArray_CheckExact(writer->buffer));
3207n/a else
3208n/a assert(PyBytes_CheckExact(writer->buffer));
3209n/a assert(Py_REFCNT(writer->buffer) == 1);
3210n/a }
3211n/a
3212n/a if (writer->use_bytearray) {
3213n/a /* bytearray has its own overallocation algorithm,
3214n/a writer overallocation must be disabled */
3215n/a assert(!writer->overallocate);
3216n/a }
3217n/a
3218n/a assert(0 <= writer->allocated);
3219n/a assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3220n/a /* the last byte must always be null */
3221n/a start = _PyBytesWriter_AsString(writer);
3222n/a assert(start[writer->allocated] == 0);
3223n/a
3224n/a end = start + writer->allocated;
3225n/a assert(str != NULL);
3226n/a assert(start <= str && str <= end);
3227n/a#endif
3228n/a}
3229n/a
3230n/avoid*
3231n/a_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3232n/a{
3233n/a Py_ssize_t allocated, pos;
3234n/a
3235n/a _PyBytesWriter_CheckConsistency(writer, str);
3236n/a assert(writer->allocated < size);
3237n/a
3238n/a allocated = size;
3239n/a if (writer->overallocate
3240n/a && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3241n/a /* overallocate to limit the number of realloc() */
3242n/a allocated += allocated / OVERALLOCATE_FACTOR;
3243n/a }
3244n/a
3245n/a pos = _PyBytesWriter_GetSize(writer, str);
3246n/a if (!writer->use_small_buffer) {
3247n/a if (writer->use_bytearray) {
3248n/a if (PyByteArray_Resize(writer->buffer, allocated))
3249n/a goto error;
3250n/a /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3251n/a but we cannot use ob_alloc because bytes may need to be moved
3252n/a to use the whole buffer. bytearray uses an internal optimization
3253n/a to avoid moving or copying bytes when bytes are removed at the
3254n/a beginning (ex: del bytearray[:1]). */
3255n/a }
3256n/a else {
3257n/a if (_PyBytes_Resize(&writer->buffer, allocated))
3258n/a goto error;
3259n/a }
3260n/a }
3261n/a else {
3262n/a /* convert from stack buffer to bytes object buffer */
3263n/a assert(writer->buffer == NULL);
3264n/a
3265n/a if (writer->use_bytearray)
3266n/a writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3267n/a else
3268n/a writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3269n/a if (writer->buffer == NULL)
3270n/a goto error;
3271n/a
3272n/a if (pos != 0) {
3273n/a char *dest;
3274n/a if (writer->use_bytearray)
3275n/a dest = PyByteArray_AS_STRING(writer->buffer);
3276n/a else
3277n/a dest = PyBytes_AS_STRING(writer->buffer);
3278n/a memcpy(dest,
3279n/a writer->small_buffer,
3280n/a pos);
3281n/a }
3282n/a
3283n/a writer->use_small_buffer = 0;
3284n/a#ifdef Py_DEBUG
3285n/a memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
3286n/a#endif
3287n/a }
3288n/a writer->allocated = allocated;
3289n/a
3290n/a str = _PyBytesWriter_AsString(writer) + pos;
3291n/a _PyBytesWriter_CheckConsistency(writer, str);
3292n/a return str;
3293n/a
3294n/aerror:
3295n/a _PyBytesWriter_Dealloc(writer);
3296n/a return NULL;
3297n/a}
3298n/a
3299n/avoid*
3300n/a_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3301n/a{
3302n/a Py_ssize_t new_min_size;
3303n/a
3304n/a _PyBytesWriter_CheckConsistency(writer, str);
3305n/a assert(size >= 0);
3306n/a
3307n/a if (size == 0) {
3308n/a /* nothing to do */
3309n/a return str;
3310n/a }
3311n/a
3312n/a if (writer->min_size > PY_SSIZE_T_MAX - size) {
3313n/a PyErr_NoMemory();
3314n/a _PyBytesWriter_Dealloc(writer);
3315n/a return NULL;
3316n/a }
3317n/a new_min_size = writer->min_size + size;
3318n/a
3319n/a if (new_min_size > writer->allocated)
3320n/a str = _PyBytesWriter_Resize(writer, str, new_min_size);
3321n/a
3322n/a writer->min_size = new_min_size;
3323n/a return str;
3324n/a}
3325n/a
3326n/a/* Allocate the buffer to write size bytes.
3327n/a Return the pointer to the beginning of buffer data.
3328n/a Raise an exception and return NULL on error. */
3329n/avoid*
3330n/a_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3331n/a{
3332n/a /* ensure that _PyBytesWriter_Alloc() is only called once */
3333n/a assert(writer->min_size == 0 && writer->buffer == NULL);
3334n/a assert(size >= 0);
3335n/a
3336n/a writer->use_small_buffer = 1;
3337n/a#ifdef Py_DEBUG
3338n/a writer->allocated = sizeof(writer->small_buffer) - 1;
3339n/a /* In debug mode, don't use the full small buffer because it is less
3340n/a efficient than bytes and bytearray objects to detect buffer underflow
3341n/a and buffer overflow. Use 10 bytes of the small buffer to test also
3342n/a code using the smaller buffer in debug mode.
3343n/a
3344n/a Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3345n/a in debug mode to also be able to detect stack overflow when running
3346n/a tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3347n/a if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3348n/a stack overflow. */
3349n/a writer->allocated = Py_MIN(writer->allocated, 10);
3350n/a /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3351n/a to detect buffer overflow */
3352n/a writer->small_buffer[writer->allocated] = 0;
3353n/a#else
3354n/a writer->allocated = sizeof(writer->small_buffer);
3355n/a#endif
3356n/a return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3357n/a}
3358n/a
3359n/aPyObject *
3360n/a_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3361n/a{
3362n/a Py_ssize_t size;
3363n/a PyObject *result;
3364n/a
3365n/a _PyBytesWriter_CheckConsistency(writer, str);
3366n/a
3367n/a size = _PyBytesWriter_GetSize(writer, str);
3368n/a if (size == 0 && !writer->use_bytearray) {
3369n/a Py_CLEAR(writer->buffer);
3370n/a /* Get the empty byte string singleton */
3371n/a result = PyBytes_FromStringAndSize(NULL, 0);
3372n/a }
3373n/a else if (writer->use_small_buffer) {
3374n/a if (writer->use_bytearray) {
3375n/a result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3376n/a }
3377n/a else {
3378n/a result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3379n/a }
3380n/a }
3381n/a else {
3382n/a result = writer->buffer;
3383n/a writer->buffer = NULL;
3384n/a
3385n/a if (size != writer->allocated) {
3386n/a if (writer->use_bytearray) {
3387n/a if (PyByteArray_Resize(result, size)) {
3388n/a Py_DECREF(result);
3389n/a return NULL;
3390n/a }
3391n/a }
3392n/a else {
3393n/a if (_PyBytes_Resize(&result, size)) {
3394n/a assert(result == NULL);
3395n/a return NULL;
3396n/a }
3397n/a }
3398n/a }
3399n/a }
3400n/a return result;
3401n/a}
3402n/a
3403n/avoid*
3404n/a_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3405n/a const void *bytes, Py_ssize_t size)
3406n/a{
3407n/a char *str = (char *)ptr;
3408n/a
3409n/a str = _PyBytesWriter_Prepare(writer, str, size);
3410n/a if (str == NULL)
3411n/a return NULL;
3412n/a
3413n/a memcpy(str, bytes, size);
3414n/a str += size;
3415n/a
3416n/a return str;
3417n/a}