» Core Development > Code coverage > Modules/_codecsmodule.c

Python code coverage for Modules/_codecsmodule.c

# count content
1n/a/* ------------------------------------------------------------------------
2n/a
3n/a _codecs -- Provides access to the codec registry and the builtin
4n/a codecs.
5n/a
6n/a This module should never be imported directly. The standard library
7n/a module "codecs" wraps this builtin module for use within Python.
8n/a
9n/a The codec registry is accessible via:
10n/a
11n/a register(search_function) -> None
12n/a
13n/a lookup(encoding) -> CodecInfo object
14n/a
15n/a The builtin Unicode codecs use the following interface:
16n/a
17n/a <encoding>_encode(Unicode_object[,errors='strict']) ->
18n/a (string object, bytes consumed)
19n/a
20n/a <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21n/a (Unicode object, bytes consumed)
22n/a
23n/a These <encoding>s are available: utf_8, unicode_escape,
24n/a raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
25n/a mbcs (on win32).
26n/a
27n/a
28n/aWritten by Marc-Andre Lemburg (mal@lemburg.com).
29n/a
30n/aCopyright (c) Corporation for National Research Initiatives.
31n/a
32n/a ------------------------------------------------------------------------ */
33n/a
34n/a#define PY_SSIZE_T_CLEAN
35n/a#include "Python.h"
36n/a
37n/a#ifdef MS_WINDOWS
38n/a#include <windows.h>
39n/a#endif
40n/a
41n/a/*[clinic input]
42n/amodule _codecs
43n/a[clinic start generated code]*/
44n/a/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
45n/a
46n/a#include "clinic/_codecsmodule.c.h"
47n/a
48n/a/* --- Registry ----------------------------------------------------------- */
49n/a
50n/a/*[clinic input]
51n/a_codecs.register
52n/a search_function: object
53n/a /
54n/a
55n/aRegister a codec search function.
56n/a
57n/aSearch functions are expected to take one argument, the encoding name in
58n/aall lower case letters, and either return None, or a tuple of functions
59n/a(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
60n/a[clinic start generated code]*/
61n/a
62n/astatic PyObject *
63n/a_codecs_register(PyObject *module, PyObject *search_function)
64n/a/*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
65n/a{
66n/a if (PyCodec_Register(search_function))
67n/a return NULL;
68n/a
69n/a Py_RETURN_NONE;
70n/a}
71n/a
72n/a/*[clinic input]
73n/a_codecs.lookup
74n/a encoding: str
75n/a /
76n/a
77n/aLooks up a codec tuple in the Python codec registry and returns a CodecInfo object.
78n/a[clinic start generated code]*/
79n/a
80n/astatic PyObject *
81n/a_codecs_lookup_impl(PyObject *module, const char *encoding)
82n/a/*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
83n/a{
84n/a return _PyCodec_Lookup(encoding);
85n/a}
86n/a
87n/a/*[clinic input]
88n/a_codecs.encode
89n/a obj: object
90n/a encoding: str(c_default="NULL") = "utf-8"
91n/a errors: str(c_default="NULL") = "strict"
92n/a
93n/aEncodes obj using the codec registered for encoding.
94n/a
95n/aThe default encoding is 'utf-8'. errors may be given to set a
96n/adifferent error handling scheme. Default is 'strict' meaning that encoding
97n/aerrors raise a ValueError. Other possible values are 'ignore', 'replace'
98n/aand 'backslashreplace' as well as any other name registered with
99n/acodecs.register_error that can handle ValueErrors.
100n/a[clinic start generated code]*/
101n/a
102n/astatic PyObject *
103n/a_codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
104n/a const char *errors)
105n/a/*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
106n/a{
107n/a if (encoding == NULL)
108n/a encoding = PyUnicode_GetDefaultEncoding();
109n/a
110n/a /* Encode via the codec registry */
111n/a return PyCodec_Encode(obj, encoding, errors);
112n/a}
113n/a
114n/a/*[clinic input]
115n/a_codecs.decode
116n/a obj: object
117n/a encoding: str(c_default="NULL") = "utf-8"
118n/a errors: str(c_default="NULL") = "strict"
119n/a
120n/aDecodes obj using the codec registered for encoding.
121n/a
122n/aDefault encoding is 'utf-8'. errors may be given to set a
123n/adifferent error handling scheme. Default is 'strict' meaning that encoding
124n/aerrors raise a ValueError. Other possible values are 'ignore', 'replace'
125n/aand 'backslashreplace' as well as any other name registered with
126n/acodecs.register_error that can handle ValueErrors.
127n/a[clinic start generated code]*/
128n/a
129n/astatic PyObject *
130n/a_codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
131n/a const char *errors)
132n/a/*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
133n/a{
134n/a if (encoding == NULL)
135n/a encoding = PyUnicode_GetDefaultEncoding();
136n/a
137n/a /* Decode via the codec registry */
138n/a return PyCodec_Decode(obj, encoding, errors);
139n/a}
140n/a
141n/a/* --- Helpers ------------------------------------------------------------ */
142n/a
143n/a/*[clinic input]
144n/a_codecs._forget_codec
145n/a
146n/a encoding: str
147n/a /
148n/a
149n/aPurge the named codec from the internal codec lookup cache
150n/a[clinic start generated code]*/
151n/a
152n/astatic PyObject *
153n/a_codecs__forget_codec_impl(PyObject *module, const char *encoding)
154n/a/*[clinic end generated code: output=0bde9f0a5b084aa2 input=18d5d92d0e386c38]*/
155n/a{
156n/a if (_PyCodec_Forget(encoding) < 0) {
157n/a return NULL;
158n/a };
159n/a Py_RETURN_NONE;
160n/a}
161n/a
162n/astatic
163n/aPyObject *codec_tuple(PyObject *decoded,
164n/a Py_ssize_t len)
165n/a{
166n/a if (decoded == NULL)
167n/a return NULL;
168n/a return Py_BuildValue("Nn", decoded, len);
169n/a}
170n/a
171n/a/* --- String codecs ------------------------------------------------------ */
172n/a/*[clinic input]
173n/a_codecs.escape_decode
174n/a data: Py_buffer(accept={str, buffer})
175n/a errors: str(accept={str, NoneType}) = NULL
176n/a /
177n/a[clinic start generated code]*/
178n/a
179n/astatic PyObject *
180n/a_codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
181n/a const char *errors)
182n/a/*[clinic end generated code: output=505200ba8056979a input=0018edfd99db714d]*/
183n/a{
184n/a PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
185n/a errors, 0, NULL);
186n/a return codec_tuple(decoded, data->len);
187n/a}
188n/a
189n/a/*[clinic input]
190n/a_codecs.escape_encode
191n/a data: object(subclass_of='&PyBytes_Type')
192n/a errors: str(accept={str, NoneType}) = NULL
193n/a /
194n/a[clinic start generated code]*/
195n/a
196n/astatic PyObject *
197n/a_codecs_escape_encode_impl(PyObject *module, PyObject *data,
198n/a const char *errors)
199n/a/*[clinic end generated code: output=4af1d477834bab34 input=da9ded00992f32f2]*/
200n/a{
201n/a Py_ssize_t size;
202n/a Py_ssize_t newsize;
203n/a PyObject *v;
204n/a
205n/a size = PyBytes_GET_SIZE(data);
206n/a if (size > PY_SSIZE_T_MAX / 4) {
207n/a PyErr_SetString(PyExc_OverflowError,
208n/a "string is too large to encode");
209n/a return NULL;
210n/a }
211n/a newsize = 4*size;
212n/a v = PyBytes_FromStringAndSize(NULL, newsize);
213n/a
214n/a if (v == NULL) {
215n/a return NULL;
216n/a }
217n/a else {
218n/a Py_ssize_t i;
219n/a char c;
220n/a char *p = PyBytes_AS_STRING(v);
221n/a
222n/a for (i = 0; i < size; i++) {
223n/a /* There's at least enough room for a hex escape */
224n/a assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
225n/a c = PyBytes_AS_STRING(data)[i];
226n/a if (c == '\'' || c == '\\')
227n/a *p++ = '\\', *p++ = c;
228n/a else if (c == '\t')
229n/a *p++ = '\\', *p++ = 't';
230n/a else if (c == '\n')
231n/a *p++ = '\\', *p++ = 'n';
232n/a else if (c == '\r')
233n/a *p++ = '\\', *p++ = 'r';
234n/a else if (c < ' ' || c >= 0x7f) {
235n/a *p++ = '\\';
236n/a *p++ = 'x';
237n/a *p++ = Py_hexdigits[(c & 0xf0) >> 4];
238n/a *p++ = Py_hexdigits[c & 0xf];
239n/a }
240n/a else
241n/a *p++ = c;
242n/a }
243n/a *p = '\0';
244n/a if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
245n/a return NULL;
246n/a }
247n/a }
248n/a
249n/a return codec_tuple(v, size);
250n/a}
251n/a
252n/a/* --- Decoder ------------------------------------------------------------ */
253n/a/*[clinic input]
254n/a_codecs.unicode_internal_decode
255n/a obj: object
256n/a errors: str(accept={str, NoneType}) = NULL
257n/a /
258n/a[clinic start generated code]*/
259n/a
260n/astatic PyObject *
261n/a_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj,
262n/a const char *errors)
263n/a/*[clinic end generated code: output=edbfe175e09eff9a input=8d57930aeda170c6]*/
264n/a{
265n/a if (PyUnicode_Check(obj)) {
266n/a if (PyUnicode_READY(obj) < 0)
267n/a return NULL;
268n/a Py_INCREF(obj);
269n/a return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
270n/a }
271n/a else {
272n/a Py_buffer view;
273n/a PyObject *result;
274n/a if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
275n/a return NULL;
276n/a
277n/a result = codec_tuple(
278n/a _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
279n/a view.len);
280n/a PyBuffer_Release(&view);
281n/a return result;
282n/a }
283n/a}
284n/a
285n/a/*[clinic input]
286n/a_codecs.utf_7_decode
287n/a data: Py_buffer
288n/a errors: str(accept={str, NoneType}) = NULL
289n/a final: int(c_default="0") = False
290n/a /
291n/a[clinic start generated code]*/
292n/a
293n/astatic PyObject *
294n/a_codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
295n/a const char *errors, int final)
296n/a/*[clinic end generated code: output=0cd3a944a32a4089 input=bc4d6247ecdb01e6]*/
297n/a{
298n/a Py_ssize_t consumed = data->len;
299n/a PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
300n/a errors,
301n/a final ? NULL : &consumed);
302n/a return codec_tuple(decoded, consumed);
303n/a}
304n/a
305n/a/*[clinic input]
306n/a_codecs.utf_8_decode
307n/a data: Py_buffer
308n/a errors: str(accept={str, NoneType}) = NULL
309n/a final: int(c_default="0") = False
310n/a /
311n/a[clinic start generated code]*/
312n/a
313n/astatic PyObject *
314n/a_codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
315n/a const char *errors, int final)
316n/a/*[clinic end generated code: output=10f74dec8d9bb8bf input=39161d71e7422ee2]*/
317n/a{
318n/a Py_ssize_t consumed = data->len;
319n/a PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
320n/a errors,
321n/a final ? NULL : &consumed);
322n/a return codec_tuple(decoded, consumed);
323n/a}
324n/a
325n/a/*[clinic input]
326n/a_codecs.utf_16_decode
327n/a data: Py_buffer
328n/a errors: str(accept={str, NoneType}) = NULL
329n/a final: int(c_default="0") = False
330n/a /
331n/a[clinic start generated code]*/
332n/a
333n/astatic PyObject *
334n/a_codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
335n/a const char *errors, int final)
336n/a/*[clinic end generated code: output=783b442abcbcc2d0 input=f3cf01d1461007ce]*/
337n/a{
338n/a int byteorder = 0;
339n/a /* This is overwritten unless final is true. */
340n/a Py_ssize_t consumed = data->len;
341n/a PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
342n/a errors, &byteorder,
343n/a final ? NULL : &consumed);
344n/a return codec_tuple(decoded, consumed);
345n/a}
346n/a
347n/a/*[clinic input]
348n/a_codecs.utf_16_le_decode
349n/a data: Py_buffer
350n/a errors: str(accept={str, NoneType}) = NULL
351n/a final: int(c_default="0") = False
352n/a /
353n/a[clinic start generated code]*/
354n/a
355n/astatic PyObject *
356n/a_codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
357n/a const char *errors, int final)
358n/a/*[clinic end generated code: output=899b9e6364379dcd input=a77e3bf97335d94e]*/
359n/a{
360n/a int byteorder = -1;
361n/a /* This is overwritten unless final is true. */
362n/a Py_ssize_t consumed = data->len;
363n/a PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
364n/a errors, &byteorder,
365n/a final ? NULL : &consumed);
366n/a return codec_tuple(decoded, consumed);
367n/a}
368n/a
369n/a/*[clinic input]
370n/a_codecs.utf_16_be_decode
371n/a data: Py_buffer
372n/a errors: str(accept={str, NoneType}) = NULL
373n/a final: int(c_default="0") = False
374n/a /
375n/a[clinic start generated code]*/
376n/a
377n/astatic PyObject *
378n/a_codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
379n/a const char *errors, int final)
380n/a/*[clinic end generated code: output=49f6465ea07669c8 input=606f69fae91b5563]*/
381n/a{
382n/a int byteorder = 1;
383n/a /* This is overwritten unless final is true. */
384n/a Py_ssize_t consumed = data->len;
385n/a PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
386n/a errors, &byteorder,
387n/a final ? NULL : &consumed);
388n/a return codec_tuple(decoded, consumed);
389n/a}
390n/a
391n/a/* This non-standard version also provides access to the byteorder
392n/a parameter of the builtin UTF-16 codec.
393n/a
394n/a It returns a tuple (unicode, bytesread, byteorder) with byteorder
395n/a being the value in effect at the end of data.
396n/a
397n/a*/
398n/a/*[clinic input]
399n/a_codecs.utf_16_ex_decode
400n/a data: Py_buffer
401n/a errors: str(accept={str, NoneType}) = NULL
402n/a byteorder: int = 0
403n/a final: int(c_default="0") = False
404n/a /
405n/a[clinic start generated code]*/
406n/a
407n/astatic PyObject *
408n/a_codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
409n/a const char *errors, int byteorder, int final)
410n/a/*[clinic end generated code: output=0f385f251ecc1988 input=f6e7f697658c013e]*/
411n/a{
412n/a /* This is overwritten unless final is true. */
413n/a Py_ssize_t consumed = data->len;
414n/a
415n/a PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
416n/a errors, &byteorder,
417n/a final ? NULL : &consumed);
418n/a if (decoded == NULL)
419n/a return NULL;
420n/a return Py_BuildValue("Nni", decoded, consumed, byteorder);
421n/a}
422n/a
423n/a/*[clinic input]
424n/a_codecs.utf_32_decode
425n/a data: Py_buffer
426n/a errors: str(accept={str, NoneType}) = NULL
427n/a final: int(c_default="0") = False
428n/a /
429n/a[clinic start generated code]*/
430n/a
431n/astatic PyObject *
432n/a_codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
433n/a const char *errors, int final)
434n/a/*[clinic end generated code: output=2fc961807f7b145f input=86d4f41c6c2e763d]*/
435n/a{
436n/a int byteorder = 0;
437n/a /* This is overwritten unless final is true. */
438n/a Py_ssize_t consumed = data->len;
439n/a PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
440n/a errors, &byteorder,
441n/a final ? NULL : &consumed);
442n/a return codec_tuple(decoded, consumed);
443n/a}
444n/a
445n/a/*[clinic input]
446n/a_codecs.utf_32_le_decode
447n/a data: Py_buffer
448n/a errors: str(accept={str, NoneType}) = NULL
449n/a final: int(c_default="0") = False
450n/a /
451n/a[clinic start generated code]*/
452n/a
453n/astatic PyObject *
454n/a_codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
455n/a const char *errors, int final)
456n/a/*[clinic end generated code: output=ec8f46b67a94f3e6 input=d18b650772d188ba]*/
457n/a{
458n/a int byteorder = -1;
459n/a /* This is overwritten unless final is true. */
460n/a Py_ssize_t consumed = data->len;
461n/a PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
462n/a errors, &byteorder,
463n/a final ? NULL : &consumed);
464n/a return codec_tuple(decoded, consumed);
465n/a}
466n/a
467n/a/*[clinic input]
468n/a_codecs.utf_32_be_decode
469n/a data: Py_buffer
470n/a errors: str(accept={str, NoneType}) = NULL
471n/a final: int(c_default="0") = False
472n/a /
473n/a[clinic start generated code]*/
474n/a
475n/astatic PyObject *
476n/a_codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
477n/a const char *errors, int final)
478n/a/*[clinic end generated code: output=ff82bae862c92c4e input=19c271b5d34926d8]*/
479n/a{
480n/a int byteorder = 1;
481n/a /* This is overwritten unless final is true. */
482n/a Py_ssize_t consumed = data->len;
483n/a PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
484n/a errors, &byteorder,
485n/a final ? NULL : &consumed);
486n/a return codec_tuple(decoded, consumed);
487n/a}
488n/a
489n/a/* This non-standard version also provides access to the byteorder
490n/a parameter of the builtin UTF-32 codec.
491n/a
492n/a It returns a tuple (unicode, bytesread, byteorder) with byteorder
493n/a being the value in effect at the end of data.
494n/a
495n/a*/
496n/a/*[clinic input]
497n/a_codecs.utf_32_ex_decode
498n/a data: Py_buffer
499n/a errors: str(accept={str, NoneType}) = NULL
500n/a byteorder: int = 0
501n/a final: int(c_default="0") = False
502n/a /
503n/a[clinic start generated code]*/
504n/a
505n/astatic PyObject *
506n/a_codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
507n/a const char *errors, int byteorder, int final)
508n/a/*[clinic end generated code: output=6bfb177dceaf4848 input=4af3e6ccfe34a076]*/
509n/a{
510n/a Py_ssize_t consumed = data->len;
511n/a PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
512n/a errors, &byteorder,
513n/a final ? NULL : &consumed);
514n/a if (decoded == NULL)
515n/a return NULL;
516n/a return Py_BuildValue("Nni", decoded, consumed, byteorder);
517n/a}
518n/a
519n/a/*[clinic input]
520n/a_codecs.unicode_escape_decode
521n/a data: Py_buffer(accept={str, buffer})
522n/a errors: str(accept={str, NoneType}) = NULL
523n/a /
524n/a[clinic start generated code]*/
525n/a
526n/astatic PyObject *
527n/a_codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
528n/a const char *errors)
529n/a/*[clinic end generated code: output=3ca3c917176b82ab input=49fd27d06813a7f5]*/
530n/a{
531n/a PyObject *decoded = PyUnicode_DecodeUnicodeEscape(data->buf, data->len,
532n/a errors);
533n/a return codec_tuple(decoded, data->len);
534n/a}
535n/a
536n/a/*[clinic input]
537n/a_codecs.raw_unicode_escape_decode
538n/a data: Py_buffer(accept={str, buffer})
539n/a errors: str(accept={str, NoneType}) = NULL
540n/a /
541n/a[clinic start generated code]*/
542n/a
543n/astatic PyObject *
544n/a_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
545n/a const char *errors)
546n/a/*[clinic end generated code: output=c98eeb56028070a6 input=770903a211434ebc]*/
547n/a{
548n/a PyObject *decoded = PyUnicode_DecodeRawUnicodeEscape(data->buf, data->len,
549n/a errors);
550n/a return codec_tuple(decoded, data->len);
551n/a}
552n/a
553n/a/*[clinic input]
554n/a_codecs.latin_1_decode
555n/a data: Py_buffer
556n/a errors: str(accept={str, NoneType}) = NULL
557n/a /
558n/a[clinic start generated code]*/
559n/a
560n/astatic PyObject *
561n/a_codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
562n/a const char *errors)
563n/a/*[clinic end generated code: output=07f3dfa3f72c7d8f input=5cad0f1759c618ec]*/
564n/a{
565n/a PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
566n/a return codec_tuple(decoded, data->len);
567n/a}
568n/a
569n/a/*[clinic input]
570n/a_codecs.ascii_decode
571n/a data: Py_buffer
572n/a errors: str(accept={str, NoneType}) = NULL
573n/a /
574n/a[clinic start generated code]*/
575n/a
576n/astatic PyObject *
577n/a_codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
578n/a const char *errors)
579n/a/*[clinic end generated code: output=2627d72058d42429 input=ad1106f64037bd16]*/
580n/a{
581n/a PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
582n/a return codec_tuple(decoded, data->len);
583n/a}
584n/a
585n/a/*[clinic input]
586n/a_codecs.charmap_decode
587n/a data: Py_buffer
588n/a errors: str(accept={str, NoneType}) = NULL
589n/a mapping: object = NULL
590n/a /
591n/a[clinic start generated code]*/
592n/a
593n/astatic PyObject *
594n/a_codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
595n/a const char *errors, PyObject *mapping)
596n/a/*[clinic end generated code: output=2c335b09778cf895 input=19712ca35c5a80e2]*/
597n/a{
598n/a PyObject *decoded;
599n/a
600n/a if (mapping == Py_None)
601n/a mapping = NULL;
602n/a
603n/a decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
604n/a return codec_tuple(decoded, data->len);
605n/a}
606n/a
607n/a#ifdef MS_WINDOWS
608n/a
609n/a/*[clinic input]
610n/a_codecs.mbcs_decode
611n/a data: Py_buffer
612n/a errors: str(accept={str, NoneType}) = NULL
613n/a final: int(c_default="0") = False
614n/a /
615n/a[clinic start generated code]*/
616n/a
617n/astatic PyObject *
618n/a_codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
619n/a const char *errors, int final)
620n/a/*[clinic end generated code: output=39b65b8598938c4b input=d492c1ca64f4fa8a]*/
621n/a{
622n/a Py_ssize_t consumed = data->len;
623n/a PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
624n/a errors, final ? NULL : &consumed);
625n/a return codec_tuple(decoded, consumed);
626n/a}
627n/a
628n/a/*[clinic input]
629n/a_codecs.oem_decode
630n/a data: Py_buffer
631n/a errors: str(accept={str, NoneType}) = NULL
632n/a final: int(c_default="0") = False
633n/a /
634n/a[clinic start generated code]*/
635n/a
636n/astatic PyObject *
637n/a_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
638n/a const char *errors, int final)
639n/a/*[clinic end generated code: output=da1617612f3fcad8 input=95b8a92c446b03cd]*/
640n/a{
641n/a Py_ssize_t consumed = data->len;
642n/a PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
643n/a data->buf, data->len, errors, final ? NULL : &consumed);
644n/a return codec_tuple(decoded, consumed);
645n/a}
646n/a
647n/a/*[clinic input]
648n/a_codecs.code_page_decode
649n/a codepage: int
650n/a data: Py_buffer
651n/a errors: str(accept={str, NoneType}) = NULL
652n/a final: int(c_default="0") = False
653n/a /
654n/a[clinic start generated code]*/
655n/a
656n/astatic PyObject *
657n/a_codecs_code_page_decode_impl(PyObject *module, int codepage,
658n/a Py_buffer *data, const char *errors, int final)
659n/a/*[clinic end generated code: output=53008ea967da3fff input=4f3152a304e21d51]*/
660n/a{
661n/a Py_ssize_t consumed = data->len;
662n/a PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
663n/a data->buf, data->len,
664n/a errors,
665n/a final ? NULL : &consumed);
666n/a return codec_tuple(decoded, consumed);
667n/a}
668n/a
669n/a#endif /* MS_WINDOWS */
670n/a
671n/a/* --- Encoder ------------------------------------------------------------ */
672n/a
673n/a/*[clinic input]
674n/a_codecs.readbuffer_encode
675n/a data: Py_buffer(accept={str, buffer})
676n/a errors: str(accept={str, NoneType}) = NULL
677n/a /
678n/a[clinic start generated code]*/
679n/a
680n/astatic PyObject *
681n/a_codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
682n/a const char *errors)
683n/a/*[clinic end generated code: output=c645ea7cdb3d6e86 input=b7c322b89d4ab923]*/
684n/a{
685n/a PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
686n/a return codec_tuple(result, data->len);
687n/a}
688n/a
689n/a/*[clinic input]
690n/a_codecs.unicode_internal_encode
691n/a obj: object
692n/a errors: str(accept={str, NoneType}) = NULL
693n/a /
694n/a[clinic start generated code]*/
695n/a
696n/astatic PyObject *
697n/a_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
698n/a const char *errors)
699n/a/*[clinic end generated code: output=a72507dde4ea558f input=8628f0280cf5ba61]*/
700n/a{
701n/a if (PyErr_WarnEx(PyExc_DeprecationWarning,
702n/a "unicode_internal codec has been deprecated",
703n/a 1))
704n/a return NULL;
705n/a
706n/a if (PyUnicode_Check(obj)) {
707n/a Py_UNICODE *u;
708n/a Py_ssize_t len, size;
709n/a
710n/a if (PyUnicode_READY(obj) < 0)
711n/a return NULL;
712n/a
713n/a u = PyUnicode_AsUnicodeAndSize(obj, &len);
714n/a if (u == NULL)
715n/a return NULL;
716n/a if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
717n/a return PyErr_NoMemory();
718n/a size = len * sizeof(Py_UNICODE);
719n/a return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
720n/a PyUnicode_GET_LENGTH(obj));
721n/a }
722n/a else {
723n/a Py_buffer view;
724n/a PyObject *result;
725n/a if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
726n/a return NULL;
727n/a result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len),
728n/a view.len);
729n/a PyBuffer_Release(&view);
730n/a return result;
731n/a }
732n/a}
733n/a
734n/a/*[clinic input]
735n/a_codecs.utf_7_encode
736n/a str: unicode
737n/a errors: str(accept={str, NoneType}) = NULL
738n/a /
739n/a[clinic start generated code]*/
740n/a
741n/astatic PyObject *
742n/a_codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
743n/a const char *errors)
744n/a/*[clinic end generated code: output=0feda21ffc921bc8 input=d1a47579e79cbe15]*/
745n/a{
746n/a return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
747n/a PyUnicode_GET_LENGTH(str));
748n/a}
749n/a
750n/a/*[clinic input]
751n/a_codecs.utf_8_encode
752n/a str: unicode
753n/a errors: str(accept={str, NoneType}) = NULL
754n/a /
755n/a[clinic start generated code]*/
756n/a
757n/astatic PyObject *
758n/a_codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
759n/a const char *errors)
760n/a/*[clinic end generated code: output=02bf47332b9c796c input=42e3ba73c4392eef]*/
761n/a{
762n/a return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
763n/a PyUnicode_GET_LENGTH(str));
764n/a}
765n/a
766n/a/* This version provides access to the byteorder parameter of the
767n/a builtin UTF-16 codecs as optional third argument. It defaults to 0
768n/a which means: use the native byte order and prepend the data with a
769n/a BOM mark.
770n/a
771n/a*/
772n/a
773n/a/*[clinic input]
774n/a_codecs.utf_16_encode
775n/a str: unicode
776n/a errors: str(accept={str, NoneType}) = NULL
777n/a byteorder: int = 0
778n/a /
779n/a[clinic start generated code]*/
780n/a
781n/astatic PyObject *
782n/a_codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
783n/a const char *errors, int byteorder)
784n/a/*[clinic end generated code: output=c654e13efa2e64e4 input=ff46416b04edb944]*/
785n/a{
786n/a return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
787n/a PyUnicode_GET_LENGTH(str));
788n/a}
789n/a
790n/a/*[clinic input]
791n/a_codecs.utf_16_le_encode
792n/a str: unicode
793n/a errors: str(accept={str, NoneType}) = NULL
794n/a /
795n/a[clinic start generated code]*/
796n/a
797n/astatic PyObject *
798n/a_codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
799n/a const char *errors)
800n/a/*[clinic end generated code: output=431b01e55f2d4995 input=cb385455ea8f2fe0]*/
801n/a{
802n/a return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
803n/a PyUnicode_GET_LENGTH(str));
804n/a}
805n/a
806n/a/*[clinic input]
807n/a_codecs.utf_16_be_encode
808n/a str: unicode
809n/a errors: str(accept={str, NoneType}) = NULL
810n/a /
811n/a[clinic start generated code]*/
812n/a
813n/astatic PyObject *
814n/a_codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
815n/a const char *errors)
816n/a/*[clinic end generated code: output=96886a6fd54dcae3 input=9119997066bdaefd]*/
817n/a{
818n/a return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
819n/a PyUnicode_GET_LENGTH(str));
820n/a}
821n/a
822n/a/* This version provides access to the byteorder parameter of the
823n/a builtin UTF-32 codecs as optional third argument. It defaults to 0
824n/a which means: use the native byte order and prepend the data with a
825n/a BOM mark.
826n/a
827n/a*/
828n/a
829n/a/*[clinic input]
830n/a_codecs.utf_32_encode
831n/a str: unicode
832n/a errors: str(accept={str, NoneType}) = NULL
833n/a byteorder: int = 0
834n/a /
835n/a[clinic start generated code]*/
836n/a
837n/astatic PyObject *
838n/a_codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
839n/a const char *errors, int byteorder)
840n/a/*[clinic end generated code: output=5c760da0c09a8b83 input=c5e77da82fbe5c2a]*/
841n/a{
842n/a return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
843n/a PyUnicode_GET_LENGTH(str));
844n/a}
845n/a
846n/a/*[clinic input]
847n/a_codecs.utf_32_le_encode
848n/a str: unicode
849n/a errors: str(accept={str, NoneType}) = NULL
850n/a /
851n/a[clinic start generated code]*/
852n/a
853n/astatic PyObject *
854n/a_codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
855n/a const char *errors)
856n/a/*[clinic end generated code: output=b65cd176de8e36d6 input=9993b25fe0877848]*/
857n/a{
858n/a return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
859n/a PyUnicode_GET_LENGTH(str));
860n/a}
861n/a
862n/a/*[clinic input]
863n/a_codecs.utf_32_be_encode
864n/a str: unicode
865n/a errors: str(accept={str, NoneType}) = NULL
866n/a /
867n/a[clinic start generated code]*/
868n/a
869n/astatic PyObject *
870n/a_codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
871n/a const char *errors)
872n/a/*[clinic end generated code: output=1d9e71a9358709e9 input=d3e0ccaa02920431]*/
873n/a{
874n/a return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
875n/a PyUnicode_GET_LENGTH(str));
876n/a}
877n/a
878n/a/*[clinic input]
879n/a_codecs.unicode_escape_encode
880n/a str: unicode
881n/a errors: str(accept={str, NoneType}) = NULL
882n/a /
883n/a[clinic start generated code]*/
884n/a
885n/astatic PyObject *
886n/a_codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
887n/a const char *errors)
888n/a/*[clinic end generated code: output=66271b30bc4f7a3c input=65d9eefca65b455a]*/
889n/a{
890n/a return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
891n/a PyUnicode_GET_LENGTH(str));
892n/a}
893n/a
894n/a/*[clinic input]
895n/a_codecs.raw_unicode_escape_encode
896n/a str: unicode
897n/a errors: str(accept={str, NoneType}) = NULL
898n/a /
899n/a[clinic start generated code]*/
900n/a
901n/astatic PyObject *
902n/a_codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
903n/a const char *errors)
904n/a/*[clinic end generated code: output=a66a806ed01c830a input=5aa33e4a133391ab]*/
905n/a{
906n/a return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
907n/a PyUnicode_GET_LENGTH(str));
908n/a}
909n/a
910n/a/*[clinic input]
911n/a_codecs.latin_1_encode
912n/a str: unicode
913n/a errors: str(accept={str, NoneType}) = NULL
914n/a /
915n/a[clinic start generated code]*/
916n/a
917n/astatic PyObject *
918n/a_codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
919n/a const char *errors)
920n/a/*[clinic end generated code: output=2c28c83a27884e08 input=30b11c9e49a65150]*/
921n/a{
922n/a return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
923n/a PyUnicode_GET_LENGTH(str));
924n/a}
925n/a
926n/a/*[clinic input]
927n/a_codecs.ascii_encode
928n/a str: unicode
929n/a errors: str(accept={str, NoneType}) = NULL
930n/a /
931n/a[clinic start generated code]*/
932n/a
933n/astatic PyObject *
934n/a_codecs_ascii_encode_impl(PyObject *module, PyObject *str,
935n/a const char *errors)
936n/a/*[clinic end generated code: output=b5e035182d33befc input=843a1d268e6dfa8e]*/
937n/a{
938n/a return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
939n/a PyUnicode_GET_LENGTH(str));
940n/a}
941n/a
942n/a/*[clinic input]
943n/a_codecs.charmap_encode
944n/a str: unicode
945n/a errors: str(accept={str, NoneType}) = NULL
946n/a mapping: object = NULL
947n/a /
948n/a[clinic start generated code]*/
949n/a
950n/astatic PyObject *
951n/a_codecs_charmap_encode_impl(PyObject *module, PyObject *str,
952n/a const char *errors, PyObject *mapping)
953n/a/*[clinic end generated code: output=047476f48495a9e9 input=0752cde07a6d6d00]*/
954n/a{
955n/a if (mapping == Py_None)
956n/a mapping = NULL;
957n/a
958n/a return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
959n/a PyUnicode_GET_LENGTH(str));
960n/a}
961n/a
962n/a/*[clinic input]
963n/a_codecs.charmap_build
964n/a map: unicode
965n/a /
966n/a[clinic start generated code]*/
967n/a
968n/astatic PyObject *
969n/a_codecs_charmap_build_impl(PyObject *module, PyObject *map)
970n/a/*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
971n/a{
972n/a return PyUnicode_BuildEncodingMap(map);
973n/a}
974n/a
975n/a#ifdef MS_WINDOWS
976n/a
977n/a/*[clinic input]
978n/a_codecs.mbcs_encode
979n/a str: unicode
980n/a errors: str(accept={str, NoneType}) = NULL
981n/a /
982n/a[clinic start generated code]*/
983n/a
984n/astatic PyObject *
985n/a_codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
986n/a/*[clinic end generated code: output=76e2e170c966c080 input=de471e0815947553]*/
987n/a{
988n/a return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
989n/a PyUnicode_GET_LENGTH(str));
990n/a}
991n/a
992n/a/*[clinic input]
993n/a_codecs.oem_encode
994n/a str: unicode
995n/a errors: str(accept={str, NoneType}) = NULL
996n/a /
997n/a[clinic start generated code]*/
998n/a
999n/astatic PyObject *
1000n/a_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
1001n/a/*[clinic end generated code: output=65d5982c737de649 input=3fc5f0028aad3cda]*/
1002n/a{
1003n/a return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
1004n/a PyUnicode_GET_LENGTH(str));
1005n/a}
1006n/a
1007n/a/*[clinic input]
1008n/a_codecs.code_page_encode
1009n/a code_page: int
1010n/a str: unicode
1011n/a errors: str(accept={str, NoneType}) = NULL
1012n/a /
1013n/a[clinic start generated code]*/
1014n/a
1015n/astatic PyObject *
1016n/a_codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
1017n/a const char *errors)
1018n/a/*[clinic end generated code: output=45673f6085657a9e input=786421ae617d680b]*/
1019n/a{
1020n/a return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
1021n/a PyUnicode_GET_LENGTH(str));
1022n/a}
1023n/a
1024n/a#endif /* MS_WINDOWS */
1025n/a
1026n/a/* --- Error handler registry --------------------------------------------- */
1027n/a
1028n/a/*[clinic input]
1029n/a_codecs.register_error
1030n/a errors: str
1031n/a handler: object
1032n/a /
1033n/a
1034n/aRegister the specified error handler under the name errors.
1035n/a
1036n/ahandler must be a callable object, that will be called with an exception
1037n/ainstance containing information about the location of the encoding/decoding
1038n/aerror and must return a (replacement, new position) tuple.
1039n/a[clinic start generated code]*/
1040n/a
1041n/astatic PyObject *
1042n/a_codecs_register_error_impl(PyObject *module, const char *errors,
1043n/a PyObject *handler)
1044n/a/*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
1045n/a{
1046n/a if (PyCodec_RegisterError(errors, handler))
1047n/a return NULL;
1048n/a Py_RETURN_NONE;
1049n/a}
1050n/a
1051n/a/*[clinic input]
1052n/a_codecs.lookup_error
1053n/a name: str
1054n/a /
1055n/a
1056n/alookup_error(errors) -> handler
1057n/a
1058n/aReturn the error handler for the specified error handling name or raise a
1059n/aLookupError, if no handler exists under this name.
1060n/a[clinic start generated code]*/
1061n/a
1062n/astatic PyObject *
1063n/a_codecs_lookup_error_impl(PyObject *module, const char *name)
1064n/a/*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
1065n/a{
1066n/a return PyCodec_LookupError(name);
1067n/a}
1068n/a
1069n/a/* --- Module API --------------------------------------------------------- */
1070n/a
1071n/astatic PyMethodDef _codecs_functions[] = {
1072n/a _CODECS_REGISTER_METHODDEF
1073n/a _CODECS_LOOKUP_METHODDEF
1074n/a _CODECS_ENCODE_METHODDEF
1075n/a _CODECS_DECODE_METHODDEF
1076n/a _CODECS_ESCAPE_ENCODE_METHODDEF
1077n/a _CODECS_ESCAPE_DECODE_METHODDEF
1078n/a _CODECS_UTF_8_ENCODE_METHODDEF
1079n/a _CODECS_UTF_8_DECODE_METHODDEF
1080n/a _CODECS_UTF_7_ENCODE_METHODDEF
1081n/a _CODECS_UTF_7_DECODE_METHODDEF
1082n/a _CODECS_UTF_16_ENCODE_METHODDEF
1083n/a _CODECS_UTF_16_LE_ENCODE_METHODDEF
1084n/a _CODECS_UTF_16_BE_ENCODE_METHODDEF
1085n/a _CODECS_UTF_16_DECODE_METHODDEF
1086n/a _CODECS_UTF_16_LE_DECODE_METHODDEF
1087n/a _CODECS_UTF_16_BE_DECODE_METHODDEF
1088n/a _CODECS_UTF_16_EX_DECODE_METHODDEF
1089n/a _CODECS_UTF_32_ENCODE_METHODDEF
1090n/a _CODECS_UTF_32_LE_ENCODE_METHODDEF
1091n/a _CODECS_UTF_32_BE_ENCODE_METHODDEF
1092n/a _CODECS_UTF_32_DECODE_METHODDEF
1093n/a _CODECS_UTF_32_LE_DECODE_METHODDEF
1094n/a _CODECS_UTF_32_BE_DECODE_METHODDEF
1095n/a _CODECS_UTF_32_EX_DECODE_METHODDEF
1096n/a _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1097n/a _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1098n/a _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF
1099n/a _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF
1100n/a _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1101n/a _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1102n/a _CODECS_LATIN_1_ENCODE_METHODDEF
1103n/a _CODECS_LATIN_1_DECODE_METHODDEF
1104n/a _CODECS_ASCII_ENCODE_METHODDEF
1105n/a _CODECS_ASCII_DECODE_METHODDEF
1106n/a _CODECS_CHARMAP_ENCODE_METHODDEF
1107n/a _CODECS_CHARMAP_DECODE_METHODDEF
1108n/a _CODECS_CHARMAP_BUILD_METHODDEF
1109n/a _CODECS_READBUFFER_ENCODE_METHODDEF
1110n/a _CODECS_MBCS_ENCODE_METHODDEF
1111n/a _CODECS_MBCS_DECODE_METHODDEF
1112n/a _CODECS_OEM_ENCODE_METHODDEF
1113n/a _CODECS_OEM_DECODE_METHODDEF
1114n/a _CODECS_CODE_PAGE_ENCODE_METHODDEF
1115n/a _CODECS_CODE_PAGE_DECODE_METHODDEF
1116n/a _CODECS_REGISTER_ERROR_METHODDEF
1117n/a _CODECS_LOOKUP_ERROR_METHODDEF
1118n/a _CODECS__FORGET_CODEC_METHODDEF
1119n/a {NULL, NULL} /* sentinel */
1120n/a};
1121n/a
1122n/astatic struct PyModuleDef codecsmodule = {
1123n/a PyModuleDef_HEAD_INIT,
1124n/a "_codecs",
1125n/a NULL,
1126n/a -1,
1127n/a _codecs_functions,
1128n/a NULL,
1129n/a NULL,
1130n/a NULL,
1131n/a NULL
1132n/a};
1133n/a
1134n/aPyMODINIT_FUNC
1135n/aPyInit__codecs(void)
1136n/a{
1137n/a return PyModule_Create(&codecsmodule);
1138n/a}