» Core Development > Code coverage > Modules/_io/textio.c

Python code coverage for Modules/_io/textio.c

# | count | content
1n/a/*
2n/a An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3n/a
4n/a Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5n/a
6n/a Written by Amaury Forgeot d'Arc and Antoine Pitrou
7n/a*/
8n/a
9n/a#define PY_SSIZE_T_CLEAN
10n/a#include "Python.h"
11n/a#include "structmember.h"
12n/a#include "_iomodule.h"
13n/a
14n/a/*[clinic input]
15n/amodule _io
16n/aclass _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
17n/aclass _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
18n/a[clinic start generated code]*/
19n/a/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
20n/a
21n/a/*[python input]
22n/aclass io_ssize_t_converter(CConverter):
23n/a type = 'Py_ssize_t'
24n/a converter = '_PyIO_ConvertSsize_t'
25n/a[python start generated code]*/
26n/a/*[python end generated code: output=da39a3ee5e6b4b0d input=d0a811d3cbfd1b33]*/
27n/a
28n/a_Py_IDENTIFIER(close);
29n/a_Py_IDENTIFIER(_dealloc_warn);
30n/a_Py_IDENTIFIER(decode);
31n/a_Py_IDENTIFIER(fileno);
32n/a_Py_IDENTIFIER(flush);
33n/a_Py_IDENTIFIER(getpreferredencoding);
34n/a_Py_IDENTIFIER(isatty);
35n/a_Py_IDENTIFIER(mode);
36n/a_Py_IDENTIFIER(name);
37n/a_Py_IDENTIFIER(raw);
38n/a_Py_IDENTIFIER(read);
39n/a_Py_IDENTIFIER(read1);
40n/a_Py_IDENTIFIER(readable);
41n/a_Py_IDENTIFIER(replace);
42n/a_Py_IDENTIFIER(reset);
43n/a_Py_IDENTIFIER(seek);
44n/a_Py_IDENTIFIER(seekable);
45n/a_Py_IDENTIFIER(setstate);
46n/a_Py_IDENTIFIER(tell);
47n/a_Py_IDENTIFIER(writable);
48n/a
49n/a/* TextIOBase */
50n/a
/* Docstring of the _io._TextIOBase type; referenced from the tp_doc slot of
   PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
58n/a
59n/astatic PyObject *
60n/a_unsupported(const char *message)
61n/a{
62n/a _PyIO_State *state = IO_STATE();
63n/a if (state != NULL)
64n/a PyErr_SetString(state->unsupported_operation, message);
65n/a return NULL;
66n/a}
67n/a
68n/aPyDoc_STRVAR(textiobase_detach_doc,
69n/a "Separate the underlying buffer from the TextIOBase and return it.\n"
70n/a "\n"
71n/a "After the underlying buffer has been detached, the TextIO is in an\n"
72n/a "unusable state.\n"
73n/a );
74n/a
75n/astatic PyObject *
76n/atextiobase_detach(PyObject *self)
77n/a{
78n/a return _unsupported("detach");
79n/a}
80n/a
81n/aPyDoc_STRVAR(textiobase_read_doc,
82n/a "Read at most n characters from stream.\n"
83n/a "\n"
84n/a "Read from underlying buffer until we have n characters or we hit EOF.\n"
85n/a "If n is negative or omitted, read until EOF.\n"
86n/a );
87n/a
88n/astatic PyObject *
89n/atextiobase_read(PyObject *self, PyObject *args)
90n/a{
91n/a return _unsupported("read");
92n/a}
93n/a
94n/aPyDoc_STRVAR(textiobase_readline_doc,
95n/a "Read until newline or EOF.\n"
96n/a "\n"
97n/a "Returns an empty string if EOF is hit immediately.\n"
98n/a );
99n/a
100n/astatic PyObject *
101n/atextiobase_readline(PyObject *self, PyObject *args)
102n/a{
103n/a return _unsupported("readline");
104n/a}
105n/a
106n/aPyDoc_STRVAR(textiobase_write_doc,
107n/a "Write string to stream.\n"
108n/a "Returns the number of characters written (which is always equal to\n"
109n/a "the length of the string).\n"
110n/a );
111n/a
112n/astatic PyObject *
113n/atextiobase_write(PyObject *self, PyObject *args)
114n/a{
115n/a return _unsupported("write");
116n/a}
117n/a
118n/aPyDoc_STRVAR(textiobase_encoding_doc,
119n/a "Encoding of the text stream.\n"
120n/a "\n"
121n/a "Subclasses should override.\n"
122n/a );
123n/a
124n/astatic PyObject *
125n/atextiobase_encoding_get(PyObject *self, void *context)
126n/a{
127n/a Py_RETURN_NONE;
128n/a}
129n/a
130n/aPyDoc_STRVAR(textiobase_newlines_doc,
131n/a "Line endings translated so far.\n"
132n/a "\n"
133n/a "Only line endings translated during reading are considered.\n"
134n/a "\n"
135n/a "Subclasses should override.\n"
136n/a );
137n/a
138n/astatic PyObject *
139n/atextiobase_newlines_get(PyObject *self, void *context)
140n/a{
141n/a Py_RETURN_NONE;
142n/a}
143n/a
144n/aPyDoc_STRVAR(textiobase_errors_doc,
145n/a "The error setting of the decoder or encoder.\n"
146n/a "\n"
147n/a "Subclasses should override.\n"
148n/a );
149n/a
150n/astatic PyObject *
151n/atextiobase_errors_get(PyObject *self, void *context)
152n/a{
153n/a Py_RETURN_NONE;
154n/a}
155n/a
156n/a
/* Method table of _io._TextIOBase.  Every entry points at a stub that
   reports the operation as unsupported; the table ends with a NULL
   sentinel entry. */
static PyMethodDef textiobase_methods[] = {
    {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}
};
164n/a
/* Read-only attributes of _io._TextIOBase (no setter is supplied).  Each
   getter returns None; the table ends with a NULL sentinel entry. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}
};
171n/a
/* Type object for _io._TextIOBase.  It derives from PyIOBase_Type, may be
   subclassed (Py_TPFLAGS_BASETYPE), and only supplies a docstring, the stub
   method table and the getset table; every other slot is left at 0.
   The initializer is positional, so the order of entries must not change. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_FINALIZE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
223n/a
224n/a
225n/a/* IncrementalNewlineDecoder */
226n/a
/* Instance layout of IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder, or None; NULL until
                                   __init__ has run */
    PyObject *errors;           /* error setting given to __init__
                                   ("strict" when omitted) */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back from the
                                   previous decode() call */
    unsigned int translate: 1;  /* translate \r and \r\n into \n */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
235n/a
236n/a/*[clinic input]
237n/a_io.IncrementalNewlineDecoder.__init__
238n/a decoder: object
239n/a translate: int
240n/a errors: object(c_default="NULL") = "strict"
241n/a
242n/aCodec used when reading a file in universal newlines mode.
243n/a
244n/aIt wraps another incremental decoder, translating \r\n and \r into \n.
245n/aIt also records the types of newlines encountered. When used with
246n/atranslate=False, it ensures that the newline sequence is returned in
247n/aone piece. When used with decoder=None, it expects unicode strings as
248n/adecode input and translates newlines without first invoking an external
249n/adecoder.
250n/a[clinic start generated code]*/
251n/a
252n/astatic int
253n/a_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
254n/a PyObject *decoder, int translate,
255n/a PyObject *errors)
256n/a/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
257n/a{
258n/a self->decoder = decoder;
259n/a Py_INCREF(decoder);
260n/a
261n/a if (errors == NULL) {
262n/a self->errors = PyUnicode_FromString("strict");
263n/a if (self->errors == NULL)
264n/a return -1;
265n/a }
266n/a else {
267n/a Py_INCREF(errors);
268n/a self->errors = errors;
269n/a }
270n/a
271n/a self->translate = translate;
272n/a self->seennl = 0;
273n/a self->pendingcr = 0;
274n/a
275n/a return 0;
276n/a}
277n/a
278n/astatic void
279n/aincrementalnewlinedecoder_dealloc(nldecoder_object *self)
280n/a{
281n/a Py_CLEAR(self->decoder);
282n/a Py_CLEAR(self->errors);
283n/a Py_TYPE(self)->tp_free((PyObject *)self);
284n/a}
285n/a
286n/astatic int
287n/acheck_decoded(PyObject *decoded)
288n/a{
289n/a if (decoded == NULL)
290n/a return -1;
291n/a if (!PyUnicode_Check(decoded)) {
292n/a PyErr_Format(PyExc_TypeError,
293n/a "decoder should return a string result, not '%.200s'",
294n/a Py_TYPE(decoded)->tp_name);
295n/a Py_DECREF(decoded);
296n/a return -1;
297n/a }
298n/a if (PyUnicode_READY(decoded) < 0) {
299n/a Py_DECREF(decoded);
300n/a return -1;
301n/a }
302n/a return 0;
303n/a}
304n/a
/* Bit flags stored in nldecoder_object.seennl, one per newline style that
   has been encountered; SEEN_ALL is all three combined. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
309n/a
/* Decode `input` and apply newline bookkeeping.
 *
 * `myself` is an nldecoder_object.  When its wrapped decoder is not None,
 * `input` is passed to that decoder's decode() together with `final`;
 * otherwise `input` itself is used as the decoded text.  The function then:
 *   - prepends a '\r' held back by a previous call (pendingcr),
 *   - unless `final`, holds back a trailing '\r' for the next call,
 *   - records which newline styles occur in self->seennl, and
 *   - if self->translate is set, rewrites "\r\n" and "\r" to "\n".
 *
 * Returns a new reference to the resulting str, or NULL with an exception
 * set (ValueError if __init__ has not been called).
 *
 * NOTE(review): the scanning loops read the element at index `len` before
 * testing the index against `len`; this presumably relies on the string
 * data being NUL-terminated -- confirm.
 */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        /* `kind` doubles as the per-character byte width here */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
        */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    /* For wider kinds the byte found by memchr may be part
                       of another character, so look at real characters. */
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            /* NOTE(review): these two redeclare (shadow) the identically
               initialised `kind` and `in_str` of the enclosing block. */
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character was at index len. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old `output` is already released: return directly
               instead of going through the error label. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
510n/a
511n/a/*[clinic input]
512n/a_io.IncrementalNewlineDecoder.decode
513n/a input: object
514n/a final: int(c_default="0") = False
515n/a[clinic start generated code]*/
516n/a
517n/astatic PyObject *
518n/a_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
519n/a PyObject *input, int final)
520n/a/*[clinic end generated code: output=0d486755bb37a66e input=d65677385bfd6827]*/
521n/a{
522n/a return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
523n/a}
524n/a
525n/a/*[clinic input]
526n/a_io.IncrementalNewlineDecoder.getstate
527n/a[clinic start generated code]*/
528n/a
529n/astatic PyObject *
530n/a_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
531n/a/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
532n/a{
533n/a PyObject *buffer;
534n/a unsigned long long flag;
535n/a
536n/a if (self->decoder != Py_None) {
537n/a PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
538n/a _PyIO_str_getstate, NULL);
539n/a if (state == NULL)
540n/a return NULL;
541n/a if (!PyArg_ParseTuple(state, "OK", &buffer, &flag)) {
542n/a Py_DECREF(state);
543n/a return NULL;
544n/a }
545n/a Py_INCREF(buffer);
546n/a Py_DECREF(state);
547n/a }
548n/a else {
549n/a buffer = PyBytes_FromString("");
550n/a flag = 0;
551n/a }
552n/a flag <<= 1;
553n/a if (self->pendingcr)
554n/a flag |= 1;
555n/a return Py_BuildValue("NK", buffer, flag);
556n/a}
557n/a
558n/a/*[clinic input]
559n/a_io.IncrementalNewlineDecoder.setstate
560n/a state: object
561n/a /
562n/a[clinic start generated code]*/
563n/a
564n/astatic PyObject *
565n/a_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
566n/a PyObject *state)
567n/a/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
568n/a{
569n/a PyObject *buffer;
570n/a unsigned long long flag;
571n/a
572n/a if (!PyArg_ParseTuple(state, "OK", &buffer, &flag))
573n/a return NULL;
574n/a
575n/a self->pendingcr = (int) (flag & 1);
576n/a flag >>= 1;
577n/a
578n/a if (self->decoder != Py_None)
579n/a return _PyObject_CallMethodId(self->decoder,
580n/a &PyId_setstate, "((OK))", buffer, flag);
581n/a else
582n/a Py_RETURN_NONE;
583n/a}
584n/a
585n/a/*[clinic input]
586n/a_io.IncrementalNewlineDecoder.reset
587n/a[clinic start generated code]*/
588n/a
589n/astatic PyObject *
590n/a_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
591n/a/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
592n/a{
593n/a self->seennl = 0;
594n/a self->pendingcr = 0;
595n/a if (self->decoder != Py_None)
596n/a return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
597n/a else
598n/a Py_RETURN_NONE;
599n/a}
600n/a
601n/astatic PyObject *
602n/aincrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
603n/a{
604n/a switch (self->seennl) {
605n/a case SEEN_CR:
606n/a return PyUnicode_FromString("\r");
607n/a case SEEN_LF:
608n/a return PyUnicode_FromString("\n");
609n/a case SEEN_CRLF:
610n/a return PyUnicode_FromString("\r\n");
611n/a case SEEN_CR | SEEN_LF:
612n/a return Py_BuildValue("ss", "\r", "\n");
613n/a case SEEN_CR | SEEN_CRLF:
614n/a return Py_BuildValue("ss", "\r", "\r\n");
615n/a case SEEN_LF | SEEN_CRLF:
616n/a return Py_BuildValue("ss", "\n", "\r\n");
617n/a case SEEN_CR | SEEN_LF | SEEN_CRLF:
618n/a return Py_BuildValue("sss", "\r", "\n", "\r\n");
619n/a default:
620n/a Py_RETURN_NONE;
621n/a }
622n/a
623n/a}
624n/a
625n/a/* TextIOWrapper */
626n/a
/* Signature of the specialized encoders defined later in this file.  They
   are called with the textio object (cast to PyObject *) and the text to
   encode, and return the encoder's result object. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
629n/a
/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;           /* the wrapped buffer object */
    PyObject *encoding;         /* encoding name, a str */
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;           /* the `newline` argument as a str, if given */
    PyObject *errors;           /* error setting, stored as a bytes object */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;

    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
688n/a
689n/a/* A couple of specialized cases in order to bypass the slow incremental
690n/a encoding methods for the most popular encodings. */
691n/a
692n/astatic PyObject *
693n/aascii_encode(textio *self, PyObject *text)
694n/a{
695n/a return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
696n/a}
697n/a
698n/astatic PyObject *
699n/autf16be_encode(textio *self, PyObject *text)
700n/a{
701n/a return _PyUnicode_EncodeUTF16(text,
702n/a PyBytes_AS_STRING(self->errors), 1);
703n/a}
704n/a
705n/astatic PyObject *
706n/autf16le_encode(textio *self, PyObject *text)
707n/a{
708n/a return _PyUnicode_EncodeUTF16(text,
709n/a PyBytes_AS_STRING(self->errors), -1);
710n/a}
711n/a
712n/astatic PyObject *
713n/autf16_encode(textio *self, PyObject *text)
714n/a{
715n/a if (!self->encoding_start_of_stream) {
716n/a /* Skip the BOM and use native byte ordering */
717n/a#if PY_BIG_ENDIAN
718n/a return utf16be_encode(self, text);
719n/a#else
720n/a return utf16le_encode(self, text);
721n/a#endif
722n/a }
723n/a return _PyUnicode_EncodeUTF16(text,
724n/a PyBytes_AS_STRING(self->errors), 0);
725n/a}
726n/a
727n/astatic PyObject *
728n/autf32be_encode(textio *self, PyObject *text)
729n/a{
730n/a return _PyUnicode_EncodeUTF32(text,
731n/a PyBytes_AS_STRING(self->errors), 1);
732n/a}
733n/a
734n/astatic PyObject *
735n/autf32le_encode(textio *self, PyObject *text)
736n/a{
737n/a return _PyUnicode_EncodeUTF32(text,
738n/a PyBytes_AS_STRING(self->errors), -1);
739n/a}
740n/a
741n/astatic PyObject *
742n/autf32_encode(textio *self, PyObject *text)
743n/a{
744n/a if (!self->encoding_start_of_stream) {
745n/a /* Skip the BOM and use native byte ordering */
746n/a#if PY_BIG_ENDIAN
747n/a return utf32be_encode(self, text);
748n/a#else
749n/a return utf32le_encode(self, text);
750n/a#endif
751n/a }
752n/a return _PyUnicode_EncodeUTF32(text,
753n/a PyBytes_AS_STRING(self->errors), 0);
754n/a}
755n/a
756n/astatic PyObject *
757n/autf8_encode(textio *self, PyObject *text)
758n/a{
759n/a return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
760n/a}
761n/a
762n/astatic PyObject *
763n/alatin1_encode(textio *self, PyObject *text)
764n/a{
765n/a return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
766n/a}
767n/a
768n/a/* Map normalized encoding names onto the specialized encoding funcs */
769n/a
/* One row of the encodefuncs lookup table. */
typedef struct {
    const char *name;           /* normalized codec name (ASCII) */
    encodefunc_t encodefunc;    /* specialized encoder for that codec */
} encodefuncentry;
774n/a
/* Lookup table from normalized codec name to specialized encoder.  The
   encoders take a textio * first argument, hence the casts to the generic
   encodefunc_t signature.  The table ends with a {NULL, NULL} sentinel. */
static const encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
787n/a
788n/a
789n/a/*[clinic input]
790n/a_io.TextIOWrapper.__init__
791n/a buffer: object
792n/a encoding: str(accept={str, NoneType}) = NULL
793n/a errors: str(accept={str, NoneType}) = NULL
794n/a newline: str(accept={str, NoneType}) = NULL
795n/a line_buffering: int(c_default="0") = False
796n/a write_through: int(c_default="0") = False
797n/a
798n/aCharacter and line based layer over a BufferedIOBase object, buffer.
799n/a
800n/aencoding gives the name of the encoding that the stream will be
801n/adecoded or encoded with. It defaults to locale.getpreferredencoding(False).
802n/a
803n/aerrors determines the strictness of encoding and decoding (see
804n/ahelp(codecs.Codec) or the documentation for codecs.register) and
805n/adefaults to "strict".
806n/a
807n/anewline controls how line endings are handled. It can be None, '',
808n/a'\n', '\r', and '\r\n'. It works as follows:
809n/a
810n/a* On input, if newline is None, universal newlines mode is
811n/a enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
812n/a these are translated into '\n' before being returned to the
813n/a caller. If it is '', universal newline mode is enabled, but line
814n/a endings are returned to the caller untranslated. If it has any of
815n/a the other legal values, input lines are only terminated by the given
816n/a string, and the line ending is returned to the caller untranslated.
817n/a
818n/a* On output, if newline is None, any '\n' characters written are
819n/a translated to the system default line separator, os.linesep. If
820n/a newline is '' or '\n', no translation takes place. If newline is any
821n/a of the other legal values, any '\n' characters written are translated
822n/a to the given string.
823n/a
824n/aIf line_buffering is True, a call to flush is implied when a call to
825n/awrite contains a newline character.
826n/a[clinic start generated code]*/
827n/a
828n/astatic int
829n/a_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
830n/a const char *encoding, const char *errors,
831n/a const char *newline, int line_buffering,
832n/a int write_through)
833n/a/*[clinic end generated code: output=56a83402ce2a8381 input=3126cb3101a2c99b]*/
834n/a{
835n/a PyObject *raw, *codec_info = NULL;
836n/a _PyIO_State *state = NULL;
837n/a PyObject *res;
838n/a int r;
839n/a
840n/a self->ok = 0;
841n/a self->detached = 0;
842n/a
843n/a if (newline && newline[0] != '\0'
844n/a && !(newline[0] == '\n' && newline[1] == '\0')
845n/a && !(newline[0] == '\r' && newline[1] == '\0')
846n/a && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
847n/a PyErr_Format(PyExc_ValueError,
848n/a "illegal newline value: %s", newline);
849n/a return -1;
850n/a }
851n/a
852n/a Py_CLEAR(self->buffer);
853n/a Py_CLEAR(self->encoding);
854n/a Py_CLEAR(self->encoder);
855n/a Py_CLEAR(self->decoder);
856n/a Py_CLEAR(self->readnl);
857n/a Py_CLEAR(self->decoded_chars);
858n/a Py_CLEAR(self->pending_bytes);
859n/a Py_CLEAR(self->snapshot);
860n/a Py_CLEAR(self->errors);
861n/a Py_CLEAR(self->raw);
862n/a self->decoded_chars_used = 0;
863n/a self->pending_bytes_count = 0;
864n/a self->encodefunc = NULL;
865n/a self->b2cratio = 0.0;
866n/a
867n/a if (encoding == NULL) {
868n/a /* Try os.device_encoding(fileno) */
869n/a PyObject *fileno;
870n/a state = IO_STATE();
871n/a if (state == NULL)
872n/a goto error;
873n/a fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
874n/a /* Ignore only AttributeError and UnsupportedOperation */
875n/a if (fileno == NULL) {
876n/a if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
877n/a PyErr_ExceptionMatches(state->unsupported_operation)) {
878n/a PyErr_Clear();
879n/a }
880n/a else {
881n/a goto error;
882n/a }
883n/a }
884n/a else {
885n/a int fd = _PyLong_AsInt(fileno);
886n/a Py_DECREF(fileno);
887n/a if (fd == -1 && PyErr_Occurred()) {
888n/a goto error;
889n/a }
890n/a
891n/a self->encoding = _Py_device_encoding(fd);
892n/a if (self->encoding == NULL)
893n/a goto error;
894n/a else if (!PyUnicode_Check(self->encoding))
895n/a Py_CLEAR(self->encoding);
896n/a }
897n/a }
898n/a if (encoding == NULL && self->encoding == NULL) {
899n/a PyObject *locale_module = _PyIO_get_locale_module(state);
900n/a if (locale_module == NULL)
901n/a goto catch_ImportError;
902n/a self->encoding = _PyObject_CallMethodIdObjArgs(
903n/a locale_module, &PyId_getpreferredencoding, Py_False, NULL);
904n/a Py_DECREF(locale_module);
905n/a if (self->encoding == NULL) {
906n/a catch_ImportError:
907n/a /*
908n/a Importing locale can raise an ImportError because of
909n/a _functools, and locale.getpreferredencoding can raise an
910n/a ImportError if _locale is not available. These will happen
911n/a during module building.
912n/a */
913n/a if (PyErr_ExceptionMatches(PyExc_ImportError)) {
914n/a PyErr_Clear();
915n/a self->encoding = PyUnicode_FromString("ascii");
916n/a }
917n/a else
918n/a goto error;
919n/a }
920n/a else if (!PyUnicode_Check(self->encoding))
921n/a Py_CLEAR(self->encoding);
922n/a }
923n/a if (self->encoding != NULL) {
924n/a encoding = PyUnicode_AsUTF8(self->encoding);
925n/a if (encoding == NULL)
926n/a goto error;
927n/a }
928n/a else if (encoding != NULL) {
929n/a self->encoding = PyUnicode_FromString(encoding);
930n/a if (self->encoding == NULL)
931n/a goto error;
932n/a }
933n/a else {
934n/a PyErr_SetString(PyExc_IOError,
935n/a "could not determine default encoding");
936n/a }
937n/a
938n/a /* Check we have been asked for a real text encoding */
939n/a codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
940n/a if (codec_info == NULL) {
941n/a Py_CLEAR(self->encoding);
942n/a goto error;
943n/a }
944n/a
945n/a /* XXX: Failures beyond this point have the potential to leak elements
946n/a * of the partially constructed object (like self->encoding)
947n/a */
948n/a
949n/a if (errors == NULL)
950n/a errors = "strict";
951n/a self->errors = PyBytes_FromString(errors);
952n/a if (self->errors == NULL)
953n/a goto error;
954n/a
955n/a self->chunk_size = 8192;
956n/a self->readuniversal = (newline == NULL || newline[0] == '\0');
957n/a self->line_buffering = line_buffering;
958n/a self->write_through = write_through;
959n/a self->readtranslate = (newline == NULL);
960n/a if (newline) {
961n/a self->readnl = PyUnicode_FromString(newline);
962n/a if (self->readnl == NULL)
963n/a goto error;
964n/a }
965n/a self->writetranslate = (newline == NULL || newline[0] != '\0');
966n/a if (!self->readuniversal && self->readnl) {
967n/a self->writenl = PyUnicode_AsUTF8(self->readnl);
968n/a if (self->writenl == NULL)
969n/a goto error;
970n/a if (!strcmp(self->writenl, "\n"))
971n/a self->writenl = NULL;
972n/a }
973n/a#ifdef MS_WINDOWS
974n/a else
975n/a self->writenl = "\r\n";
976n/a#endif
977n/a
978n/a /* Build the decoder object */
979n/a res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
980n/a if (res == NULL)
981n/a goto error;
982n/a r = PyObject_IsTrue(res);
983n/a Py_DECREF(res);
984n/a if (r == -1)
985n/a goto error;
986n/a if (r == 1) {
987n/a self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
988n/a errors);
989n/a if (self->decoder == NULL)
990n/a goto error;
991n/a
992n/a if (self->readuniversal) {
993n/a PyObject *incrementalDecoder = PyObject_CallFunction(
994n/a (PyObject *)&PyIncrementalNewlineDecoder_Type,
995n/a "Oi", self->decoder, (int)self->readtranslate);
996n/a if (incrementalDecoder == NULL)
997n/a goto error;
998n/a Py_XSETREF(self->decoder, incrementalDecoder);
999n/a }
1000n/a }
1001n/a
1002n/a /* Build the encoder object */
1003n/a res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
1004n/a if (res == NULL)
1005n/a goto error;
1006n/a r = PyObject_IsTrue(res);
1007n/a Py_DECREF(res);
1008n/a if (r == -1)
1009n/a goto error;
1010n/a if (r == 1) {
1011n/a self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
1012n/a errors);
1013n/a if (self->encoder == NULL)
1014n/a goto error;
1015n/a /* Get the normalized name of the codec */
1016n/a res = _PyObject_GetAttrId(codec_info, &PyId_name);
1017n/a if (res == NULL) {
1018n/a if (PyErr_ExceptionMatches(PyExc_AttributeError))
1019n/a PyErr_Clear();
1020n/a else
1021n/a goto error;
1022n/a }
1023n/a else if (PyUnicode_Check(res)) {
1024n/a const encodefuncentry *e = encodefuncs;
1025n/a while (e->name != NULL) {
1026n/a if (_PyUnicode_EqualToASCIIString(res, e->name)) {
1027n/a self->encodefunc = e->encodefunc;
1028n/a break;
1029n/a }
1030n/a e++;
1031n/a }
1032n/a }
1033n/a Py_XDECREF(res);
1034n/a }
1035n/a
1036n/a /* Finished sorting out the codec details */
1037n/a Py_CLEAR(codec_info);
1038n/a
1039n/a self->buffer = buffer;
1040n/a Py_INCREF(buffer);
1041n/a
1042n/a if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1043n/a Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1044n/a Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1045n/a raw = _PyObject_GetAttrId(buffer, &PyId_raw);
1046n/a /* Cache the raw FileIO object to speed up 'closed' checks */
1047n/a if (raw == NULL) {
1048n/a if (PyErr_ExceptionMatches(PyExc_AttributeError))
1049n/a PyErr_Clear();
1050n/a else
1051n/a goto error;
1052n/a }
1053n/a else if (Py_TYPE(raw) == &PyFileIO_Type)
1054n/a self->raw = raw;
1055n/a else
1056n/a Py_DECREF(raw);
1057n/a }
1058n/a
1059n/a res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
1060n/a if (res == NULL)
1061n/a goto error;
1062n/a r = PyObject_IsTrue(res);
1063n/a Py_DECREF(res);
1064n/a if (r < 0)
1065n/a goto error;
1066n/a self->seekable = self->telling = r;
1067n/a
1068n/a self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
1069n/a
1070n/a self->encoding_start_of_stream = 0;
1071n/a if (self->seekable && self->encoder) {
1072n/a PyObject *cookieObj;
1073n/a int cmp;
1074n/a
1075n/a self->encoding_start_of_stream = 1;
1076n/a
1077n/a cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1078n/a if (cookieObj == NULL)
1079n/a goto error;
1080n/a
1081n/a cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1082n/a Py_DECREF(cookieObj);
1083n/a if (cmp < 0) {
1084n/a goto error;
1085n/a }
1086n/a
1087n/a if (cmp == 0) {
1088n/a self->encoding_start_of_stream = 0;
1089n/a res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1090n/a _PyIO_zero, NULL);
1091n/a if (res == NULL)
1092n/a goto error;
1093n/a Py_DECREF(res);
1094n/a }
1095n/a }
1096n/a
1097n/a self->ok = 1;
1098n/a return 0;
1099n/a
1100n/a error:
1101n/a Py_XDECREF(codec_info);
1102n/a return -1;
1103n/a}
1104n/a
1105n/astatic int
1106n/atextiowrapper_clear(textio *self)
1107n/a{
1108n/a self->ok = 0;
1109n/a Py_CLEAR(self->buffer);
1110n/a Py_CLEAR(self->encoding);
1111n/a Py_CLEAR(self->encoder);
1112n/a Py_CLEAR(self->decoder);
1113n/a Py_CLEAR(self->readnl);
1114n/a Py_CLEAR(self->decoded_chars);
1115n/a Py_CLEAR(self->pending_bytes);
1116n/a Py_CLEAR(self->snapshot);
1117n/a Py_CLEAR(self->errors);
1118n/a Py_CLEAR(self->raw);
1119n/a
1120n/a Py_CLEAR(self->dict);
1121n/a return 0;
1122n/a}
1123n/a
1124n/astatic void
1125n/atextiowrapper_dealloc(textio *self)
1126n/a{
1127n/a self->finalizing = 1;
1128n/a if (_PyIOBase_finalize((PyObject *) self) < 0)
1129n/a return;
1130n/a self->ok = 0;
1131n/a _PyObject_GC_UNTRACK(self);
1132n/a if (self->weakreflist != NULL)
1133n/a PyObject_ClearWeakRefs((PyObject *)self);
1134n/a textiowrapper_clear(self);
1135n/a Py_TYPE(self)->tp_free((PyObject *)self);
1136n/a}
1137n/a
1138n/astatic int
1139n/atextiowrapper_traverse(textio *self, visitproc visit, void *arg)
1140n/a{
1141n/a Py_VISIT(self->buffer);
1142n/a Py_VISIT(self->encoding);
1143n/a Py_VISIT(self->encoder);
1144n/a Py_VISIT(self->decoder);
1145n/a Py_VISIT(self->readnl);
1146n/a Py_VISIT(self->decoded_chars);
1147n/a Py_VISIT(self->pending_bytes);
1148n/a Py_VISIT(self->snapshot);
1149n/a Py_VISIT(self->errors);
1150n/a Py_VISIT(self->raw);
1151n/a
1152n/a Py_VISIT(self->dict);
1153n/a return 0;
1154n/a}
1155n/a
1156n/astatic PyObject *
1157n/atextiowrapper_closed_get(textio *self, void *context);
1158n/a
1159n/a/* This macro takes some shortcuts to make the common case faster. */
1160n/a#define CHECK_CLOSED(self) \
1161n/a do { \
1162n/a int r; \
1163n/a PyObject *_res; \
1164n/a if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1165n/a if (self->raw != NULL) \
1166n/a r = _PyFileIO_closed(self->raw); \
1167n/a else { \
1168n/a _res = textiowrapper_closed_get(self, NULL); \
1169n/a if (_res == NULL) \
1170n/a return NULL; \
1171n/a r = PyObject_IsTrue(_res); \
1172n/a Py_DECREF(_res); \
1173n/a if (r < 0) \
1174n/a return NULL; \
1175n/a } \
1176n/a if (r > 0) { \
1177n/a PyErr_SetString(PyExc_ValueError, \
1178n/a "I/O operation on closed file."); \
1179n/a return NULL; \
1180n/a } \
1181n/a } \
1182n/a else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1183n/a return NULL; \
1184n/a } while (0)
1185n/a
/* Raise ValueError and return NULL from the enclosing function when the
   object has not been successfully initialized (self->ok <= 0). */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* Same as CHECK_INITIALIZED, and additionally fail when the underlying
   buffer has been detached.  For functions returning a pointer. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return NULL; \
    }

/* Variant of CHECK_ATTACHED for functions returning an int: returns -1
   instead of NULL on failure. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return -1; \
    }
1211n/a
1212n/a
1213n/a/*[clinic input]
1214n/a_io.TextIOWrapper.detach
1215n/a[clinic start generated code]*/
1216n/a
1217n/astatic PyObject *
1218n/a_io_TextIOWrapper_detach_impl(textio *self)
1219n/a/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1220n/a{
1221n/a PyObject *buffer, *res;
1222n/a CHECK_ATTACHED(self);
1223n/a res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1224n/a if (res == NULL)
1225n/a return NULL;
1226n/a Py_DECREF(res);
1227n/a buffer = self->buffer;
1228n/a self->buffer = NULL;
1229n/a self->detached = 1;
1230n/a return buffer;
1231n/a}
1232n/a
1233n/a/* Flush the internal write buffer. This doesn't explicitly flush the
1234n/a underlying buffered object, though. */
1235n/astatic int
1236n/a_textiowrapper_writeflush(textio *self)
1237n/a{
1238n/a PyObject *pending, *b, *ret;
1239n/a
1240n/a if (self->pending_bytes == NULL)
1241n/a return 0;
1242n/a
1243n/a pending = self->pending_bytes;
1244n/a Py_INCREF(pending);
1245n/a self->pending_bytes_count = 0;
1246n/a Py_CLEAR(self->pending_bytes);
1247n/a
1248n/a b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1249n/a Py_DECREF(pending);
1250n/a if (b == NULL)
1251n/a return -1;
1252n/a ret = NULL;
1253n/a do {
1254n/a ret = PyObject_CallMethodObjArgs(self->buffer,
1255n/a _PyIO_str_write, b, NULL);
1256n/a } while (ret == NULL && _PyIO_trap_eintr());
1257n/a Py_DECREF(b);
1258n/a if (ret == NULL)
1259n/a return -1;
1260n/a Py_DECREF(ret);
1261n/a return 0;
1262n/a}
1263n/a
1264n/a/*[clinic input]
1265n/a_io.TextIOWrapper.write
1266n/a text: unicode
1267n/a /
1268n/a[clinic start generated code]*/
1269n/a
1270n/astatic PyObject *
1271n/a_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1272n/a/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
1273n/a{
1274n/a PyObject *ret;
1275n/a PyObject *b;
1276n/a Py_ssize_t textlen;
1277n/a int haslf = 0;
1278n/a int needflush = 0, text_needflush = 0;
1279n/a
1280n/a if (PyUnicode_READY(text) == -1)
1281n/a return NULL;
1282n/a
1283n/a CHECK_ATTACHED(self);
1284n/a CHECK_CLOSED(self);
1285n/a
1286n/a if (self->encoder == NULL)
1287n/a return _unsupported("not writable");
1288n/a
1289n/a Py_INCREF(text);
1290n/a
1291n/a textlen = PyUnicode_GET_LENGTH(text);
1292n/a
1293n/a if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1294n/a if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1295n/a haslf = 1;
1296n/a
1297n/a if (haslf && self->writetranslate && self->writenl != NULL) {
1298n/a PyObject *newtext = _PyObject_CallMethodId(
1299n/a text, &PyId_replace, "ss", "\n", self->writenl);
1300n/a Py_DECREF(text);
1301n/a if (newtext == NULL)
1302n/a return NULL;
1303n/a text = newtext;
1304n/a }
1305n/a
1306n/a if (self->write_through)
1307n/a text_needflush = 1;
1308n/a if (self->line_buffering &&
1309n/a (haslf ||
1310n/a PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1311n/a needflush = 1;
1312n/a
1313n/a /* XXX What if we were just reading? */
1314n/a if (self->encodefunc != NULL) {
1315n/a b = (*self->encodefunc)((PyObject *) self, text);
1316n/a self->encoding_start_of_stream = 0;
1317n/a }
1318n/a else
1319n/a b = PyObject_CallMethodObjArgs(self->encoder,
1320n/a _PyIO_str_encode, text, NULL);
1321n/a Py_DECREF(text);
1322n/a if (b == NULL)
1323n/a return NULL;
1324n/a
1325n/a if (self->pending_bytes == NULL) {
1326n/a self->pending_bytes = PyList_New(0);
1327n/a if (self->pending_bytes == NULL) {
1328n/a Py_DECREF(b);
1329n/a return NULL;
1330n/a }
1331n/a self->pending_bytes_count = 0;
1332n/a }
1333n/a if (PyList_Append(self->pending_bytes, b) < 0) {
1334n/a Py_DECREF(b);
1335n/a return NULL;
1336n/a }
1337n/a self->pending_bytes_count += PyBytes_GET_SIZE(b);
1338n/a Py_DECREF(b);
1339n/a if (self->pending_bytes_count > self->chunk_size || needflush ||
1340n/a text_needflush) {
1341n/a if (_textiowrapper_writeflush(self) < 0)
1342n/a return NULL;
1343n/a }
1344n/a
1345n/a if (needflush) {
1346n/a ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1347n/a if (ret == NULL)
1348n/a return NULL;
1349n/a Py_DECREF(ret);
1350n/a }
1351n/a
1352n/a Py_CLEAR(self->snapshot);
1353n/a
1354n/a if (self->decoder) {
1355n/a ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
1356n/a if (ret == NULL)
1357n/a return NULL;
1358n/a Py_DECREF(ret);
1359n/a }
1360n/a
1361n/a return PyLong_FromSsize_t(textlen);
1362n/a}
1363n/a
1364n/a/* Steal a reference to chars and store it in the decoded_char buffer;
1365n/a */
1366n/astatic void
1367n/atextiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1368n/a{
1369n/a Py_XSETREF(self->decoded_chars, chars);
1370n/a self->decoded_chars_used = 0;
1371n/a}
1372n/a
1373n/astatic PyObject *
1374n/atextiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1375n/a{
1376n/a PyObject *chars;
1377n/a Py_ssize_t avail;
1378n/a
1379n/a if (self->decoded_chars == NULL)
1380n/a return PyUnicode_FromStringAndSize(NULL, 0);
1381n/a
1382n/a /* decoded_chars is guaranteed to be "ready". */
1383n/a avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1384n/a - self->decoded_chars_used);
1385n/a
1386n/a assert(avail >= 0);
1387n/a
1388n/a if (n < 0 || n > avail)
1389n/a n = avail;
1390n/a
1391n/a if (self->decoded_chars_used > 0 || n < avail) {
1392n/a chars = PyUnicode_Substring(self->decoded_chars,
1393n/a self->decoded_chars_used,
1394n/a self->decoded_chars_used + n);
1395n/a if (chars == NULL)
1396n/a return NULL;
1397n/a }
1398n/a else {
1399n/a chars = self->decoded_chars;
1400n/a Py_INCREF(chars);
1401n/a }
1402n/a
1403n/a self->decoded_chars_used += n;
1404n/a return chars;
1405n/a}
1406n/a
1407n/a/* Read and decode the next chunk of data from the BufferedReader.
1408n/a */
1409n/astatic int
1410n/atextiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1411n/a{
1412n/a PyObject *dec_buffer = NULL;
1413n/a PyObject *dec_flags = NULL;
1414n/a PyObject *input_chunk = NULL;
1415n/a Py_buffer input_chunk_buf;
1416n/a PyObject *decoded_chars, *chunk_size;
1417n/a Py_ssize_t nbytes, nchars;
1418n/a int eof;
1419n/a
1420n/a /* The return value is True unless EOF was reached. The decoded string is
1421n/a * placed in self._decoded_chars (replacing its previous value). The
1422n/a * entire input chunk is sent to the decoder, though some of it may remain
1423n/a * buffered in the decoder, yet to be converted.
1424n/a */
1425n/a
1426n/a if (self->decoder == NULL) {
1427n/a _unsupported("not readable");
1428n/a return -1;
1429n/a }
1430n/a
1431n/a if (self->telling) {
1432n/a /* To prepare for tell(), we need to snapshot a point in the file
1433n/a * where the decoder's input buffer is empty.
1434n/a */
1435n/a
1436n/a PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1437n/a _PyIO_str_getstate, NULL);
1438n/a if (state == NULL)
1439n/a return -1;
1440n/a /* Given this, we know there was a valid snapshot point
1441n/a * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1442n/a */
1443n/a if (PyArg_ParseTuple(state, "OO", &dec_buffer, &dec_flags) < 0) {
1444n/a Py_DECREF(state);
1445n/a return -1;
1446n/a }
1447n/a
1448n/a if (!PyBytes_Check(dec_buffer)) {
1449n/a PyErr_Format(PyExc_TypeError,
1450n/a "decoder getstate() should have returned a bytes "
1451n/a "object, not '%.200s'",
1452n/a Py_TYPE(dec_buffer)->tp_name);
1453n/a Py_DECREF(state);
1454n/a return -1;
1455n/a }
1456n/a Py_INCREF(dec_buffer);
1457n/a Py_INCREF(dec_flags);
1458n/a Py_DECREF(state);
1459n/a }
1460n/a
1461n/a /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1462n/a if (size_hint > 0) {
1463n/a size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1464n/a }
1465n/a chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1466n/a if (chunk_size == NULL)
1467n/a goto fail;
1468n/a
1469n/a input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1470n/a (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1471n/a chunk_size, NULL);
1472n/a Py_DECREF(chunk_size);
1473n/a if (input_chunk == NULL)
1474n/a goto fail;
1475n/a
1476n/a if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1477n/a PyErr_Format(PyExc_TypeError,
1478n/a "underlying %s() should have returned a bytes-like object, "
1479n/a "not '%.200s'", (self->has_read1 ? "read1": "read"),
1480n/a Py_TYPE(input_chunk)->tp_name);
1481n/a goto fail;
1482n/a }
1483n/a
1484n/a nbytes = input_chunk_buf.len;
1485n/a eof = (nbytes == 0);
1486n/a if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1487n/a decoded_chars = _PyIncrementalNewlineDecoder_decode(
1488n/a self->decoder, input_chunk, eof);
1489n/a }
1490n/a else {
1491n/a decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1492n/a _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1493n/a }
1494n/a PyBuffer_Release(&input_chunk_buf);
1495n/a
1496n/a if (check_decoded(decoded_chars) < 0)
1497n/a goto fail;
1498n/a textiowrapper_set_decoded_chars(self, decoded_chars);
1499n/a nchars = PyUnicode_GET_LENGTH(decoded_chars);
1500n/a if (nchars > 0)
1501n/a self->b2cratio = (double) nbytes / nchars;
1502n/a else
1503n/a self->b2cratio = 0.0;
1504n/a if (nchars > 0)
1505n/a eof = 0;
1506n/a
1507n/a if (self->telling) {
1508n/a /* At the snapshot point, len(dec_buffer) bytes before the read, the
1509n/a * next input to be decoded is dec_buffer + input_chunk.
1510n/a */
1511n/a PyObject *next_input = dec_buffer;
1512n/a PyBytes_Concat(&next_input, input_chunk);
1513n/a if (next_input == NULL) {
1514n/a dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1515n/a goto fail;
1516n/a }
1517n/a Py_XSETREF(self->snapshot, Py_BuildValue("NN", dec_flags, next_input));
1518n/a }
1519n/a Py_DECREF(input_chunk);
1520n/a
1521n/a return (eof == 0);
1522n/a
1523n/a fail:
1524n/a Py_XDECREF(dec_buffer);
1525n/a Py_XDECREF(dec_flags);
1526n/a Py_XDECREF(input_chunk);
1527n/a return -1;
1528n/a}
1529n/a
1530n/a/*[clinic input]
1531n/a_io.TextIOWrapper.read
1532n/a size as n: io_ssize_t = -1
1533n/a /
1534n/a[clinic start generated code]*/
1535n/a
1536n/astatic PyObject *
1537n/a_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1538n/a/*[clinic end generated code: output=7e651ce6cc6a25a6 input=8c09398424085cca]*/
1539n/a{
1540n/a PyObject *result = NULL, *chunks = NULL;
1541n/a
1542n/a CHECK_ATTACHED(self);
1543n/a CHECK_CLOSED(self);
1544n/a
1545n/a if (self->decoder == NULL)
1546n/a return _unsupported("not readable");
1547n/a
1548n/a if (_textiowrapper_writeflush(self) < 0)
1549n/a return NULL;
1550n/a
1551n/a if (n < 0) {
1552n/a /* Read everything */
1553n/a PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
1554n/a PyObject *decoded;
1555n/a if (bytes == NULL)
1556n/a goto fail;
1557n/a
1558n/a if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1559n/a decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1560n/a bytes, 1);
1561n/a else
1562n/a decoded = PyObject_CallMethodObjArgs(
1563n/a self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1564n/a Py_DECREF(bytes);
1565n/a if (check_decoded(decoded) < 0)
1566n/a goto fail;
1567n/a
1568n/a result = textiowrapper_get_decoded_chars(self, -1);
1569n/a
1570n/a if (result == NULL) {
1571n/a Py_DECREF(decoded);
1572n/a return NULL;
1573n/a }
1574n/a
1575n/a PyUnicode_AppendAndDel(&result, decoded);
1576n/a if (result == NULL)
1577n/a goto fail;
1578n/a
1579n/a Py_CLEAR(self->snapshot);
1580n/a return result;
1581n/a }
1582n/a else {
1583n/a int res = 1;
1584n/a Py_ssize_t remaining = n;
1585n/a
1586n/a result = textiowrapper_get_decoded_chars(self, n);
1587n/a if (result == NULL)
1588n/a goto fail;
1589n/a if (PyUnicode_READY(result) == -1)
1590n/a goto fail;
1591n/a remaining -= PyUnicode_GET_LENGTH(result);
1592n/a
1593n/a /* Keep reading chunks until we have n characters to return */
1594n/a while (remaining > 0) {
1595n/a res = textiowrapper_read_chunk(self, remaining);
1596n/a if (res < 0) {
1597n/a /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1598n/a when EINTR occurs so we needn't do it ourselves. */
1599n/a if (_PyIO_trap_eintr()) {
1600n/a continue;
1601n/a }
1602n/a goto fail;
1603n/a }
1604n/a if (res == 0) /* EOF */
1605n/a break;
1606n/a if (chunks == NULL) {
1607n/a chunks = PyList_New(0);
1608n/a if (chunks == NULL)
1609n/a goto fail;
1610n/a }
1611n/a if (PyUnicode_GET_LENGTH(result) > 0 &&
1612n/a PyList_Append(chunks, result) < 0)
1613n/a goto fail;
1614n/a Py_DECREF(result);
1615n/a result = textiowrapper_get_decoded_chars(self, remaining);
1616n/a if (result == NULL)
1617n/a goto fail;
1618n/a remaining -= PyUnicode_GET_LENGTH(result);
1619n/a }
1620n/a if (chunks != NULL) {
1621n/a if (result != NULL && PyList_Append(chunks, result) < 0)
1622n/a goto fail;
1623n/a Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1624n/a if (result == NULL)
1625n/a goto fail;
1626n/a Py_CLEAR(chunks);
1627n/a }
1628n/a return result;
1629n/a }
1630n/a fail:
1631n/a Py_XDECREF(result);
1632n/a Py_XDECREF(chunks);
1633n/a return NULL;
1634n/a}
1635n/a
1636n/a
1637n/a/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
1638n/a that is to the NUL character. Otherwise the function will produce
1639n/a incorrect results. */
1640n/astatic const char *
1641n/afind_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
1642n/a{
1643n/a if (kind == PyUnicode_1BYTE_KIND) {
1644n/a assert(ch < 256);
1645n/a return (char *) memchr((void *) s, (char) ch, end - s);
1646n/a }
1647n/a for (;;) {
1648n/a while (PyUnicode_READ(kind, s, 0) > ch)
1649n/a s += kind;
1650n/a if (PyUnicode_READ(kind, s, 0) == ch)
1651n/a return s;
1652n/a if (s == end)
1653n/a return NULL;
1654n/a s += kind;
1655n/a }
1656n/a}
1657n/a
1658n/aPy_ssize_t
1659n/a_PyIO_find_line_ending(
1660n/a int translated, int universal, PyObject *readnl,
1661n/a int kind, const char *start, const char *end, Py_ssize_t *consumed)
1662n/a{
1663n/a Py_ssize_t len = ((char*)end - (char*)start)/kind;
1664n/a
1665n/a if (translated) {
1666n/a /* Newlines are already translated, only search for \n */
1667n/a const char *pos = find_control_char(kind, start, end, '\n');
1668n/a if (pos != NULL)
1669n/a return (pos - start)/kind + 1;
1670n/a else {
1671n/a *consumed = len;
1672n/a return -1;
1673n/a }
1674n/a }
1675n/a else if (universal) {
1676n/a /* Universal newline search. Find any of \r, \r\n, \n
1677n/a * The decoder ensures that \r\n are not split in two pieces
1678n/a */
1679n/a const char *s = start;
1680n/a for (;;) {
1681n/a Py_UCS4 ch;
1682n/a /* Fast path for non-control chars. The loop always ends
1683n/a since the Unicode string is NUL-terminated. */
1684n/a while (PyUnicode_READ(kind, s, 0) > '\r')
1685n/a s += kind;
1686n/a if (s >= end) {
1687n/a *consumed = len;
1688n/a return -1;
1689n/a }
1690n/a ch = PyUnicode_READ(kind, s, 0);
1691n/a s += kind;
1692n/a if (ch == '\n')
1693n/a return (s - start)/kind;
1694n/a if (ch == '\r') {
1695n/a if (PyUnicode_READ(kind, s, 0) == '\n')
1696n/a return (s - start)/kind + 1;
1697n/a else
1698n/a return (s - start)/kind;
1699n/a }
1700n/a }
1701n/a }
1702n/a else {
1703n/a /* Non-universal mode. */
1704n/a Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1705n/a Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
1706n/a /* Assume that readnl is an ASCII character. */
1707n/a assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
1708n/a if (readnl_len == 1) {
1709n/a const char *pos = find_control_char(kind, start, end, nl[0]);
1710n/a if (pos != NULL)
1711n/a return (pos - start)/kind + 1;
1712n/a *consumed = len;
1713n/a return -1;
1714n/a }
1715n/a else {
1716n/a const char *s = start;
1717n/a const char *e = end - (readnl_len - 1)*kind;
1718n/a const char *pos;
1719n/a if (e < s)
1720n/a e = s;
1721n/a while (s < e) {
1722n/a Py_ssize_t i;
1723n/a const char *pos = find_control_char(kind, s, end, nl[0]);
1724n/a if (pos == NULL || pos >= e)
1725n/a break;
1726n/a for (i = 1; i < readnl_len; i++) {
1727n/a if (PyUnicode_READ(kind, pos, i) != nl[i])
1728n/a break;
1729n/a }
1730n/a if (i == readnl_len)
1731n/a return (pos - start)/kind + readnl_len;
1732n/a s = pos + kind;
1733n/a }
1734n/a pos = find_control_char(kind, e, end, nl[0]);
1735n/a if (pos == NULL)
1736n/a *consumed = len;
1737n/a else
1738n/a *consumed = (pos - start)/kind;
1739n/a return -1;
1740n/a }
1741n/a }
1742n/a}
1743n/a
1744n/astatic PyObject *
1745n/a_textiowrapper_readline(textio *self, Py_ssize_t limit)
1746n/a{
1747n/a PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1748n/a Py_ssize_t start, endpos, chunked, offset_to_buffer;
1749n/a int res;
1750n/a
1751n/a CHECK_CLOSED(self);
1752n/a
1753n/a if (_textiowrapper_writeflush(self) < 0)
1754n/a return NULL;
1755n/a
1756n/a chunked = 0;
1757n/a
1758n/a while (1) {
1759n/a char *ptr;
1760n/a Py_ssize_t line_len;
1761n/a int kind;
1762n/a Py_ssize_t consumed = 0;
1763n/a
1764n/a /* First, get some data if necessary */
1765n/a res = 1;
1766n/a while (!self->decoded_chars ||
1767n/a !PyUnicode_GET_LENGTH(self->decoded_chars)) {
1768n/a res = textiowrapper_read_chunk(self, 0);
1769n/a if (res < 0) {
1770n/a /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1771n/a when EINTR occurs so we needn't do it ourselves. */
1772n/a if (_PyIO_trap_eintr()) {
1773n/a continue;
1774n/a }
1775n/a goto error;
1776n/a }
1777n/a if (res == 0)
1778n/a break;
1779n/a }
1780n/a if (res == 0) {
1781n/a /* end of file */
1782n/a textiowrapper_set_decoded_chars(self, NULL);
1783n/a Py_CLEAR(self->snapshot);
1784n/a start = endpos = offset_to_buffer = 0;
1785n/a break;
1786n/a }
1787n/a
1788n/a if (remaining == NULL) {
1789n/a line = self->decoded_chars;
1790n/a start = self->decoded_chars_used;
1791n/a offset_to_buffer = 0;
1792n/a Py_INCREF(line);
1793n/a }
1794n/a else {
1795n/a assert(self->decoded_chars_used == 0);
1796n/a line = PyUnicode_Concat(remaining, self->decoded_chars);
1797n/a start = 0;
1798n/a offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
1799n/a Py_CLEAR(remaining);
1800n/a if (line == NULL)
1801n/a goto error;
1802n/a if (PyUnicode_READY(line) == -1)
1803n/a goto error;
1804n/a }
1805n/a
1806n/a ptr = PyUnicode_DATA(line);
1807n/a line_len = PyUnicode_GET_LENGTH(line);
1808n/a kind = PyUnicode_KIND(line);
1809n/a
1810n/a endpos = _PyIO_find_line_ending(
1811n/a self->readtranslate, self->readuniversal, self->readnl,
1812n/a kind,
1813n/a ptr + kind * start,
1814n/a ptr + kind * line_len,
1815n/a &consumed);
1816n/a if (endpos >= 0) {
1817n/a endpos += start;
1818n/a if (limit >= 0 && (endpos - start) + chunked >= limit)
1819n/a endpos = start + limit - chunked;
1820n/a break;
1821n/a }
1822n/a
1823n/a /* We can put aside up to `endpos` */
1824n/a endpos = consumed + start;
1825n/a if (limit >= 0 && (endpos - start) + chunked >= limit) {
1826n/a /* Didn't find line ending, but reached length limit */
1827n/a endpos = start + limit - chunked;
1828n/a break;
1829n/a }
1830n/a
1831n/a if (endpos > start) {
1832n/a /* No line ending seen yet - put aside current data */
1833n/a PyObject *s;
1834n/a if (chunks == NULL) {
1835n/a chunks = PyList_New(0);
1836n/a if (chunks == NULL)
1837n/a goto error;
1838n/a }
1839n/a s = PyUnicode_Substring(line, start, endpos);
1840n/a if (s == NULL)
1841n/a goto error;
1842n/a if (PyList_Append(chunks, s) < 0) {
1843n/a Py_DECREF(s);
1844n/a goto error;
1845n/a }
1846n/a chunked += PyUnicode_GET_LENGTH(s);
1847n/a Py_DECREF(s);
1848n/a }
1849n/a /* There may be some remaining bytes we'll have to prepend to the
1850n/a next chunk of data */
1851n/a if (endpos < line_len) {
1852n/a remaining = PyUnicode_Substring(line, endpos, line_len);
1853n/a if (remaining == NULL)
1854n/a goto error;
1855n/a }
1856n/a Py_CLEAR(line);
1857n/a /* We have consumed the buffer */
1858n/a textiowrapper_set_decoded_chars(self, NULL);
1859n/a }
1860n/a
1861n/a if (line != NULL) {
1862n/a /* Our line ends in the current buffer */
1863n/a self->decoded_chars_used = endpos - offset_to_buffer;
1864n/a if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1865n/a PyObject *s = PyUnicode_Substring(line, start, endpos);
1866n/a Py_CLEAR(line);
1867n/a if (s == NULL)
1868n/a goto error;
1869n/a line = s;
1870n/a }
1871n/a }
1872n/a if (remaining != NULL) {
1873n/a if (chunks == NULL) {
1874n/a chunks = PyList_New(0);
1875n/a if (chunks == NULL)
1876n/a goto error;
1877n/a }
1878n/a if (PyList_Append(chunks, remaining) < 0)
1879n/a goto error;
1880n/a Py_CLEAR(remaining);
1881n/a }
1882n/a if (chunks != NULL) {
1883n/a if (line != NULL) {
1884n/a if (PyList_Append(chunks, line) < 0)
1885n/a goto error;
1886n/a Py_DECREF(line);
1887n/a }
1888n/a line = PyUnicode_Join(_PyIO_empty_str, chunks);
1889n/a if (line == NULL)
1890n/a goto error;
1891n/a Py_CLEAR(chunks);
1892n/a }
1893n/a if (line == NULL) {
1894n/a Py_INCREF(_PyIO_empty_str);
1895n/a line = _PyIO_empty_str;
1896n/a }
1897n/a
1898n/a return line;
1899n/a
1900n/a error:
1901n/a Py_XDECREF(chunks);
1902n/a Py_XDECREF(remaining);
1903n/a Py_XDECREF(line);
1904n/a return NULL;
1905n/a}
1906n/a
1907n/a/*[clinic input]
1908n/a_io.TextIOWrapper.readline
1909n/a size: Py_ssize_t = -1
1910n/a /
1911n/a[clinic start generated code]*/
1912n/a
1913n/astatic PyObject *
1914n/a_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
1915n/a/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
1916n/a{
1917n/a CHECK_ATTACHED(self);
1918n/a return _textiowrapper_readline(self, size);
1919n/a}
1920n/a
1921n/a/* Seek and Tell */
1922n/a
1923n/atypedef struct {
1924n/a Py_off_t start_pos;
1925n/a int dec_flags;
1926n/a int bytes_to_feed;
1927n/a int chars_to_skip;
1928n/a char need_eof;
1929n/a} cookie_type;
1930n/a
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   string the various CookieStruct fields will be stored.  Each offset is
   expressed relative to the field stored just before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1962n/a
1963n/astatic int
1964n/atextiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1965n/a{
1966n/a unsigned char buffer[COOKIE_BUF_LEN];
1967n/a PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1968n/a if (cookieLong == NULL)
1969n/a return -1;
1970n/a
1971n/a if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1972n/a PY_LITTLE_ENDIAN, 0) < 0) {
1973n/a Py_DECREF(cookieLong);
1974n/a return -1;
1975n/a }
1976n/a Py_DECREF(cookieLong);
1977n/a
1978n/a memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1979n/a memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1980n/a memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1981n/a memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1982n/a memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1983n/a
1984n/a return 0;
1985n/a}
1986n/a
1987n/astatic PyObject *
1988n/atextiowrapper_build_cookie(cookie_type *cookie)
1989n/a{
1990n/a unsigned char buffer[COOKIE_BUF_LEN];
1991n/a
1992n/a memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1993n/a memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1994n/a memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1995n/a memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1996n/a memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1997n/a
1998n/a return _PyLong_FromByteArray(buffer, sizeof(buffer),
1999n/a PY_LITTLE_ENDIAN, 0);
2000n/a}
2001n/a
2002n/astatic int
2003n/a_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2004n/a{
2005n/a PyObject *res;
2006n/a /* When seeking to the start of the stream, we call decoder.reset()
2007n/a rather than decoder.getstate().
2008n/a This is for a few decoders such as utf-16 for which the state value
2009n/a at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2010n/a utf-16, that we are expecting a BOM).
2011n/a */
2012n/a if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2013n/a res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2014n/a else
2015n/a res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2016n/a "((yi))", "", cookie->dec_flags);
2017n/a if (res == NULL)
2018n/a return -1;
2019n/a Py_DECREF(res);
2020n/a return 0;
2021n/a}
2022n/a
2023n/astatic int
2024n/a_textiowrapper_encoder_reset(textio *self, int start_of_stream)
2025n/a{
2026n/a PyObject *res;
2027n/a if (start_of_stream) {
2028n/a res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2029n/a self->encoding_start_of_stream = 1;
2030n/a }
2031n/a else {
2032n/a res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2033n/a _PyIO_zero, NULL);
2034n/a self->encoding_start_of_stream = 0;
2035n/a }
2036n/a if (res == NULL)
2037n/a return -1;
2038n/a Py_DECREF(res);
2039n/a return 0;
2040n/a}
2041n/a
2042n/astatic int
2043n/a_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2044n/a{
2045n/a /* Same as _textiowrapper_decoder_setstate() above. */
2046n/a return _textiowrapper_encoder_reset(
2047n/a self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2048n/a}
2049n/a
2050n/a/*[clinic input]
2051n/a_io.TextIOWrapper.seek
2052n/a cookie as cookieObj: object
2053n/a whence: int = 0
2054n/a /
2055n/a[clinic start generated code]*/
2056n/a
2057n/astatic PyObject *
2058n/a_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2059n/a/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2060n/a{
2061n/a PyObject *posobj;
2062n/a cookie_type cookie;
2063n/a PyObject *res;
2064n/a int cmp;
2065n/a
2066n/a CHECK_ATTACHED(self);
2067n/a CHECK_CLOSED(self);
2068n/a
2069n/a Py_INCREF(cookieObj);
2070n/a
2071n/a if (!self->seekable) {
2072n/a _unsupported("underlying stream is not seekable");
2073n/a goto fail;
2074n/a }
2075n/a
2076n/a if (whence == 1) {
2077n/a /* seek relative to current position */
2078n/a cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2079n/a if (cmp < 0)
2080n/a goto fail;
2081n/a
2082n/a if (cmp == 0) {
2083n/a _unsupported("can't do nonzero cur-relative seeks");
2084n/a goto fail;
2085n/a }
2086n/a
2087n/a /* Seeking to the current position should attempt to
2088n/a * sync the underlying buffer with the current position.
2089n/a */
2090n/a Py_DECREF(cookieObj);
2091n/a cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
2092n/a if (cookieObj == NULL)
2093n/a goto fail;
2094n/a }
2095n/a else if (whence == 2) {
2096n/a /* seek relative to end of file */
2097n/a cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2098n/a if (cmp < 0)
2099n/a goto fail;
2100n/a
2101n/a if (cmp == 0) {
2102n/a _unsupported("can't do nonzero end-relative seeks");
2103n/a goto fail;
2104n/a }
2105n/a
2106n/a res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2107n/a if (res == NULL)
2108n/a goto fail;
2109n/a Py_DECREF(res);
2110n/a
2111n/a textiowrapper_set_decoded_chars(self, NULL);
2112n/a Py_CLEAR(self->snapshot);
2113n/a if (self->decoder) {
2114n/a res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
2115n/a if (res == NULL)
2116n/a goto fail;
2117n/a Py_DECREF(res);
2118n/a }
2119n/a
2120n/a res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2121n/a Py_CLEAR(cookieObj);
2122n/a if (res == NULL)
2123n/a goto fail;
2124n/a if (self->encoder) {
2125n/a /* If seek() == 0, we are at the start of stream, otherwise not */
2126n/a cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
2127n/a if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2128n/a Py_DECREF(res);
2129n/a goto fail;
2130n/a }
2131n/a }
2132n/a return res;
2133n/a }
2134n/a else if (whence != 0) {
2135n/a PyErr_Format(PyExc_ValueError,
2136n/a "invalid whence (%d, should be 0, 1 or 2)", whence);
2137n/a goto fail;
2138n/a }
2139n/a
2140n/a cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2141n/a if (cmp < 0)
2142n/a goto fail;
2143n/a
2144n/a if (cmp == 1) {
2145n/a PyErr_Format(PyExc_ValueError,
2146n/a "negative seek position %R", cookieObj);
2147n/a goto fail;
2148n/a }
2149n/a
2150n/a res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2151n/a if (res == NULL)
2152n/a goto fail;
2153n/a Py_DECREF(res);
2154n/a
2155n/a /* The strategy of seek() is to go back to the safe start point
2156n/a * and replay the effect of read(chars_to_skip) from there.
2157n/a */
2158n/a if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2159n/a goto fail;
2160n/a
2161n/a /* Seek back to the safe start point. */
2162n/a posobj = PyLong_FromOff_t(cookie.start_pos);
2163n/a if (posobj == NULL)
2164n/a goto fail;
2165n/a res = PyObject_CallMethodObjArgs(self->buffer,
2166n/a _PyIO_str_seek, posobj, NULL);
2167n/a Py_DECREF(posobj);
2168n/a if (res == NULL)
2169n/a goto fail;
2170n/a Py_DECREF(res);
2171n/a
2172n/a textiowrapper_set_decoded_chars(self, NULL);
2173n/a Py_CLEAR(self->snapshot);
2174n/a
2175n/a /* Restore the decoder to its state from the safe start point. */
2176n/a if (self->decoder) {
2177n/a if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2178n/a goto fail;
2179n/a }
2180n/a
2181n/a if (cookie.chars_to_skip) {
2182n/a /* Just like _read_chunk, feed the decoder and save a snapshot. */
2183n/a PyObject *input_chunk = _PyObject_CallMethodId(
2184n/a self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2185n/a PyObject *decoded;
2186n/a
2187n/a if (input_chunk == NULL)
2188n/a goto fail;
2189n/a
2190n/a if (!PyBytes_Check(input_chunk)) {
2191n/a PyErr_Format(PyExc_TypeError,
2192n/a "underlying read() should have returned a bytes "
2193n/a "object, not '%.200s'",
2194n/a Py_TYPE(input_chunk)->tp_name);
2195n/a Py_DECREF(input_chunk);
2196n/a goto fail;
2197n/a }
2198n/a
2199n/a self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2200n/a if (self->snapshot == NULL) {
2201n/a Py_DECREF(input_chunk);
2202n/a goto fail;
2203n/a }
2204n/a
2205n/a decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2206n/a "Oi", input_chunk, (int)cookie.need_eof);
2207n/a
2208n/a if (check_decoded(decoded) < 0)
2209n/a goto fail;
2210n/a
2211n/a textiowrapper_set_decoded_chars(self, decoded);
2212n/a
2213n/a /* Skip chars_to_skip of the decoded characters. */
2214n/a if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2215n/a PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2216n/a goto fail;
2217n/a }
2218n/a self->decoded_chars_used = cookie.chars_to_skip;
2219n/a }
2220n/a else {
2221n/a self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2222n/a if (self->snapshot == NULL)
2223n/a goto fail;
2224n/a }
2225n/a
2226n/a /* Finally, reset the encoder (merely useful for proper BOM handling) */
2227n/a if (self->encoder) {
2228n/a if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2229n/a goto fail;
2230n/a }
2231n/a return cookieObj;
2232n/a fail:
2233n/a Py_XDECREF(cookieObj);
2234n/a return NULL;
2235n/a
2236n/a}
2237n/a
2238n/a/*[clinic input]
2239n/a_io.TextIOWrapper.tell
2240n/a[clinic start generated code]*/
2241n/a
2242n/astatic PyObject *
2243n/a_io_TextIOWrapper_tell_impl(textio *self)
2244n/a/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
2245n/a{
2246n/a PyObject *res;
2247n/a PyObject *posobj = NULL;
2248n/a cookie_type cookie = {0,0,0,0,0};
2249n/a PyObject *next_input;
2250n/a Py_ssize_t chars_to_skip, chars_decoded;
2251n/a Py_ssize_t skip_bytes, skip_back;
2252n/a PyObject *saved_state = NULL;
2253n/a char *input, *input_end;
2254n/a Py_ssize_t dec_buffer_len;
2255n/a int dec_flags;
2256n/a
2257n/a CHECK_ATTACHED(self);
2258n/a CHECK_CLOSED(self);
2259n/a
2260n/a if (!self->seekable) {
2261n/a _unsupported("underlying stream is not seekable");
2262n/a goto fail;
2263n/a }
2264n/a if (!self->telling) {
2265n/a PyErr_SetString(PyExc_IOError,
2266n/a "telling position disabled by next() call");
2267n/a goto fail;
2268n/a }
2269n/a
2270n/a if (_textiowrapper_writeflush(self) < 0)
2271n/a return NULL;
2272n/a res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2273n/a if (res == NULL)
2274n/a goto fail;
2275n/a Py_DECREF(res);
2276n/a
2277n/a posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
2278n/a if (posobj == NULL)
2279n/a goto fail;
2280n/a
2281n/a if (self->decoder == NULL || self->snapshot == NULL) {
2282n/a assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2283n/a return posobj;
2284n/a }
2285n/a
2286n/a#if defined(HAVE_LARGEFILE_SUPPORT)
2287n/a cookie.start_pos = PyLong_AsLongLong(posobj);
2288n/a#else
2289n/a cookie.start_pos = PyLong_AsLong(posobj);
2290n/a#endif
2291n/a Py_DECREF(posobj);
2292n/a if (PyErr_Occurred())
2293n/a goto fail;
2294n/a
2295n/a /* Skip backward to the snapshot point (see _read_chunk). */
2296n/a if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2297n/a goto fail;
2298n/a
2299n/a assert (PyBytes_Check(next_input));
2300n/a
2301n/a cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2302n/a
2303n/a /* How many decoded characters have been used up since the snapshot? */
2304n/a if (self->decoded_chars_used == 0) {
2305n/a /* We haven't moved from the snapshot point. */
2306n/a return textiowrapper_build_cookie(&cookie);
2307n/a }
2308n/a
2309n/a chars_to_skip = self->decoded_chars_used;
2310n/a
2311n/a /* Decoder state will be restored at the end */
2312n/a saved_state = PyObject_CallMethodObjArgs(self->decoder,
2313n/a _PyIO_str_getstate, NULL);
2314n/a if (saved_state == NULL)
2315n/a goto fail;
2316n/a
2317n/a#define DECODER_GETSTATE() do { \
2318n/a PyObject *dec_buffer; \
2319n/a PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2320n/a _PyIO_str_getstate, NULL); \
2321n/a if (_state == NULL) \
2322n/a goto fail; \
2323n/a if (!PyArg_ParseTuple(_state, "Oi", &dec_buffer, &dec_flags)) { \
2324n/a Py_DECREF(_state); \
2325n/a goto fail; \
2326n/a } \
2327n/a if (!PyBytes_Check(dec_buffer)) { \
2328n/a PyErr_Format(PyExc_TypeError, \
2329n/a "decoder getstate() should have returned a bytes " \
2330n/a "object, not '%.200s'", \
2331n/a Py_TYPE(dec_buffer)->tp_name); \
2332n/a Py_DECREF(_state); \
2333n/a goto fail; \
2334n/a } \
2335n/a dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2336n/a Py_DECREF(_state); \
2337n/a } while (0)
2338n/a
2339n/a#define DECODER_DECODE(start, len, res) do { \
2340n/a PyObject *_decoded = _PyObject_CallMethodId( \
2341n/a self->decoder, &PyId_decode, "y#", start, len); \
2342n/a if (check_decoded(_decoded) < 0) \
2343n/a goto fail; \
2344n/a res = PyUnicode_GET_LENGTH(_decoded); \
2345n/a Py_DECREF(_decoded); \
2346n/a } while (0)
2347n/a
2348n/a /* Fast search for an acceptable start point, close to our
2349n/a current pos */
2350n/a skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2351n/a skip_back = 1;
2352n/a assert(skip_back <= PyBytes_GET_SIZE(next_input));
2353n/a input = PyBytes_AS_STRING(next_input);
2354n/a while (skip_bytes > 0) {
2355n/a /* Decode up to temptative start point */
2356n/a if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2357n/a goto fail;
2358n/a DECODER_DECODE(input, skip_bytes, chars_decoded);
2359n/a if (chars_decoded <= chars_to_skip) {
2360n/a DECODER_GETSTATE();
2361n/a if (dec_buffer_len == 0) {
2362n/a /* Before pos and no bytes buffered in decoder => OK */
2363n/a cookie.dec_flags = dec_flags;
2364n/a chars_to_skip -= chars_decoded;
2365n/a break;
2366n/a }
2367n/a /* Skip back by buffered amount and reset heuristic */
2368n/a skip_bytes -= dec_buffer_len;
2369n/a skip_back = 1;
2370n/a }
2371n/a else {
2372n/a /* We're too far ahead, skip back a bit */
2373n/a skip_bytes -= skip_back;
2374n/a skip_back *= 2;
2375n/a }
2376n/a }
2377n/a if (skip_bytes <= 0) {
2378n/a skip_bytes = 0;
2379n/a if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2380n/a goto fail;
2381n/a }
2382n/a
2383n/a /* Note our initial start point. */
2384n/a cookie.start_pos += skip_bytes;
2385n/a cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2386n/a if (chars_to_skip == 0)
2387n/a goto finally;
2388n/a
2389n/a /* We should be close to the desired position. Now feed the decoder one
2390n/a * byte at a time until we reach the `chars_to_skip` target.
2391n/a * As we go, note the nearest "safe start point" before the current
2392n/a * location (a point where the decoder has nothing buffered, so seek()
2393n/a * can safely start from there and advance to this location).
2394n/a */
2395n/a chars_decoded = 0;
2396n/a input = PyBytes_AS_STRING(next_input);
2397n/a input_end = input + PyBytes_GET_SIZE(next_input);
2398n/a input += skip_bytes;
2399n/a while (input < input_end) {
2400n/a Py_ssize_t n;
2401n/a
2402n/a DECODER_DECODE(input, (Py_ssize_t)1, n);
2403n/a /* We got n chars for 1 byte */
2404n/a chars_decoded += n;
2405n/a cookie.bytes_to_feed += 1;
2406n/a DECODER_GETSTATE();
2407n/a
2408n/a if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2409n/a /* Decoder buffer is empty, so this is a safe start point. */
2410n/a cookie.start_pos += cookie.bytes_to_feed;
2411n/a chars_to_skip -= chars_decoded;
2412n/a cookie.dec_flags = dec_flags;
2413n/a cookie.bytes_to_feed = 0;
2414n/a chars_decoded = 0;
2415n/a }
2416n/a if (chars_decoded >= chars_to_skip)
2417n/a break;
2418n/a input++;
2419n/a }
2420n/a if (input == input_end) {
2421n/a /* We didn't get enough decoded data; signal EOF to get more. */
2422n/a PyObject *decoded = _PyObject_CallMethodId(
2423n/a self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
2424n/a if (check_decoded(decoded) < 0)
2425n/a goto fail;
2426n/a chars_decoded += PyUnicode_GET_LENGTH(decoded);
2427n/a Py_DECREF(decoded);
2428n/a cookie.need_eof = 1;
2429n/a
2430n/a if (chars_decoded < chars_to_skip) {
2431n/a PyErr_SetString(PyExc_IOError,
2432n/a "can't reconstruct logical file position");
2433n/a goto fail;
2434n/a }
2435n/a }
2436n/a
2437n/afinally:
2438n/a res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
2439n/a Py_DECREF(saved_state);
2440n/a if (res == NULL)
2441n/a return NULL;
2442n/a Py_DECREF(res);
2443n/a
2444n/a /* The returned cookie corresponds to the last safe start point. */
2445n/a cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2446n/a return textiowrapper_build_cookie(&cookie);
2447n/a
2448n/afail:
2449n/a if (saved_state) {
2450n/a PyObject *type, *value, *traceback;
2451n/a PyErr_Fetch(&type, &value, &traceback);
2452n/a res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
2453n/a _PyErr_ChainExceptions(type, value, traceback);
2454n/a Py_DECREF(saved_state);
2455n/a Py_XDECREF(res);
2456n/a }
2457n/a return NULL;
2458n/a}
2459n/a
2460n/a/*[clinic input]
2461n/a_io.TextIOWrapper.truncate
2462n/a pos: object = None
2463n/a /
2464n/a[clinic start generated code]*/
2465n/a
2466n/astatic PyObject *
2467n/a_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2468n/a/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2469n/a{
2470n/a PyObject *res;
2471n/a
2472n/a CHECK_ATTACHED(self)
2473n/a
2474n/a res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2475n/a if (res == NULL)
2476n/a return NULL;
2477n/a Py_DECREF(res);
2478n/a
2479n/a return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2480n/a}
2481n/a
2482n/astatic PyObject *
2483n/atextiowrapper_repr(textio *self)
2484n/a{
2485n/a PyObject *nameobj, *modeobj, *res, *s;
2486n/a
2487n/a CHECK_INITIALIZED(self);
2488n/a
2489n/a res = PyUnicode_FromString("<_io.TextIOWrapper");
2490n/a if (res == NULL)
2491n/a return NULL;
2492n/a
2493n/a nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
2494n/a if (nameobj == NULL) {
2495n/a if (PyErr_ExceptionMatches(PyExc_Exception))
2496n/a PyErr_Clear();
2497n/a else
2498n/a goto error;
2499n/a }
2500n/a else {
2501n/a s = PyUnicode_FromFormat(" name=%R", nameobj);
2502n/a Py_DECREF(nameobj);
2503n/a if (s == NULL)
2504n/a goto error;
2505n/a PyUnicode_AppendAndDel(&res, s);
2506n/a if (res == NULL)
2507n/a return NULL;
2508n/a }
2509n/a modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
2510n/a if (modeobj == NULL) {
2511n/a if (PyErr_ExceptionMatches(PyExc_Exception))
2512n/a PyErr_Clear();
2513n/a else
2514n/a goto error;
2515n/a }
2516n/a else {
2517n/a s = PyUnicode_FromFormat(" mode=%R", modeobj);
2518n/a Py_DECREF(modeobj);
2519n/a if (s == NULL)
2520n/a goto error;
2521n/a PyUnicode_AppendAndDel(&res, s);
2522n/a if (res == NULL)
2523n/a return NULL;
2524n/a }
2525n/a s = PyUnicode_FromFormat("%U encoding=%R>",
2526n/a res, self->encoding);
2527n/a Py_DECREF(res);
2528n/a return s;
2529n/aerror:
2530n/a Py_XDECREF(res);
2531n/a return NULL;
2532n/a}
2533n/a
2534n/a
2535n/a/* Inquiries */
2536n/a
2537n/a/*[clinic input]
2538n/a_io.TextIOWrapper.fileno
2539n/a[clinic start generated code]*/
2540n/a
2541n/astatic PyObject *
2542n/a_io_TextIOWrapper_fileno_impl(textio *self)
2543n/a/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2544n/a{
2545n/a CHECK_ATTACHED(self);
2546n/a return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
2547n/a}
2548n/a
2549n/a/*[clinic input]
2550n/a_io.TextIOWrapper.seekable
2551n/a[clinic start generated code]*/
2552n/a
2553n/astatic PyObject *
2554n/a_io_TextIOWrapper_seekable_impl(textio *self)
2555n/a/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2556n/a{
2557n/a CHECK_ATTACHED(self);
2558n/a return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
2559n/a}
2560n/a
2561n/a/*[clinic input]
2562n/a_io.TextIOWrapper.readable
2563n/a[clinic start generated code]*/
2564n/a
2565n/astatic PyObject *
2566n/a_io_TextIOWrapper_readable_impl(textio *self)
2567n/a/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2568n/a{
2569n/a CHECK_ATTACHED(self);
2570n/a return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
2571n/a}
2572n/a
2573n/a/*[clinic input]
2574n/a_io.TextIOWrapper.writable
2575n/a[clinic start generated code]*/
2576n/a
2577n/astatic PyObject *
2578n/a_io_TextIOWrapper_writable_impl(textio *self)
2579n/a/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2580n/a{
2581n/a CHECK_ATTACHED(self);
2582n/a return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
2583n/a}
2584n/a
2585n/a/*[clinic input]
2586n/a_io.TextIOWrapper.isatty
2587n/a[clinic start generated code]*/
2588n/a
2589n/astatic PyObject *
2590n/a_io_TextIOWrapper_isatty_impl(textio *self)
2591n/a/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
2592n/a{
2593n/a CHECK_ATTACHED(self);
2594n/a return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
2595n/a}
2596n/a
2597n/astatic PyObject *
2598n/atextiowrapper_getstate(textio *self, PyObject *args)
2599n/a{
2600n/a PyErr_Format(PyExc_TypeError,
2601n/a "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2602n/a return NULL;
2603n/a}
2604n/a
2605n/a/*[clinic input]
2606n/a_io.TextIOWrapper.flush
2607n/a[clinic start generated code]*/
2608n/a
2609n/astatic PyObject *
2610n/a_io_TextIOWrapper_flush_impl(textio *self)
2611n/a/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
2612n/a{
2613n/a CHECK_ATTACHED(self);
2614n/a CHECK_CLOSED(self);
2615n/a self->telling = self->seekable;
2616n/a if (_textiowrapper_writeflush(self) < 0)
2617n/a return NULL;
2618n/a return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
2619n/a}
2620n/a
2621n/a/*[clinic input]
2622n/a_io.TextIOWrapper.close
2623n/a[clinic start generated code]*/
2624n/a
2625n/astatic PyObject *
2626n/a_io_TextIOWrapper_close_impl(textio *self)
2627n/a/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
2628n/a{
2629n/a PyObject *res;
2630n/a int r;
2631n/a CHECK_ATTACHED(self);
2632n/a
2633n/a res = textiowrapper_closed_get(self, NULL);
2634n/a if (res == NULL)
2635n/a return NULL;
2636n/a r = PyObject_IsTrue(res);
2637n/a Py_DECREF(res);
2638n/a if (r < 0)
2639n/a return NULL;
2640n/a
2641n/a if (r > 0) {
2642n/a Py_RETURN_NONE; /* stream already closed */
2643n/a }
2644n/a else {
2645n/a PyObject *exc = NULL, *val, *tb;
2646n/a if (self->finalizing) {
2647n/a res = _PyObject_CallMethodIdObjArgs(self->buffer,
2648n/a &PyId__dealloc_warn,
2649n/a self, NULL);
2650n/a if (res)
2651n/a Py_DECREF(res);
2652n/a else
2653n/a PyErr_Clear();
2654n/a }
2655n/a res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2656n/a if (res == NULL)
2657n/a PyErr_Fetch(&exc, &val, &tb);
2658n/a else
2659n/a Py_DECREF(res);
2660n/a
2661n/a res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2662n/a if (exc != NULL) {
2663n/a _PyErr_ChainExceptions(exc, val, tb);
2664n/a Py_CLEAR(res);
2665n/a }
2666n/a return res;
2667n/a }
2668n/a}
2669n/a
2670n/astatic PyObject *
2671n/atextiowrapper_iternext(textio *self)
2672n/a{
2673n/a PyObject *line;
2674n/a
2675n/a CHECK_ATTACHED(self);
2676n/a
2677n/a self->telling = 0;
2678n/a if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2679n/a /* Skip method call overhead for speed */
2680n/a line = _textiowrapper_readline(self, -1);
2681n/a }
2682n/a else {
2683n/a line = PyObject_CallMethodObjArgs((PyObject *)self,
2684n/a _PyIO_str_readline, NULL);
2685n/a if (line && !PyUnicode_Check(line)) {
2686n/a PyErr_Format(PyExc_IOError,
2687n/a "readline() should have returned a str object, "
2688n/a "not '%.200s'", Py_TYPE(line)->tp_name);
2689n/a Py_DECREF(line);
2690n/a return NULL;
2691n/a }
2692n/a }
2693n/a
2694n/a if (line == NULL || PyUnicode_READY(line) == -1)
2695n/a return NULL;
2696n/a
2697n/a if (PyUnicode_GET_LENGTH(line) == 0) {
2698n/a /* Reached EOF or would have blocked */
2699n/a Py_DECREF(line);
2700n/a Py_CLEAR(self->snapshot);
2701n/a self->telling = self->seekable;
2702n/a return NULL;
2703n/a }
2704n/a
2705n/a return line;
2706n/a}
2707n/a
2708n/astatic PyObject *
2709n/atextiowrapper_name_get(textio *self, void *context)
2710n/a{
2711n/a CHECK_ATTACHED(self);
2712n/a return _PyObject_GetAttrId(self->buffer, &PyId_name);
2713n/a}
2714n/a
2715n/astatic PyObject *
2716n/atextiowrapper_closed_get(textio *self, void *context)
2717n/a{
2718n/a CHECK_ATTACHED(self);
2719n/a return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2720n/a}
2721n/a
2722n/astatic PyObject *
2723n/atextiowrapper_newlines_get(textio *self, void *context)
2724n/a{
2725n/a PyObject *res;
2726n/a CHECK_ATTACHED(self);
2727n/a if (self->decoder == NULL)
2728n/a Py_RETURN_NONE;
2729n/a res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2730n/a if (res == NULL) {
2731n/a if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2732n/a PyErr_Clear();
2733n/a Py_RETURN_NONE;
2734n/a }
2735n/a else {
2736n/a return NULL;
2737n/a }
2738n/a }
2739n/a return res;
2740n/a}
2741n/a
2742n/astatic PyObject *
2743n/atextiowrapper_errors_get(textio *self, void *context)
2744n/a{
2745n/a CHECK_INITIALIZED(self);
2746n/a return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2747n/a}
2748n/a
2749n/astatic PyObject *
2750n/atextiowrapper_chunk_size_get(textio *self, void *context)
2751n/a{
2752n/a CHECK_ATTACHED(self);
2753n/a return PyLong_FromSsize_t(self->chunk_size);
2754n/a}
2755n/a
2756n/astatic int
2757n/atextiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2758n/a{
2759n/a Py_ssize_t n;
2760n/a CHECK_ATTACHED_INT(self);
2761n/a n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
2762n/a if (n == -1 && PyErr_Occurred())
2763n/a return -1;
2764n/a if (n <= 0) {
2765n/a PyErr_SetString(PyExc_ValueError,
2766n/a "a strictly positive integer is required");
2767n/a return -1;
2768n/a }
2769n/a self->chunk_size = n;
2770n/a return 0;
2771n/a}
2772n/a
2773n/a#include "clinic/textio.c.h"
2774n/a
2775n/astatic PyMethodDef incrementalnewlinedecoder_methods[] = {
2776n/a _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
2777n/a _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
2778n/a _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
2779n/a _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
2780n/a {NULL}
2781n/a};
2782n/a
2783n/astatic PyGetSetDef incrementalnewlinedecoder_getset[] = {
2784n/a {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
2785n/a {NULL}
2786n/a};
2787n/a
2788n/aPyTypeObject PyIncrementalNewlineDecoder_Type = {
2789n/a PyVarObject_HEAD_INIT(NULL, 0)
2790n/a "_io.IncrementalNewlineDecoder", /*tp_name*/
2791n/a sizeof(nldecoder_object), /*tp_basicsize*/
2792n/a 0, /*tp_itemsize*/
2793n/a (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
2794n/a 0, /*tp_print*/
2795n/a 0, /*tp_getattr*/
2796n/a 0, /*tp_setattr*/
2797n/a 0, /*tp_compare */
2798n/a 0, /*tp_repr*/
2799n/a 0, /*tp_as_number*/
2800n/a 0, /*tp_as_sequence*/
2801n/a 0, /*tp_as_mapping*/
2802n/a 0, /*tp_hash */
2803n/a 0, /*tp_call*/
2804n/a 0, /*tp_str*/
2805n/a 0, /*tp_getattro*/
2806n/a 0, /*tp_setattro*/
2807n/a 0, /*tp_as_buffer*/
2808n/a Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
2809n/a _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
2810n/a 0, /* tp_traverse */
2811n/a 0, /* tp_clear */
2812n/a 0, /* tp_richcompare */
2813n/a 0, /*tp_weaklistoffset*/
2814n/a 0, /* tp_iter */
2815n/a 0, /* tp_iternext */
2816n/a incrementalnewlinedecoder_methods, /* tp_methods */
2817n/a 0, /* tp_members */
2818n/a incrementalnewlinedecoder_getset, /* tp_getset */
2819n/a 0, /* tp_base */
2820n/a 0, /* tp_dict */
2821n/a 0, /* tp_descr_get */
2822n/a 0, /* tp_descr_set */
2823n/a 0, /* tp_dictoffset */
2824n/a _io_IncrementalNewlineDecoder___init__, /* tp_init */
2825n/a 0, /* tp_alloc */
2826n/a PyType_GenericNew, /* tp_new */
2827n/a};
2828n/a
2829n/a
2830n/astatic PyMethodDef textiowrapper_methods[] = {
2831n/a _IO_TEXTIOWRAPPER_DETACH_METHODDEF
2832n/a _IO_TEXTIOWRAPPER_WRITE_METHODDEF
2833n/a _IO_TEXTIOWRAPPER_READ_METHODDEF
2834n/a _IO_TEXTIOWRAPPER_READLINE_METHODDEF
2835n/a _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
2836n/a _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
2837n/a
2838n/a _IO_TEXTIOWRAPPER_FILENO_METHODDEF
2839n/a _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
2840n/a _IO_TEXTIOWRAPPER_READABLE_METHODDEF
2841n/a _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
2842n/a _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
2843n/a {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
2844n/a
2845n/a _IO_TEXTIOWRAPPER_SEEK_METHODDEF
2846n/a _IO_TEXTIOWRAPPER_TELL_METHODDEF
2847n/a _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
2848n/a {NULL, NULL}
2849n/a};
2850n/a
2851n/astatic PyMemberDef textiowrapper_members[] = {
2852n/a {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2853n/a {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2854n/a {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2855n/a {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
2856n/a {NULL}
2857n/a};
2858n/a
2859n/astatic PyGetSetDef textiowrapper_getset[] = {
2860n/a {"name", (getter)textiowrapper_name_get, NULL, NULL},
2861n/a {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2862n/a/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2863n/a*/
2864n/a {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2865n/a {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2866n/a {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2867n/a (setter)textiowrapper_chunk_size_set, NULL},
2868n/a {NULL}
2869n/a};
2870n/a
2871n/aPyTypeObject PyTextIOWrapper_Type = {
2872n/a PyVarObject_HEAD_INIT(NULL, 0)
2873n/a "_io.TextIOWrapper", /*tp_name*/
2874n/a sizeof(textio), /*tp_basicsize*/
2875n/a 0, /*tp_itemsize*/
2876n/a (destructor)textiowrapper_dealloc, /*tp_dealloc*/
2877n/a 0, /*tp_print*/
2878n/a 0, /*tp_getattr*/
2879n/a 0, /*tps_etattr*/
2880n/a 0, /*tp_compare */
2881n/a (reprfunc)textiowrapper_repr,/*tp_repr*/
2882n/a 0, /*tp_as_number*/
2883n/a 0, /*tp_as_sequence*/
2884n/a 0, /*tp_as_mapping*/
2885n/a 0, /*tp_hash */
2886n/a 0, /*tp_call*/
2887n/a 0, /*tp_str*/
2888n/a 0, /*tp_getattro*/
2889n/a 0, /*tp_setattro*/
2890n/a 0, /*tp_as_buffer*/
2891n/a Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2892n/a | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
2893n/a _io_TextIOWrapper___init____doc__, /* tp_doc */
2894n/a (traverseproc)textiowrapper_traverse, /* tp_traverse */
2895n/a (inquiry)textiowrapper_clear, /* tp_clear */
2896n/a 0, /* tp_richcompare */
2897n/a offsetof(textio, weakreflist), /*tp_weaklistoffset*/
2898n/a 0, /* tp_iter */
2899n/a (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2900n/a textiowrapper_methods, /* tp_methods */
2901n/a textiowrapper_members, /* tp_members */
2902n/a textiowrapper_getset, /* tp_getset */
2903n/a 0, /* tp_base */
2904n/a 0, /* tp_dict */
2905n/a 0, /* tp_descr_get */
2906n/a 0, /* tp_descr_set */
2907n/a offsetof(textio, dict), /*tp_dictoffset*/
2908n/a _io_TextIOWrapper___init__, /* tp_init */
2909n/a 0, /* tp_alloc */
2910n/a PyType_GenericNew, /* tp_new */
2911n/a 0, /* tp_free */
2912n/a 0, /* tp_is_gc */
2913n/a 0, /* tp_bases */
2914n/a 0, /* tp_mro */
2915n/a 0, /* tp_cache */
2916n/a 0, /* tp_subclasses */
2917n/a 0, /* tp_weaklist */
2918n/a 0, /* tp_del */
2919n/a 0, /* tp_version_tag */
2920n/a 0, /* tp_finalize */
2921n/a};