»Core Development>Code coverage>Modules/_csv.c

Python code coverage for Modules/_csv.c

# count content
1n/a/* csv module */
2n/a
3n/a/*
4n/a
5n/aThis module provides the low-level underpinnings of a CSV reading/writing
6n/amodule. Users should not use this module directly, but import the csv.py
7n/amodule instead.
8n/a
9n/a*/
10n/a
11n/a#define MODULE_VERSION "1.0"
12n/a
13n/a#include "Python.h"
14n/a#include "structmember.h"
15n/a
16n/a
17n/atypedef struct {
18n/a PyObject *error_obj; /* CSV exception */
19n/a PyObject *dialects; /* Dialect registry */
20n/a long field_limit; /* max parsed field size */
21n/a} _csvstate;
22n/a
23n/a#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24n/a
25n/astatic int
26n/a_csv_clear(PyObject *m)
27n/a{
28n/a Py_CLEAR(_csvstate(m)->error_obj);
29n/a Py_CLEAR(_csvstate(m)->dialects);
30n/a return 0;
31n/a}
32n/a
33n/astatic int
34n/a_csv_traverse(PyObject *m, visitproc visit, void *arg)
35n/a{
36n/a Py_VISIT(_csvstate(m)->error_obj);
37n/a Py_VISIT(_csvstate(m)->dialects);
38n/a return 0;
39n/a}
40n/a
41n/astatic void
42n/a_csv_free(void *m)
43n/a{
44n/a _csv_clear((PyObject *)m);
45n/a}
46n/a
47n/astatic struct PyModuleDef _csvmodule;
48n/a
49n/a#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
50n/a
/* States of the reader's character-driven parser (see parse_process_char). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
56n/a
/* Quoting styles a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of all known styles; the final entry has a NULL name and acts as
 * the terminator for loops that walk the table. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
73n/a
74n/atypedef struct {
75n/a PyObject_HEAD
76n/a
77n/a int doublequote; /* is " represented by ""? */
78n/a Py_UCS4 delimiter; /* field separator */
79n/a Py_UCS4 quotechar; /* quote character */
80n/a Py_UCS4 escapechar; /* escape character */
81n/a int skipinitialspace; /* ignore spaces following delimiter? */
82n/a PyObject *lineterminator; /* string to write between records */
83n/a int quoting; /* style of quoting to write */
84n/a
85n/a int strict; /* raise exception on bad CSV */
86n/a} DialectObj;
87n/a
88n/astatic PyTypeObject Dialect_Type;
89n/a
90n/atypedef struct {
91n/a PyObject_HEAD
92n/a
93n/a PyObject *input_iter; /* iterate over this for input lines */
94n/a
95n/a DialectObj *dialect; /* parsing dialect */
96n/a
97n/a PyObject *fields; /* field list for current record */
98n/a ParserState state; /* current CSV parse state */
99n/a Py_UCS4 *field; /* temporary buffer */
100n/a Py_ssize_t field_size; /* size of allocated buffer */
101n/a Py_ssize_t field_len; /* length of current field */
102n/a int numeric_field; /* treat field as numeric */
103n/a unsigned long line_num; /* Source-file line number */
104n/a} ReaderObj;
105n/a
106n/astatic PyTypeObject Reader_Type;
107n/a
108n/a#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
109n/a
110n/atypedef struct {
111n/a PyObject_HEAD
112n/a
113n/a PyObject *writeline; /* write output lines to this file */
114n/a
115n/a DialectObj *dialect; /* parsing dialect */
116n/a
117n/a Py_UCS4 *rec; /* buffer for parser.join */
118n/a Py_ssize_t rec_size; /* size of allocated record */
119n/a Py_ssize_t rec_len; /* length of record */
120n/a int num_fields; /* number of fields in record */
121n/a} WriterObj;
122n/a
123n/astatic PyTypeObject Writer_Type;
124n/a
125n/a/*
126n/a * DIALECT class
127n/a */
128n/a
129n/astatic PyObject *
130n/aget_dialect_from_registry(PyObject * name_obj)
131n/a{
132n/a PyObject *dialect_obj;
133n/a
134n/a dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
135n/a if (dialect_obj == NULL) {
136n/a if (!PyErr_Occurred())
137n/a PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
138n/a }
139n/a else
140n/a Py_INCREF(dialect_obj);
141n/a return dialect_obj;
142n/a}
143n/a
144n/astatic PyObject *
145n/aget_string(PyObject *str)
146n/a{
147n/a Py_XINCREF(str);
148n/a return str;
149n/a}
150n/a
151n/astatic PyObject *
152n/aget_nullchar_as_None(Py_UCS4 c)
153n/a{
154n/a if (c == '\0') {
155n/a Py_RETURN_NONE;
156n/a }
157n/a else
158n/a return PyUnicode_FromOrdinal(c);
159n/a}
160n/a
161n/astatic PyObject *
162n/aDialect_get_lineterminator(DialectObj *self)
163n/a{
164n/a return get_string(self->lineterminator);
165n/a}
166n/a
167n/astatic PyObject *
168n/aDialect_get_delimiter(DialectObj *self)
169n/a{
170n/a return get_nullchar_as_None(self->delimiter);
171n/a}
172n/a
173n/astatic PyObject *
174n/aDialect_get_escapechar(DialectObj *self)
175n/a{
176n/a return get_nullchar_as_None(self->escapechar);
177n/a}
178n/a
179n/astatic PyObject *
180n/aDialect_get_quotechar(DialectObj *self)
181n/a{
182n/a return get_nullchar_as_None(self->quotechar);
183n/a}
184n/a
185n/astatic PyObject *
186n/aDialect_get_quoting(DialectObj *self)
187n/a{
188n/a return PyLong_FromLong(self->quoting);
189n/a}
190n/a
191n/astatic int
192n/a_set_bool(const char *name, int *target, PyObject *src, int dflt)
193n/a{
194n/a if (src == NULL)
195n/a *target = dflt;
196n/a else {
197n/a int b = PyObject_IsTrue(src);
198n/a if (b < 0)
199n/a return -1;
200n/a *target = b;
201n/a }
202n/a return 0;
203n/a}
204n/a
205n/astatic int
206n/a_set_int(const char *name, int *target, PyObject *src, int dflt)
207n/a{
208n/a if (src == NULL)
209n/a *target = dflt;
210n/a else {
211n/a int value;
212n/a if (!PyLong_CheckExact(src)) {
213n/a PyErr_Format(PyExc_TypeError,
214n/a "\"%s\" must be an integer", name);
215n/a return -1;
216n/a }
217n/a value = _PyLong_AsInt(src);
218n/a if (value == -1 && PyErr_Occurred()) {
219n/a return -1;
220n/a }
221n/a *target = value;
222n/a }
223n/a return 0;
224n/a}
225n/a
226n/astatic int
227n/a_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
228n/a{
229n/a if (src == NULL)
230n/a *target = dflt;
231n/a else {
232n/a *target = '\0';
233n/a if (src != Py_None) {
234n/a Py_ssize_t len;
235n/a if (!PyUnicode_Check(src)) {
236n/a PyErr_Format(PyExc_TypeError,
237n/a "\"%s\" must be string, not %.200s", name,
238n/a src->ob_type->tp_name);
239n/a return -1;
240n/a }
241n/a len = PyUnicode_GetLength(src);
242n/a if (len > 1) {
243n/a PyErr_Format(PyExc_TypeError,
244n/a "\"%s\" must be a 1-character string",
245n/a name);
246n/a return -1;
247n/a }
248n/a /* PyUnicode_READY() is called in PyUnicode_GetLength() */
249n/a if (len > 0)
250n/a *target = PyUnicode_READ_CHAR(src, 0);
251n/a }
252n/a }
253n/a return 0;
254n/a}
255n/a
256n/astatic int
257n/a_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
258n/a{
259n/a if (src == NULL)
260n/a *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
261n/a else {
262n/a if (src == Py_None)
263n/a *target = NULL;
264n/a else if (!PyUnicode_Check(src)) {
265n/a PyErr_Format(PyExc_TypeError,
266n/a "\"%s\" must be a string", name);
267n/a return -1;
268n/a }
269n/a else {
270n/a if (PyUnicode_READY(src) == -1)
271n/a return -1;
272n/a Py_INCREF(src);
273n/a Py_XSETREF(*target, src);
274n/a }
275n/a }
276n/a return 0;
277n/a}
278n/a
279n/astatic int
280n/adialect_check_quoting(int quoting)
281n/a{
282n/a const StyleDesc *qs;
283n/a
284n/a for (qs = quote_styles; qs->name; qs++) {
285n/a if ((int)qs->style == quoting)
286n/a return 0;
287n/a }
288n/a PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
289n/a return -1;
290n/a}
291n/a
292n/a#define D_OFF(x) offsetof(DialectObj, x)
293n/a
294n/astatic struct PyMemberDef Dialect_memberlist[] = {
295n/a { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
296n/a { "doublequote", T_INT, D_OFF(doublequote), READONLY },
297n/a { "strict", T_INT, D_OFF(strict), READONLY },
298n/a { NULL }
299n/a};
300n/a
301n/astatic PyGetSetDef Dialect_getsetlist[] = {
302n/a { "delimiter", (getter)Dialect_get_delimiter},
303n/a { "escapechar", (getter)Dialect_get_escapechar},
304n/a { "lineterminator", (getter)Dialect_get_lineterminator},
305n/a { "quotechar", (getter)Dialect_get_quotechar},
306n/a { "quoting", (getter)Dialect_get_quoting},
307n/a {NULL},
308n/a};
309n/a
310n/astatic void
311n/aDialect_dealloc(DialectObj *self)
312n/a{
313n/a Py_XDECREF(self->lineterminator);
314n/a Py_TYPE(self)->tp_free((PyObject *)self);
315n/a}
316n/a
317n/astatic char *dialect_kws[] = {
318n/a "dialect",
319n/a "delimiter",
320n/a "doublequote",
321n/a "escapechar",
322n/a "lineterminator",
323n/a "quotechar",
324n/a "quoting",
325n/a "skipinitialspace",
326n/a "strict",
327n/a NULL
328n/a};
329n/a
330n/astatic PyObject *
331n/adialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
332n/a{
333n/a DialectObj *self;
334n/a PyObject *ret = NULL;
335n/a PyObject *dialect = NULL;
336n/a PyObject *delimiter = NULL;
337n/a PyObject *doublequote = NULL;
338n/a PyObject *escapechar = NULL;
339n/a PyObject *lineterminator = NULL;
340n/a PyObject *quotechar = NULL;
341n/a PyObject *quoting = NULL;
342n/a PyObject *skipinitialspace = NULL;
343n/a PyObject *strict = NULL;
344n/a
345n/a if (!PyArg_ParseTupleAndKeywords(args, kwargs,
346n/a "|OOOOOOOOO", dialect_kws,
347n/a &dialect,
348n/a &delimiter,
349n/a &doublequote,
350n/a &escapechar,
351n/a &lineterminator,
352n/a &quotechar,
353n/a &quoting,
354n/a &skipinitialspace,
355n/a &strict))
356n/a return NULL;
357n/a
358n/a if (dialect != NULL) {
359n/a if (PyUnicode_Check(dialect)) {
360n/a dialect = get_dialect_from_registry(dialect);
361n/a if (dialect == NULL)
362n/a return NULL;
363n/a }
364n/a else
365n/a Py_INCREF(dialect);
366n/a /* Can we reuse this instance? */
367n/a if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
368n/a delimiter == 0 &&
369n/a doublequote == 0 &&
370n/a escapechar == 0 &&
371n/a lineterminator == 0 &&
372n/a quotechar == 0 &&
373n/a quoting == 0 &&
374n/a skipinitialspace == 0 &&
375n/a strict == 0)
376n/a return dialect;
377n/a }
378n/a
379n/a self = (DialectObj *)type->tp_alloc(type, 0);
380n/a if (self == NULL) {
381n/a Py_XDECREF(dialect);
382n/a return NULL;
383n/a }
384n/a self->lineterminator = NULL;
385n/a
386n/a Py_XINCREF(delimiter);
387n/a Py_XINCREF(doublequote);
388n/a Py_XINCREF(escapechar);
389n/a Py_XINCREF(lineterminator);
390n/a Py_XINCREF(quotechar);
391n/a Py_XINCREF(quoting);
392n/a Py_XINCREF(skipinitialspace);
393n/a Py_XINCREF(strict);
394n/a if (dialect != NULL) {
395n/a#define DIALECT_GETATTR(v, n) \
396n/a if (v == NULL) \
397n/a v = PyObject_GetAttrString(dialect, n)
398n/a DIALECT_GETATTR(delimiter, "delimiter");
399n/a DIALECT_GETATTR(doublequote, "doublequote");
400n/a DIALECT_GETATTR(escapechar, "escapechar");
401n/a DIALECT_GETATTR(lineterminator, "lineterminator");
402n/a DIALECT_GETATTR(quotechar, "quotechar");
403n/a DIALECT_GETATTR(quoting, "quoting");
404n/a DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
405n/a DIALECT_GETATTR(strict, "strict");
406n/a PyErr_Clear();
407n/a }
408n/a
409n/a /* check types and convert to C values */
410n/a#define DIASET(meth, name, target, src, dflt) \
411n/a if (meth(name, target, src, dflt)) \
412n/a goto err
413n/a DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
414n/a DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
415n/a DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
416n/a DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
417n/a DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
418n/a DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
419n/a DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
420n/a DIASET(_set_bool, "strict", &self->strict, strict, 0);
421n/a
422n/a /* validate options */
423n/a if (dialect_check_quoting(self->quoting))
424n/a goto err;
425n/a if (self->delimiter == 0) {
426n/a PyErr_SetString(PyExc_TypeError,
427n/a "\"delimiter\" must be a 1-character string");
428n/a goto err;
429n/a }
430n/a if (quotechar == Py_None && quoting == NULL)
431n/a self->quoting = QUOTE_NONE;
432n/a if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
433n/a PyErr_SetString(PyExc_TypeError,
434n/a "quotechar must be set if quoting enabled");
435n/a goto err;
436n/a }
437n/a if (self->lineterminator == 0) {
438n/a PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
439n/a goto err;
440n/a }
441n/a
442n/a ret = (PyObject *)self;
443n/a Py_INCREF(self);
444n/aerr:
445n/a Py_XDECREF(self);
446n/a Py_XDECREF(dialect);
447n/a Py_XDECREF(delimiter);
448n/a Py_XDECREF(doublequote);
449n/a Py_XDECREF(escapechar);
450n/a Py_XDECREF(lineterminator);
451n/a Py_XDECREF(quotechar);
452n/a Py_XDECREF(quoting);
453n/a Py_XDECREF(skipinitialspace);
454n/a Py_XDECREF(strict);
455n/a return ret;
456n/a}
457n/a
458n/a
459n/aPyDoc_STRVAR(Dialect_Type_doc,
460n/a"CSV dialect\n"
461n/a"\n"
462n/a"The Dialect type records CSV parsing and generation options.\n");
463n/a
464n/astatic PyTypeObject Dialect_Type = {
465n/a PyVarObject_HEAD_INIT(NULL, 0)
466n/a "_csv.Dialect", /* tp_name */
467n/a sizeof(DialectObj), /* tp_basicsize */
468n/a 0, /* tp_itemsize */
469n/a /* methods */
470n/a (destructor)Dialect_dealloc, /* tp_dealloc */
471n/a (printfunc)0, /* tp_print */
472n/a (getattrfunc)0, /* tp_getattr */
473n/a (setattrfunc)0, /* tp_setattr */
474n/a 0, /* tp_reserved */
475n/a (reprfunc)0, /* tp_repr */
476n/a 0, /* tp_as_number */
477n/a 0, /* tp_as_sequence */
478n/a 0, /* tp_as_mapping */
479n/a (hashfunc)0, /* tp_hash */
480n/a (ternaryfunc)0, /* tp_call */
481n/a (reprfunc)0, /* tp_str */
482n/a 0, /* tp_getattro */
483n/a 0, /* tp_setattro */
484n/a 0, /* tp_as_buffer */
485n/a Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
486n/a Dialect_Type_doc, /* tp_doc */
487n/a 0, /* tp_traverse */
488n/a 0, /* tp_clear */
489n/a 0, /* tp_richcompare */
490n/a 0, /* tp_weaklistoffset */
491n/a 0, /* tp_iter */
492n/a 0, /* tp_iternext */
493n/a 0, /* tp_methods */
494n/a Dialect_memberlist, /* tp_members */
495n/a Dialect_getsetlist, /* tp_getset */
496n/a 0, /* tp_base */
497n/a 0, /* tp_dict */
498n/a 0, /* tp_descr_get */
499n/a 0, /* tp_descr_set */
500n/a 0, /* tp_dictoffset */
501n/a 0, /* tp_init */
502n/a 0, /* tp_alloc */
503n/a dialect_new, /* tp_new */
504n/a 0, /* tp_free */
505n/a};
506n/a
507n/a/*
508n/a * Return an instance of the dialect type, given a Python instance or kwarg
509n/a * description of the dialect
510n/a */
511n/astatic PyObject *
512n/a_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
513n/a{
514n/a PyObject *type = (PyObject *)&Dialect_Type;
515n/a if (dialect_inst) {
516n/a return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
517n/a }
518n/a else {
519n/a return _PyObject_FastCallDict(type, NULL, 0, kwargs);
520n/a }
521n/a}
522n/a
523n/a/*
524n/a * READER
525n/a */
526n/astatic int
527n/aparse_save_field(ReaderObj *self)
528n/a{
529n/a PyObject *field;
530n/a
531n/a field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
532n/a (void *) self->field, self->field_len);
533n/a if (field == NULL)
534n/a return -1;
535n/a self->field_len = 0;
536n/a if (self->numeric_field) {
537n/a PyObject *tmp;
538n/a
539n/a self->numeric_field = 0;
540n/a tmp = PyNumber_Float(field);
541n/a Py_DECREF(field);
542n/a if (tmp == NULL)
543n/a return -1;
544n/a field = tmp;
545n/a }
546n/a if (PyList_Append(self->fields, field) < 0) {
547n/a Py_DECREF(field);
548n/a return -1;
549n/a }
550n/a Py_DECREF(field);
551n/a return 0;
552n/a}
553n/a
554n/astatic int
555n/aparse_grow_buff(ReaderObj *self)
556n/a{
557n/a if (self->field_size == 0) {
558n/a self->field_size = 4096;
559n/a if (self->field != NULL)
560n/a PyMem_Free(self->field);
561n/a self->field = PyMem_New(Py_UCS4, self->field_size);
562n/a }
563n/a else {
564n/a Py_UCS4 *field = self->field;
565n/a if (self->field_size > PY_SSIZE_T_MAX / 2) {
566n/a PyErr_NoMemory();
567n/a return 0;
568n/a }
569n/a self->field_size *= 2;
570n/a self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
571n/a }
572n/a if (self->field == NULL) {
573n/a PyErr_NoMemory();
574n/a return 0;
575n/a }
576n/a return 1;
577n/a}
578n/a
579n/astatic int
580n/aparse_add_char(ReaderObj *self, Py_UCS4 c)
581n/a{
582n/a if (self->field_len >= _csvstate_global->field_limit) {
583n/a PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
584n/a _csvstate_global->field_limit);
585n/a return -1;
586n/a }
587n/a if (self->field_len == self->field_size && !parse_grow_buff(self))
588n/a return -1;
589n/a self->field[self->field_len++] = c;
590n/a return 0;
591n/a}
592n/a
593n/astatic int
594n/aparse_process_char(ReaderObj *self, Py_UCS4 c)
595n/a{
596n/a DialectObj *dialect = self->dialect;
597n/a
598n/a switch (self->state) {
599n/a case START_RECORD:
600n/a /* start of record */
601n/a if (c == '\0')
602n/a /* empty line - return [] */
603n/a break;
604n/a else if (c == '\n' || c == '\r') {
605n/a self->state = EAT_CRNL;
606n/a break;
607n/a }
608n/a /* normal character - handle as START_FIELD */
609n/a self->state = START_FIELD;
610n/a /* fallthru */
611n/a case START_FIELD:
612n/a /* expecting field */
613n/a if (c == '\n' || c == '\r' || c == '\0') {
614n/a /* save empty field - return [fields] */
615n/a if (parse_save_field(self) < 0)
616n/a return -1;
617n/a self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
618n/a }
619n/a else if (c == dialect->quotechar &&
620n/a dialect->quoting != QUOTE_NONE) {
621n/a /* start quoted field */
622n/a self->state = IN_QUOTED_FIELD;
623n/a }
624n/a else if (c == dialect->escapechar) {
625n/a /* possible escaped character */
626n/a self->state = ESCAPED_CHAR;
627n/a }
628n/a else if (c == ' ' && dialect->skipinitialspace)
629n/a /* ignore space at start of field */
630n/a ;
631n/a else if (c == dialect->delimiter) {
632n/a /* save empty field */
633n/a if (parse_save_field(self) < 0)
634n/a return -1;
635n/a }
636n/a else {
637n/a /* begin new unquoted field */
638n/a if (dialect->quoting == QUOTE_NONNUMERIC)
639n/a self->numeric_field = 1;
640n/a if (parse_add_char(self, c) < 0)
641n/a return -1;
642n/a self->state = IN_FIELD;
643n/a }
644n/a break;
645n/a
646n/a case ESCAPED_CHAR:
647n/a if (c == '\n' || c=='\r') {
648n/a if (parse_add_char(self, c) < 0)
649n/a return -1;
650n/a self->state = AFTER_ESCAPED_CRNL;
651n/a break;
652n/a }
653n/a if (c == '\0')
654n/a c = '\n';
655n/a if (parse_add_char(self, c) < 0)
656n/a return -1;
657n/a self->state = IN_FIELD;
658n/a break;
659n/a
660n/a case AFTER_ESCAPED_CRNL:
661n/a if (c == '\0')
662n/a break;
663n/a /*fallthru*/
664n/a
665n/a case IN_FIELD:
666n/a /* in unquoted field */
667n/a if (c == '\n' || c == '\r' || c == '\0') {
668n/a /* end of line - return [fields] */
669n/a if (parse_save_field(self) < 0)
670n/a return -1;
671n/a self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
672n/a }
673n/a else if (c == dialect->escapechar) {
674n/a /* possible escaped character */
675n/a self->state = ESCAPED_CHAR;
676n/a }
677n/a else if (c == dialect->delimiter) {
678n/a /* save field - wait for new field */
679n/a if (parse_save_field(self) < 0)
680n/a return -1;
681n/a self->state = START_FIELD;
682n/a }
683n/a else {
684n/a /* normal character - save in field */
685n/a if (parse_add_char(self, c) < 0)
686n/a return -1;
687n/a }
688n/a break;
689n/a
690n/a case IN_QUOTED_FIELD:
691n/a /* in quoted field */
692n/a if (c == '\0')
693n/a ;
694n/a else if (c == dialect->escapechar) {
695n/a /* Possible escape character */
696n/a self->state = ESCAPE_IN_QUOTED_FIELD;
697n/a }
698n/a else if (c == dialect->quotechar &&
699n/a dialect->quoting != QUOTE_NONE) {
700n/a if (dialect->doublequote) {
701n/a /* doublequote; " represented by "" */
702n/a self->state = QUOTE_IN_QUOTED_FIELD;
703n/a }
704n/a else {
705n/a /* end of quote part of field */
706n/a self->state = IN_FIELD;
707n/a }
708n/a }
709n/a else {
710n/a /* normal character - save in field */
711n/a if (parse_add_char(self, c) < 0)
712n/a return -1;
713n/a }
714n/a break;
715n/a
716n/a case ESCAPE_IN_QUOTED_FIELD:
717n/a if (c == '\0')
718n/a c = '\n';
719n/a if (parse_add_char(self, c) < 0)
720n/a return -1;
721n/a self->state = IN_QUOTED_FIELD;
722n/a break;
723n/a
724n/a case QUOTE_IN_QUOTED_FIELD:
725n/a /* doublequote - seen a quote in a quoted field */
726n/a if (dialect->quoting != QUOTE_NONE &&
727n/a c == dialect->quotechar) {
728n/a /* save "" as " */
729n/a if (parse_add_char(self, c) < 0)
730n/a return -1;
731n/a self->state = IN_QUOTED_FIELD;
732n/a }
733n/a else if (c == dialect->delimiter) {
734n/a /* save field - wait for new field */
735n/a if (parse_save_field(self) < 0)
736n/a return -1;
737n/a self->state = START_FIELD;
738n/a }
739n/a else if (c == '\n' || c == '\r' || c == '\0') {
740n/a /* end of line - return [fields] */
741n/a if (parse_save_field(self) < 0)
742n/a return -1;
743n/a self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
744n/a }
745n/a else if (!dialect->strict) {
746n/a if (parse_add_char(self, c) < 0)
747n/a return -1;
748n/a self->state = IN_FIELD;
749n/a }
750n/a else {
751n/a /* illegal */
752n/a PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
753n/a dialect->delimiter,
754n/a dialect->quotechar);
755n/a return -1;
756n/a }
757n/a break;
758n/a
759n/a case EAT_CRNL:
760n/a if (c == '\n' || c == '\r')
761n/a ;
762n/a else if (c == '\0')
763n/a self->state = START_RECORD;
764n/a else {
765n/a PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
766n/a return -1;
767n/a }
768n/a break;
769n/a
770n/a }
771n/a return 0;
772n/a}
773n/a
774n/astatic int
775n/aparse_reset(ReaderObj *self)
776n/a{
777n/a Py_XSETREF(self->fields, PyList_New(0));
778n/a if (self->fields == NULL)
779n/a return -1;
780n/a self->field_len = 0;
781n/a self->state = START_RECORD;
782n/a self->numeric_field = 0;
783n/a return 0;
784n/a}
785n/a
786n/astatic PyObject *
787n/aReader_iternext(ReaderObj *self)
788n/a{
789n/a PyObject *fields = NULL;
790n/a Py_UCS4 c;
791n/a Py_ssize_t pos, linelen;
792n/a unsigned int kind;
793n/a void *data;
794n/a PyObject *lineobj;
795n/a
796n/a if (parse_reset(self) < 0)
797n/a return NULL;
798n/a do {
799n/a lineobj = PyIter_Next(self->input_iter);
800n/a if (lineobj == NULL) {
801n/a /* End of input OR exception */
802n/a if (!PyErr_Occurred() && (self->field_len != 0 ||
803n/a self->state == IN_QUOTED_FIELD)) {
804n/a if (self->dialect->strict)
805n/a PyErr_SetString(_csvstate_global->error_obj,
806n/a "unexpected end of data");
807n/a else if (parse_save_field(self) >= 0)
808n/a break;
809n/a }
810n/a return NULL;
811n/a }
812n/a if (!PyUnicode_Check(lineobj)) {
813n/a PyErr_Format(_csvstate_global->error_obj,
814n/a "iterator should return strings, "
815n/a "not %.200s "
816n/a "(did you open the file in text mode?)",
817n/a lineobj->ob_type->tp_name
818n/a );
819n/a Py_DECREF(lineobj);
820n/a return NULL;
821n/a }
822n/a if (PyUnicode_READY(lineobj) == -1) {
823n/a Py_DECREF(lineobj);
824n/a return NULL;
825n/a }
826n/a ++self->line_num;
827n/a kind = PyUnicode_KIND(lineobj);
828n/a data = PyUnicode_DATA(lineobj);
829n/a pos = 0;
830n/a linelen = PyUnicode_GET_LENGTH(lineobj);
831n/a while (linelen--) {
832n/a c = PyUnicode_READ(kind, data, pos);
833n/a if (c == '\0') {
834n/a Py_DECREF(lineobj);
835n/a PyErr_Format(_csvstate_global->error_obj,
836n/a "line contains NULL byte");
837n/a goto err;
838n/a }
839n/a if (parse_process_char(self, c) < 0) {
840n/a Py_DECREF(lineobj);
841n/a goto err;
842n/a }
843n/a pos++;
844n/a }
845n/a Py_DECREF(lineobj);
846n/a if (parse_process_char(self, 0) < 0)
847n/a goto err;
848n/a } while (self->state != START_RECORD);
849n/a
850n/a fields = self->fields;
851n/a self->fields = NULL;
852n/aerr:
853n/a return fields;
854n/a}
855n/a
856n/astatic void
857n/aReader_dealloc(ReaderObj *self)
858n/a{
859n/a PyObject_GC_UnTrack(self);
860n/a Py_XDECREF(self->dialect);
861n/a Py_XDECREF(self->input_iter);
862n/a Py_XDECREF(self->fields);
863n/a if (self->field != NULL)
864n/a PyMem_Free(self->field);
865n/a PyObject_GC_Del(self);
866n/a}
867n/a
868n/astatic int
869n/aReader_traverse(ReaderObj *self, visitproc visit, void *arg)
870n/a{
871n/a Py_VISIT(self->dialect);
872n/a Py_VISIT(self->input_iter);
873n/a Py_VISIT(self->fields);
874n/a return 0;
875n/a}
876n/a
877n/astatic int
878n/aReader_clear(ReaderObj *self)
879n/a{
880n/a Py_CLEAR(self->dialect);
881n/a Py_CLEAR(self->input_iter);
882n/a Py_CLEAR(self->fields);
883n/a return 0;
884n/a}
885n/a
886n/aPyDoc_STRVAR(Reader_Type_doc,
887n/a"CSV reader\n"
888n/a"\n"
889n/a"Reader objects are responsible for reading and parsing tabular data\n"
890n/a"in CSV format.\n"
891n/a);
892n/a
893n/astatic struct PyMethodDef Reader_methods[] = {
894n/a { NULL, NULL }
895n/a};
896n/a#define R_OFF(x) offsetof(ReaderObj, x)
897n/a
898n/astatic struct PyMemberDef Reader_memberlist[] = {
899n/a { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
900n/a { "line_num", T_ULONG, R_OFF(line_num), READONLY },
901n/a { NULL }
902n/a};
903n/a
904n/a
905n/astatic PyTypeObject Reader_Type = {
906n/a PyVarObject_HEAD_INIT(NULL, 0)
907n/a "_csv.reader", /*tp_name*/
908n/a sizeof(ReaderObj), /*tp_basicsize*/
909n/a 0, /*tp_itemsize*/
910n/a /* methods */
911n/a (destructor)Reader_dealloc, /*tp_dealloc*/
912n/a (printfunc)0, /*tp_print*/
913n/a (getattrfunc)0, /*tp_getattr*/
914n/a (setattrfunc)0, /*tp_setattr*/
915n/a 0, /*tp_reserved*/
916n/a (reprfunc)0, /*tp_repr*/
917n/a 0, /*tp_as_number*/
918n/a 0, /*tp_as_sequence*/
919n/a 0, /*tp_as_mapping*/
920n/a (hashfunc)0, /*tp_hash*/
921n/a (ternaryfunc)0, /*tp_call*/
922n/a (reprfunc)0, /*tp_str*/
923n/a 0, /*tp_getattro*/
924n/a 0, /*tp_setattro*/
925n/a 0, /*tp_as_buffer*/
926n/a Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
927n/a Py_TPFLAGS_HAVE_GC, /*tp_flags*/
928n/a Reader_Type_doc, /*tp_doc*/
929n/a (traverseproc)Reader_traverse, /*tp_traverse*/
930n/a (inquiry)Reader_clear, /*tp_clear*/
931n/a 0, /*tp_richcompare*/
932n/a 0, /*tp_weaklistoffset*/
933n/a PyObject_SelfIter, /*tp_iter*/
934n/a (getiterfunc)Reader_iternext, /*tp_iternext*/
935n/a Reader_methods, /*tp_methods*/
936n/a Reader_memberlist, /*tp_members*/
937n/a 0, /*tp_getset*/
938n/a
939n/a};
940n/a
941n/astatic PyObject *
942n/acsv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
943n/a{
944n/a PyObject * iterator, * dialect = NULL;
945n/a ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
946n/a
947n/a if (!self)
948n/a return NULL;
949n/a
950n/a self->dialect = NULL;
951n/a self->fields = NULL;
952n/a self->input_iter = NULL;
953n/a self->field = NULL;
954n/a self->field_size = 0;
955n/a self->line_num = 0;
956n/a
957n/a if (parse_reset(self) < 0) {
958n/a Py_DECREF(self);
959n/a return NULL;
960n/a }
961n/a
962n/a if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
963n/a Py_DECREF(self);
964n/a return NULL;
965n/a }
966n/a self->input_iter = PyObject_GetIter(iterator);
967n/a if (self->input_iter == NULL) {
968n/a PyErr_SetString(PyExc_TypeError,
969n/a "argument 1 must be an iterator");
970n/a Py_DECREF(self);
971n/a return NULL;
972n/a }
973n/a self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
974n/a if (self->dialect == NULL) {
975n/a Py_DECREF(self);
976n/a return NULL;
977n/a }
978n/a
979n/a PyObject_GC_Track(self);
980n/a return (PyObject *)self;
981n/a}
982n/a
983n/a/*
984n/a * WRITER
985n/a */
986n/a/* ---------------------------------------------------------------- */
987n/astatic void
988n/ajoin_reset(WriterObj *self)
989n/a{
990n/a self->rec_len = 0;
991n/a self->num_fields = 0;
992n/a}
993n/a
994n/a#define MEM_INCR 32768
995n/a
996n/a/* Calculate new record length or append field to record. Return new
997n/a * record length.
998n/a */
999n/astatic Py_ssize_t
1000n/ajoin_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
1001n/a Py_ssize_t field_len, int *quoted,
1002n/a int copy_phase)
1003n/a{
1004n/a DialectObj *dialect = self->dialect;
1005n/a int i;
1006n/a Py_ssize_t rec_len;
1007n/a
1008n/a#define INCLEN \
1009n/a do {\
1010n/a if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1011n/a goto overflow; \
1012n/a } \
1013n/a rec_len++; \
1014n/a } while(0)
1015n/a
1016n/a#define ADDCH(c) \
1017n/a do {\
1018n/a if (copy_phase) \
1019n/a self->rec[rec_len] = c;\
1020n/a INCLEN;\
1021n/a } while(0)
1022n/a
1023n/a rec_len = self->rec_len;
1024n/a
1025n/a /* If this is not the first field we need a field separator */
1026n/a if (self->num_fields > 0)
1027n/a ADDCH(dialect->delimiter);
1028n/a
1029n/a /* Handle preceding quote */
1030n/a if (copy_phase && *quoted)
1031n/a ADDCH(dialect->quotechar);
1032n/a
1033n/a /* Copy/count field data */
1034n/a /* If field is null just pass over */
1035n/a for (i = 0; field_data && (i < field_len); i++) {
1036n/a Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1037n/a int want_escape = 0;
1038n/a
1039n/a if (c == dialect->delimiter ||
1040n/a c == dialect->escapechar ||
1041n/a c == dialect->quotechar ||
1042n/a PyUnicode_FindChar(
1043n/a dialect->lineterminator, c, 0,
1044n/a PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1045n/a if (dialect->quoting == QUOTE_NONE)
1046n/a want_escape = 1;
1047n/a else {
1048n/a if (c == dialect->quotechar) {
1049n/a if (dialect->doublequote)
1050n/a ADDCH(dialect->quotechar);
1051n/a else
1052n/a want_escape = 1;
1053n/a }
1054n/a if (!want_escape)
1055n/a *quoted = 1;
1056n/a }
1057n/a if (want_escape) {
1058n/a if (!dialect->escapechar) {
1059n/a PyErr_Format(_csvstate_global->error_obj,
1060n/a "need to escape, but no escapechar set");
1061n/a return -1;
1062n/a }
1063n/a ADDCH(dialect->escapechar);
1064n/a }
1065n/a }
1066n/a /* Copy field character into record buffer.
1067n/a */
1068n/a ADDCH(c);
1069n/a }
1070n/a
1071n/a if (*quoted) {
1072n/a if (copy_phase)
1073n/a ADDCH(dialect->quotechar);
1074n/a else {
1075n/a INCLEN; /* starting quote */
1076n/a INCLEN; /* ending quote */
1077n/a }
1078n/a }
1079n/a return rec_len;
1080n/a
1081n/a overflow:
1082n/a PyErr_NoMemory();
1083n/a return -1;
1084n/a#undef ADDCH
1085n/a#undef INCLEN
1086n/a}
1087n/a
1088n/astatic int
1089n/ajoin_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1090n/a{
1091n/a
1092n/a if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
1093n/a PyErr_NoMemory();
1094n/a return 0;
1095n/a }
1096n/a
1097n/a if (rec_len > self->rec_size) {
1098n/a if (self->rec_size == 0) {
1099n/a self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1100n/a if (self->rec != NULL)
1101n/a PyMem_Free(self->rec);
1102n/a self->rec = PyMem_New(Py_UCS4, self->rec_size);
1103n/a }
1104n/a else {
1105n/a Py_UCS4* old_rec = self->rec;
1106n/a
1107n/a self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1108n/a self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
1109n/a if (self->rec == NULL)
1110n/a PyMem_Free(old_rec);
1111n/a }
1112n/a if (self->rec == NULL) {
1113n/a PyErr_NoMemory();
1114n/a return 0;
1115n/a }
1116n/a }
1117n/a return 1;
1118n/a}
1119n/a
1120n/astatic int
1121n/ajoin_append(WriterObj *self, PyObject *field, int quoted)
1122n/a{
1123n/a unsigned int field_kind = -1;
1124n/a void *field_data = NULL;
1125n/a Py_ssize_t field_len = 0;
1126n/a Py_ssize_t rec_len;
1127n/a
1128n/a if (field != NULL) {
1129n/a if (PyUnicode_READY(field) == -1)
1130n/a return 0;
1131n/a field_kind = PyUnicode_KIND(field);
1132n/a field_data = PyUnicode_DATA(field);
1133n/a field_len = PyUnicode_GET_LENGTH(field);
1134n/a }
1135n/a rec_len = join_append_data(self, field_kind, field_data, field_len,
1136n/a &quoted, 0);
1137n/a if (rec_len < 0)
1138n/a return 0;
1139n/a
1140n/a /* grow record buffer if necessary */
1141n/a if (!join_check_rec_size(self, rec_len))
1142n/a return 0;
1143n/a
1144n/a self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1145n/a &quoted, 1);
1146n/a self->num_fields++;
1147n/a
1148n/a return 1;
1149n/a}
1150n/a
1151n/astatic int
1152n/ajoin_append_lineterminator(WriterObj *self)
1153n/a{
1154n/a Py_ssize_t terminator_len, i;
1155n/a unsigned int term_kind;
1156n/a void *term_data;
1157n/a
1158n/a terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1159n/a if (terminator_len == -1)
1160n/a return 0;
1161n/a
1162n/a /* grow record buffer if necessary */
1163n/a if (!join_check_rec_size(self, self->rec_len + terminator_len))
1164n/a return 0;
1165n/a
1166n/a term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1167n/a term_data = PyUnicode_DATA(self->dialect->lineterminator);
1168n/a for (i = 0; i < terminator_len; i++)
1169n/a self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1170n/a self->rec_len += terminator_len;
1171n/a
1172n/a return 1;
1173n/a}
1174n/a
1175n/aPyDoc_STRVAR(csv_writerow_doc,
1176n/a"writerow(iterable)\n"
1177n/a"\n"
1178n/a"Construct and write a CSV record from an iterable of fields. Non-string\n"
1179n/a"elements will be converted to string.");
1180n/a
1181n/astatic PyObject *
1182n/acsv_writerow(WriterObj *self, PyObject *seq)
1183n/a{
1184n/a DialectObj *dialect = self->dialect;
1185n/a PyObject *iter, *field, *line, *result;
1186n/a
1187n/a iter = PyObject_GetIter(seq);
1188n/a if (iter == NULL)
1189n/a return PyErr_Format(_csvstate_global->error_obj,
1190n/a "iterable expected, not %.200s",
1191n/a seq->ob_type->tp_name);
1192n/a
1193n/a /* Join all fields in internal buffer.
1194n/a */
1195n/a join_reset(self);
1196n/a while ((field = PyIter_Next(iter))) {
1197n/a int append_ok;
1198n/a int quoted;
1199n/a
1200n/a switch (dialect->quoting) {
1201n/a case QUOTE_NONNUMERIC:
1202n/a quoted = !PyNumber_Check(field);
1203n/a break;
1204n/a case QUOTE_ALL:
1205n/a quoted = 1;
1206n/a break;
1207n/a default:
1208n/a quoted = 0;
1209n/a break;
1210n/a }
1211n/a
1212n/a if (PyUnicode_Check(field)) {
1213n/a append_ok = join_append(self, field, quoted);
1214n/a Py_DECREF(field);
1215n/a }
1216n/a else if (field == Py_None) {
1217n/a append_ok = join_append(self, NULL, quoted);
1218n/a Py_DECREF(field);
1219n/a }
1220n/a else {
1221n/a PyObject *str;
1222n/a
1223n/a str = PyObject_Str(field);
1224n/a Py_DECREF(field);
1225n/a if (str == NULL) {
1226n/a Py_DECREF(iter);
1227n/a return NULL;
1228n/a }
1229n/a append_ok = join_append(self, str, quoted);
1230n/a Py_DECREF(str);
1231n/a }
1232n/a if (!append_ok) {
1233n/a Py_DECREF(iter);
1234n/a return NULL;
1235n/a }
1236n/a }
1237n/a Py_DECREF(iter);
1238n/a if (PyErr_Occurred())
1239n/a return NULL;
1240n/a
1241n/a if (self->num_fields > 0 && self->rec_size == 0) {
1242n/a if (dialect->quoting == QUOTE_NONE) {
1243n/a PyErr_Format(_csvstate_global->error_obj,
1244n/a "single empty field record must be quoted");
1245n/a return NULL;
1246n/a }
1247n/a self->num_fields--;
1248n/a if (!join_append(self, NULL, 1))
1249n/a return NULL;
1250n/a }
1251n/a
1252n/a /* Add line terminator.
1253n/a */
1254n/a if (!join_append_lineterminator(self))
1255n/a return NULL;
1256n/a
1257n/a line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1258n/a (void *) self->rec, self->rec_len);
1259n/a if (line == NULL)
1260n/a return NULL;
1261n/a result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1262n/a Py_DECREF(line);
1263n/a return result;
1264n/a}
1265n/a
1266n/aPyDoc_STRVAR(csv_writerows_doc,
1267n/a"writerows(iterable of iterables)\n"
1268n/a"\n"
1269n/a"Construct and write a series of iterables to a csv file. Non-string\n"
1270n/a"elements will be converted to string.");
1271n/a
1272n/astatic PyObject *
1273n/acsv_writerows(WriterObj *self, PyObject *seqseq)
1274n/a{
1275n/a PyObject *row_iter, *row_obj, *result;
1276n/a
1277n/a row_iter = PyObject_GetIter(seqseq);
1278n/a if (row_iter == NULL) {
1279n/a PyErr_SetString(PyExc_TypeError,
1280n/a "writerows() argument must be iterable");
1281n/a return NULL;
1282n/a }
1283n/a while ((row_obj = PyIter_Next(row_iter))) {
1284n/a result = csv_writerow(self, row_obj);
1285n/a Py_DECREF(row_obj);
1286n/a if (!result) {
1287n/a Py_DECREF(row_iter);
1288n/a return NULL;
1289n/a }
1290n/a else
1291n/a Py_DECREF(result);
1292n/a }
1293n/a Py_DECREF(row_iter);
1294n/a if (PyErr_Occurred())
1295n/a return NULL;
1296n/a Py_RETURN_NONE;
1297n/a}
1298n/a
1299n/astatic struct PyMethodDef Writer_methods[] = {
1300n/a { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1301n/a { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1302n/a { NULL, NULL }
1303n/a};
1304n/a
1305n/a#define W_OFF(x) offsetof(WriterObj, x)
1306n/a
1307n/astatic struct PyMemberDef Writer_memberlist[] = {
1308n/a { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1309n/a { NULL }
1310n/a};
1311n/a
1312n/astatic void
1313n/aWriter_dealloc(WriterObj *self)
1314n/a{
1315n/a PyObject_GC_UnTrack(self);
1316n/a Py_XDECREF(self->dialect);
1317n/a Py_XDECREF(self->writeline);
1318n/a if (self->rec != NULL)
1319n/a PyMem_Free(self->rec);
1320n/a PyObject_GC_Del(self);
1321n/a}
1322n/a
1323n/astatic int
1324n/aWriter_traverse(WriterObj *self, visitproc visit, void *arg)
1325n/a{
1326n/a Py_VISIT(self->dialect);
1327n/a Py_VISIT(self->writeline);
1328n/a return 0;
1329n/a}
1330n/a
1331n/astatic int
1332n/aWriter_clear(WriterObj *self)
1333n/a{
1334n/a Py_CLEAR(self->dialect);
1335n/a Py_CLEAR(self->writeline);
1336n/a return 0;
1337n/a}
1338n/a
1339n/aPyDoc_STRVAR(Writer_Type_doc,
1340n/a"CSV writer\n"
1341n/a"\n"
1342n/a"Writer objects are responsible for generating tabular data\n"
1343n/a"in CSV format from sequence input.\n"
1344n/a);
1345n/a
1346n/astatic PyTypeObject Writer_Type = {
1347n/a PyVarObject_HEAD_INIT(NULL, 0)
1348n/a "_csv.writer", /*tp_name*/
1349n/a sizeof(WriterObj), /*tp_basicsize*/
1350n/a 0, /*tp_itemsize*/
1351n/a /* methods */
1352n/a (destructor)Writer_dealloc, /*tp_dealloc*/
1353n/a (printfunc)0, /*tp_print*/
1354n/a (getattrfunc)0, /*tp_getattr*/
1355n/a (setattrfunc)0, /*tp_setattr*/
1356n/a 0, /*tp_reserved*/
1357n/a (reprfunc)0, /*tp_repr*/
1358n/a 0, /*tp_as_number*/
1359n/a 0, /*tp_as_sequence*/
1360n/a 0, /*tp_as_mapping*/
1361n/a (hashfunc)0, /*tp_hash*/
1362n/a (ternaryfunc)0, /*tp_call*/
1363n/a (reprfunc)0, /*tp_str*/
1364n/a 0, /*tp_getattro*/
1365n/a 0, /*tp_setattro*/
1366n/a 0, /*tp_as_buffer*/
1367n/a Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1368n/a Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1369n/a Writer_Type_doc,
1370n/a (traverseproc)Writer_traverse, /*tp_traverse*/
1371n/a (inquiry)Writer_clear, /*tp_clear*/
1372n/a 0, /*tp_richcompare*/
1373n/a 0, /*tp_weaklistoffset*/
1374n/a (getiterfunc)0, /*tp_iter*/
1375n/a (getiterfunc)0, /*tp_iternext*/
1376n/a Writer_methods, /*tp_methods*/
1377n/a Writer_memberlist, /*tp_members*/
1378n/a 0, /*tp_getset*/
1379n/a};
1380n/a
1381n/astatic PyObject *
1382n/acsv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1383n/a{
1384n/a PyObject * output_file, * dialect = NULL;
1385n/a WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1386n/a _Py_IDENTIFIER(write);
1387n/a
1388n/a if (!self)
1389n/a return NULL;
1390n/a
1391n/a self->dialect = NULL;
1392n/a self->writeline = NULL;
1393n/a
1394n/a self->rec = NULL;
1395n/a self->rec_size = 0;
1396n/a self->rec_len = 0;
1397n/a self->num_fields = 0;
1398n/a
1399n/a if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1400n/a Py_DECREF(self);
1401n/a return NULL;
1402n/a }
1403n/a self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
1404n/a if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1405n/a PyErr_SetString(PyExc_TypeError,
1406n/a "argument 1 must have a \"write\" method");
1407n/a Py_DECREF(self);
1408n/a return NULL;
1409n/a }
1410n/a self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1411n/a if (self->dialect == NULL) {
1412n/a Py_DECREF(self);
1413n/a return NULL;
1414n/a }
1415n/a PyObject_GC_Track(self);
1416n/a return (PyObject *)self;
1417n/a}
1418n/a
1419n/a/*
1420n/a * DIALECT REGISTRY
1421n/a */
1422n/astatic PyObject *
1423n/acsv_list_dialects(PyObject *module, PyObject *args)
1424n/a{
1425n/a return PyDict_Keys(_csvstate_global->dialects);
1426n/a}
1427n/a
1428n/astatic PyObject *
1429n/acsv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1430n/a{
1431n/a PyObject *name_obj, *dialect_obj = NULL;
1432n/a PyObject *dialect;
1433n/a
1434n/a if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1435n/a return NULL;
1436n/a if (!PyUnicode_Check(name_obj)) {
1437n/a PyErr_SetString(PyExc_TypeError,
1438n/a "dialect name must be a string");
1439n/a return NULL;
1440n/a }
1441n/a if (PyUnicode_READY(name_obj) == -1)
1442n/a return NULL;
1443n/a dialect = _call_dialect(dialect_obj, kwargs);
1444n/a if (dialect == NULL)
1445n/a return NULL;
1446n/a if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
1447n/a Py_DECREF(dialect);
1448n/a return NULL;
1449n/a }
1450n/a Py_DECREF(dialect);
1451n/a Py_RETURN_NONE;
1452n/a}
1453n/a
1454n/astatic PyObject *
1455n/acsv_unregister_dialect(PyObject *module, PyObject *name_obj)
1456n/a{
1457n/a if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1458n/a return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1459n/a Py_RETURN_NONE;
1460n/a}
1461n/a
1462n/astatic PyObject *
1463n/acsv_get_dialect(PyObject *module, PyObject *name_obj)
1464n/a{
1465n/a return get_dialect_from_registry(name_obj);
1466n/a}
1467n/a
1468n/astatic PyObject *
1469n/acsv_field_size_limit(PyObject *module, PyObject *args)
1470n/a{
1471n/a PyObject *new_limit = NULL;
1472n/a long old_limit = _csvstate_global->field_limit;
1473n/a
1474n/a if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1475n/a return NULL;
1476n/a if (new_limit != NULL) {
1477n/a if (!PyLong_CheckExact(new_limit)) {
1478n/a PyErr_Format(PyExc_TypeError,
1479n/a "limit must be an integer");
1480n/a return NULL;
1481n/a }
1482n/a _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1483n/a if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1484n/a _csvstate_global->field_limit = old_limit;
1485n/a return NULL;
1486n/a }
1487n/a }
1488n/a return PyLong_FromLong(old_limit);
1489n/a}
1490n/a
1491n/a/*
1492n/a * MODULE
1493n/a */
1494n/a
/* Module-level docstring; referenced from the _csvmodule definition. */
1495n/aPyDoc_STRVAR(csv_module_doc,
1496n/a"CSV parsing and writing.\n"
1497n/a"\n"
1498n/a"This module provides classes that assist in the reading and writing\n"
1499n/a"of Comma Separated Value (CSV) files, and implements the interface\n"
1500n/a"described by PEP 305. Although many CSV files are simple to parse,\n"
1501n/a"the format is not formally defined by a stable specification and\n"
1502n/a"is subtle enough that parsing lines of a CSV file with something\n"
1503n/a"like line.split(\",\") is bound to fail. The module supports three\n"
1504n/a"basic APIs: reading, writing, and registration of dialects.\n"
1505n/a"\n"
1506n/a"\n"
1507n/a"DIALECT REGISTRATION:\n"
1508n/a"\n"
1509n/a"Readers and writers support a dialect argument, which is a convenient\n"
1510n/a"handle on a group of settings. When the dialect argument is a string,\n"
1511n/a"it identifies one of the dialects previously registered with the module.\n"
1512n/a"If it is a class or instance, the attributes of the argument are used as\n"
1513n/a"the settings for the reader or writer:\n"
1514n/a"\n"
1515n/a" class excel:\n"
1516n/a" delimiter = ','\n"
1517n/a" quotechar = '\"'\n"
1518n/a" escapechar = None\n"
1519n/a" doublequote = True\n"
1520n/a" skipinitialspace = False\n"
1521n/a" lineterminator = '\\r\\n'\n"
1522n/a" quoting = QUOTE_MINIMAL\n"
1523n/a"\n"
1524n/a"SETTINGS:\n"
1525n/a"\n"
1526n/a" * quotechar - specifies a one-character string to use as the \n"
1527n/a" quoting character. It defaults to '\"'.\n"
1528n/a" * delimiter - specifies a one-character string to use as the \n"
1529n/a" field separator. It defaults to ','.\n"
1530n/a" * skipinitialspace - specifies how to interpret whitespace which\n"
1531n/a" immediately follows a delimiter. It defaults to False, which\n"
1532n/a" means that whitespace immediately following a delimiter is part\n"
1533n/a" of the following field.\n"
1534n/a" * lineterminator - specifies the character sequence which should \n"
1535n/a" terminate rows.\n"
1536n/a" * quoting - controls when quotes should be generated by the writer.\n"
1537n/a" It can take on any of the following module constants:\n"
1538n/a"\n"
1539n/a" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1540n/a" field contains either the quotechar or the delimiter\n"
1541n/a" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1542n/a" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1543n/a" fields which do not parse as integers or floating point\n"
1544n/a" numbers.\n"
1545n/a" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1546n/a" * escapechar - specifies a one-character string used to escape \n"
1547n/a" the delimiter when quoting is set to QUOTE_NONE.\n"
1548n/a" * doublequote - controls the handling of quotes inside fields. When\n"
1549n/a" True, two consecutive quotes are interpreted as one during read,\n"
1550n/a" and when writing, each quote character embedded in the data is\n"
1551n/a" written as two quotes\n");
1552n/a
/* Docstring attached to the "reader" entry of csv_methods. */
1553n/aPyDoc_STRVAR(csv_reader_doc,
1554n/a" csv_reader = reader(iterable [, dialect='excel']\n"
1555n/a" [optional keyword args])\n"
1556n/a" for row in csv_reader:\n"
1557n/a" process(row)\n"
1558n/a"\n"
1559n/a"The \"iterable\" argument can be any object that returns a line\n"
1560n/a"of input for each iteration, such as a file object or a list. The\n"
1561n/a"optional \"dialect\" parameter is discussed below. The function\n"
1562n/a"also accepts optional keyword arguments which override settings\n"
1563n/a"provided by the dialect.\n"
1564n/a"\n"
1565n/a"The returned object is an iterator. Each iteration returns a row\n"
1566n/a"of the CSV file (which can span multiple input lines).\n");
1567n/a
/* Docstring attached to the "writer" entry of csv_methods. */
1568n/aPyDoc_STRVAR(csv_writer_doc,
1569n/a" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1570n/a" [optional keyword args])\n"
1571n/a" for row in sequence:\n"
1572n/a" csv_writer.writerow(row)\n"
1573n/a"\n"
1574n/a" [or]\n"
1575n/a"\n"
1576n/a" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1577n/a" [optional keyword args])\n"
1578n/a" csv_writer.writerows(rows)\n"
1579n/a"\n"
1580n/a"The \"fileobj\" argument can be any object that supports the file API.\n");
1581n/a
1582n/aPyDoc_STRVAR(csv_list_dialects_doc,
1583n/a"Return a list of all know dialect names.\n"
1584n/a" names = csv.list_dialects()");
1585n/a
1586n/aPyDoc_STRVAR(csv_get_dialect_doc,
1587n/a"Return the dialect instance associated with name.\n"
1588n/a" dialect = csv.get_dialect(name)");
1589n/a
1590n/aPyDoc_STRVAR(csv_register_dialect_doc,
1591n/a"Create a mapping from a string name to a dialect class.\n"
1592n/a" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1593n/a
1594n/aPyDoc_STRVAR(csv_unregister_dialect_doc,
1595n/a"Delete the name/dialect mapping associated with a string name.\n"
1596n/a" csv.unregister_dialect(name)");
1597n/a
1598n/aPyDoc_STRVAR(csv_field_size_limit_doc,
1599n/a"Sets an upper limit on parsed fields.\n"
1600n/a" csv.field_size_limit([limit])\n"
1601n/a"\n"
1602n/a"Returns old limit. If limit is not given, no new limit is set and\n"
1603n/a"the old limit is returned");
1604n/a
1605n/astatic struct PyMethodDef csv_methods[] = {
1606n/a { "reader", (PyCFunction)csv_reader,
1607n/a METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1608n/a { "writer", (PyCFunction)csv_writer,
1609n/a METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1610n/a { "list_dialects", (PyCFunction)csv_list_dialects,
1611n/a METH_NOARGS, csv_list_dialects_doc},
1612n/a { "register_dialect", (PyCFunction)csv_register_dialect,
1613n/a METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1614n/a { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1615n/a METH_O, csv_unregister_dialect_doc},
1616n/a { "get_dialect", (PyCFunction)csv_get_dialect,
1617n/a METH_O, csv_get_dialect_doc},
1618n/a { "field_size_limit", (PyCFunction)csv_field_size_limit,
1619n/a METH_VARARGS, csv_field_size_limit_doc},
1620n/a { NULL, NULL }
1621n/a};
1622n/a
1623n/astatic struct PyModuleDef _csvmodule = {
1624n/a PyModuleDef_HEAD_INIT,
1625n/a "_csv",
1626n/a csv_module_doc,
1627n/a sizeof(_csvstate),
1628n/a csv_methods,
1629n/a NULL,
1630n/a _csv_traverse,
1631n/a _csv_clear,
1632n/a _csv_free
1633n/a};
1634n/a
1635n/aPyMODINIT_FUNC
1636n/aPyInit__csv(void)
1637n/a{
1638n/a PyObject *module;
1639n/a const StyleDesc *style;
1640n/a
1641n/a if (PyType_Ready(&Dialect_Type) < 0)
1642n/a return NULL;
1643n/a
1644n/a if (PyType_Ready(&Reader_Type) < 0)
1645n/a return NULL;
1646n/a
1647n/a if (PyType_Ready(&Writer_Type) < 0)
1648n/a return NULL;
1649n/a
1650n/a /* Create the module and add the functions */
1651n/a module = PyModule_Create(&_csvmodule);
1652n/a if (module == NULL)
1653n/a return NULL;
1654n/a
1655n/a /* Add version to the module. */
1656n/a if (PyModule_AddStringConstant(module, "__version__",
1657n/a MODULE_VERSION) == -1)
1658n/a return NULL;
1659n/a
1660n/a /* Set the field limit */
1661n/a _csvstate(module)->field_limit = 128 * 1024;
1662n/a /* Do I still need to add this var to the Module Dict? */
1663n/a
1664n/a /* Add _dialects dictionary */
1665n/a _csvstate(module)->dialects = PyDict_New();
1666n/a if (_csvstate(module)->dialects == NULL)
1667n/a return NULL;
1668n/a Py_INCREF(_csvstate(module)->dialects);
1669n/a if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
1670n/a return NULL;
1671n/a
1672n/a /* Add quote styles into dictionary */
1673n/a for (style = quote_styles; style->name; style++) {
1674n/a if (PyModule_AddIntConstant(module, style->name,
1675n/a style->style) == -1)
1676n/a return NULL;
1677n/a }
1678n/a
1679n/a /* Add the Dialect type */
1680n/a Py_INCREF(&Dialect_Type);
1681n/a if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1682n/a return NULL;
1683n/a
1684n/a /* Add the CSV exception object to the module. */
1685n/a _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1686n/a if (_csvstate(module)->error_obj == NULL)
1687n/a return NULL;
1688n/a Py_INCREF(_csvstate(module)->error_obj);
1689n/a PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
1690n/a return module;
1691n/a}