1 | n/a | /* csv module */ |
---|
2 | n/a | |
---|
3 | n/a | /* |
---|
4 | n/a | |
---|
5 | n/a | This module provides the low-level underpinnings of a CSV reading/writing |
---|
6 | n/a | module. Users should not use this module directly, but import the csv.py |
---|
7 | n/a | module instead. |
---|
8 | n/a | |
---|
9 | n/a | */ |
---|
10 | n/a | |
---|
11 | n/a | #define MODULE_VERSION "1.0" |
---|
12 | n/a | |
---|
13 | n/a | #include "Python.h" |
---|
14 | n/a | #include "structmember.h" |
---|
15 | n/a | |
---|
16 | n/a | |
---|
17 | n/a | typedef struct { |
---|
18 | n/a | PyObject *error_obj; /* CSV exception */ |
---|
19 | n/a | PyObject *dialects; /* Dialect registry */ |
---|
20 | n/a | long field_limit; /* max parsed field size */ |
---|
21 | n/a | } _csvstate; |
---|
22 | n/a | |
---|
23 | n/a | #define _csvstate(o) ((_csvstate *)PyModule_GetState(o)) |
---|
24 | n/a | |
---|
25 | n/a | static int |
---|
26 | n/a | _csv_clear(PyObject *m) |
---|
27 | n/a | { |
---|
28 | n/a | Py_CLEAR(_csvstate(m)->error_obj); |
---|
29 | n/a | Py_CLEAR(_csvstate(m)->dialects); |
---|
30 | n/a | return 0; |
---|
31 | n/a | } |
---|
32 | n/a | |
---|
33 | n/a | static int |
---|
34 | n/a | _csv_traverse(PyObject *m, visitproc visit, void *arg) |
---|
35 | n/a | { |
---|
36 | n/a | Py_VISIT(_csvstate(m)->error_obj); |
---|
37 | n/a | Py_VISIT(_csvstate(m)->dialects); |
---|
38 | n/a | return 0; |
---|
39 | n/a | } |
---|
40 | n/a | |
---|
41 | n/a | static void |
---|
42 | n/a | _csv_free(void *m) |
---|
43 | n/a | { |
---|
44 | n/a | _csv_clear((PyObject *)m); |
---|
45 | n/a | } |
---|
46 | n/a | |
---|
47 | n/a | static struct PyModuleDef _csvmodule; |
---|
48 | n/a | |
---|
49 | n/a | #define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule))) |
---|
50 | n/a | |
---|
/* States of the reader's character-by-character parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
56 | n/a | |
---|
/* Quoting styles; the numeric values follow declaration order. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
60 | n/a | |
---|
61 | n/a | typedef struct { |
---|
62 | n/a | QuoteStyle style; |
---|
63 | n/a | const char *name; |
---|
64 | n/a | } StyleDesc; |
---|
65 | n/a | |
---|
66 | n/a | static const StyleDesc quote_styles[] = { |
---|
67 | n/a | { QUOTE_MINIMAL, "QUOTE_MINIMAL" }, |
---|
68 | n/a | { QUOTE_ALL, "QUOTE_ALL" }, |
---|
69 | n/a | { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" }, |
---|
70 | n/a | { QUOTE_NONE, "QUOTE_NONE" }, |
---|
71 | n/a | { 0 } |
---|
72 | n/a | }; |
---|
73 | n/a | |
---|
74 | n/a | typedef struct { |
---|
75 | n/a | PyObject_HEAD |
---|
76 | n/a | |
---|
77 | n/a | int doublequote; /* is " represented by ""? */ |
---|
78 | n/a | Py_UCS4 delimiter; /* field separator */ |
---|
79 | n/a | Py_UCS4 quotechar; /* quote character */ |
---|
80 | n/a | Py_UCS4 escapechar; /* escape character */ |
---|
81 | n/a | int skipinitialspace; /* ignore spaces following delimiter? */ |
---|
82 | n/a | PyObject *lineterminator; /* string to write between records */ |
---|
83 | n/a | int quoting; /* style of quoting to write */ |
---|
84 | n/a | |
---|
85 | n/a | int strict; /* raise exception on bad CSV */ |
---|
86 | n/a | } DialectObj; |
---|
87 | n/a | |
---|
88 | n/a | static PyTypeObject Dialect_Type; |
---|
89 | n/a | |
---|
90 | n/a | typedef struct { |
---|
91 | n/a | PyObject_HEAD |
---|
92 | n/a | |
---|
93 | n/a | PyObject *input_iter; /* iterate over this for input lines */ |
---|
94 | n/a | |
---|
95 | n/a | DialectObj *dialect; /* parsing dialect */ |
---|
96 | n/a | |
---|
97 | n/a | PyObject *fields; /* field list for current record */ |
---|
98 | n/a | ParserState state; /* current CSV parse state */ |
---|
99 | n/a | Py_UCS4 *field; /* temporary buffer */ |
---|
100 | n/a | Py_ssize_t field_size; /* size of allocated buffer */ |
---|
101 | n/a | Py_ssize_t field_len; /* length of current field */ |
---|
102 | n/a | int numeric_field; /* treat field as numeric */ |
---|
103 | n/a | unsigned long line_num; /* Source-file line number */ |
---|
104 | n/a | } ReaderObj; |
---|
105 | n/a | |
---|
106 | n/a | static PyTypeObject Reader_Type; |
---|
107 | n/a | |
---|
108 | n/a | #define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type) |
---|
109 | n/a | |
---|
110 | n/a | typedef struct { |
---|
111 | n/a | PyObject_HEAD |
---|
112 | n/a | |
---|
113 | n/a | PyObject *writeline; /* write output lines to this file */ |
---|
114 | n/a | |
---|
115 | n/a | DialectObj *dialect; /* parsing dialect */ |
---|
116 | n/a | |
---|
117 | n/a | Py_UCS4 *rec; /* buffer for parser.join */ |
---|
118 | n/a | Py_ssize_t rec_size; /* size of allocated record */ |
---|
119 | n/a | Py_ssize_t rec_len; /* length of record */ |
---|
120 | n/a | int num_fields; /* number of fields in record */ |
---|
121 | n/a | } WriterObj; |
---|
122 | n/a | |
---|
123 | n/a | static PyTypeObject Writer_Type; |
---|
124 | n/a | |
---|
125 | n/a | /* |
---|
126 | n/a | * DIALECT class |
---|
127 | n/a | */ |
---|
128 | n/a | |
---|
129 | n/a | static PyObject * |
---|
130 | n/a | get_dialect_from_registry(PyObject * name_obj) |
---|
131 | n/a | { |
---|
132 | n/a | PyObject *dialect_obj; |
---|
133 | n/a | |
---|
134 | n/a | dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj); |
---|
135 | n/a | if (dialect_obj == NULL) { |
---|
136 | n/a | if (!PyErr_Occurred()) |
---|
137 | n/a | PyErr_Format(_csvstate_global->error_obj, "unknown dialect"); |
---|
138 | n/a | } |
---|
139 | n/a | else |
---|
140 | n/a | Py_INCREF(dialect_obj); |
---|
141 | n/a | return dialect_obj; |
---|
142 | n/a | } |
---|
143 | n/a | |
---|
144 | n/a | static PyObject * |
---|
145 | n/a | get_string(PyObject *str) |
---|
146 | n/a | { |
---|
147 | n/a | Py_XINCREF(str); |
---|
148 | n/a | return str; |
---|
149 | n/a | } |
---|
150 | n/a | |
---|
151 | n/a | static PyObject * |
---|
152 | n/a | get_nullchar_as_None(Py_UCS4 c) |
---|
153 | n/a | { |
---|
154 | n/a | if (c == '\0') { |
---|
155 | n/a | Py_RETURN_NONE; |
---|
156 | n/a | } |
---|
157 | n/a | else |
---|
158 | n/a | return PyUnicode_FromOrdinal(c); |
---|
159 | n/a | } |
---|
160 | n/a | |
---|
161 | n/a | static PyObject * |
---|
162 | n/a | Dialect_get_lineterminator(DialectObj *self) |
---|
163 | n/a | { |
---|
164 | n/a | return get_string(self->lineterminator); |
---|
165 | n/a | } |
---|
166 | n/a | |
---|
167 | n/a | static PyObject * |
---|
168 | n/a | Dialect_get_delimiter(DialectObj *self) |
---|
169 | n/a | { |
---|
170 | n/a | return get_nullchar_as_None(self->delimiter); |
---|
171 | n/a | } |
---|
172 | n/a | |
---|
173 | n/a | static PyObject * |
---|
174 | n/a | Dialect_get_escapechar(DialectObj *self) |
---|
175 | n/a | { |
---|
176 | n/a | return get_nullchar_as_None(self->escapechar); |
---|
177 | n/a | } |
---|
178 | n/a | |
---|
179 | n/a | static PyObject * |
---|
180 | n/a | Dialect_get_quotechar(DialectObj *self) |
---|
181 | n/a | { |
---|
182 | n/a | return get_nullchar_as_None(self->quotechar); |
---|
183 | n/a | } |
---|
184 | n/a | |
---|
185 | n/a | static PyObject * |
---|
186 | n/a | Dialect_get_quoting(DialectObj *self) |
---|
187 | n/a | { |
---|
188 | n/a | return PyLong_FromLong(self->quoting); |
---|
189 | n/a | } |
---|
190 | n/a | |
---|
191 | n/a | static int |
---|
192 | n/a | _set_bool(const char *name, int *target, PyObject *src, int dflt) |
---|
193 | n/a | { |
---|
194 | n/a | if (src == NULL) |
---|
195 | n/a | *target = dflt; |
---|
196 | n/a | else { |
---|
197 | n/a | int b = PyObject_IsTrue(src); |
---|
198 | n/a | if (b < 0) |
---|
199 | n/a | return -1; |
---|
200 | n/a | *target = b; |
---|
201 | n/a | } |
---|
202 | n/a | return 0; |
---|
203 | n/a | } |
---|
204 | n/a | |
---|
205 | n/a | static int |
---|
206 | n/a | _set_int(const char *name, int *target, PyObject *src, int dflt) |
---|
207 | n/a | { |
---|
208 | n/a | if (src == NULL) |
---|
209 | n/a | *target = dflt; |
---|
210 | n/a | else { |
---|
211 | n/a | int value; |
---|
212 | n/a | if (!PyLong_CheckExact(src)) { |
---|
213 | n/a | PyErr_Format(PyExc_TypeError, |
---|
214 | n/a | "\"%s\" must be an integer", name); |
---|
215 | n/a | return -1; |
---|
216 | n/a | } |
---|
217 | n/a | value = _PyLong_AsInt(src); |
---|
218 | n/a | if (value == -1 && PyErr_Occurred()) { |
---|
219 | n/a | return -1; |
---|
220 | n/a | } |
---|
221 | n/a | *target = value; |
---|
222 | n/a | } |
---|
223 | n/a | return 0; |
---|
224 | n/a | } |
---|
225 | n/a | |
---|
226 | n/a | static int |
---|
227 | n/a | _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) |
---|
228 | n/a | { |
---|
229 | n/a | if (src == NULL) |
---|
230 | n/a | *target = dflt; |
---|
231 | n/a | else { |
---|
232 | n/a | *target = '\0'; |
---|
233 | n/a | if (src != Py_None) { |
---|
234 | n/a | Py_ssize_t len; |
---|
235 | n/a | if (!PyUnicode_Check(src)) { |
---|
236 | n/a | PyErr_Format(PyExc_TypeError, |
---|
237 | n/a | "\"%s\" must be string, not %.200s", name, |
---|
238 | n/a | src->ob_type->tp_name); |
---|
239 | n/a | return -1; |
---|
240 | n/a | } |
---|
241 | n/a | len = PyUnicode_GetLength(src); |
---|
242 | n/a | if (len > 1) { |
---|
243 | n/a | PyErr_Format(PyExc_TypeError, |
---|
244 | n/a | "\"%s\" must be a 1-character string", |
---|
245 | n/a | name); |
---|
246 | n/a | return -1; |
---|
247 | n/a | } |
---|
248 | n/a | /* PyUnicode_READY() is called in PyUnicode_GetLength() */ |
---|
249 | n/a | if (len > 0) |
---|
250 | n/a | *target = PyUnicode_READ_CHAR(src, 0); |
---|
251 | n/a | } |
---|
252 | n/a | } |
---|
253 | n/a | return 0; |
---|
254 | n/a | } |
---|
255 | n/a | |
---|
256 | n/a | static int |
---|
257 | n/a | _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) |
---|
258 | n/a | { |
---|
259 | n/a | if (src == NULL) |
---|
260 | n/a | *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL); |
---|
261 | n/a | else { |
---|
262 | n/a | if (src == Py_None) |
---|
263 | n/a | *target = NULL; |
---|
264 | n/a | else if (!PyUnicode_Check(src)) { |
---|
265 | n/a | PyErr_Format(PyExc_TypeError, |
---|
266 | n/a | "\"%s\" must be a string", name); |
---|
267 | n/a | return -1; |
---|
268 | n/a | } |
---|
269 | n/a | else { |
---|
270 | n/a | if (PyUnicode_READY(src) == -1) |
---|
271 | n/a | return -1; |
---|
272 | n/a | Py_INCREF(src); |
---|
273 | n/a | Py_XSETREF(*target, src); |
---|
274 | n/a | } |
---|
275 | n/a | } |
---|
276 | n/a | return 0; |
---|
277 | n/a | } |
---|
278 | n/a | |
---|
279 | n/a | static int |
---|
280 | n/a | dialect_check_quoting(int quoting) |
---|
281 | n/a | { |
---|
282 | n/a | const StyleDesc *qs; |
---|
283 | n/a | |
---|
284 | n/a | for (qs = quote_styles; qs->name; qs++) { |
---|
285 | n/a | if ((int)qs->style == quoting) |
---|
286 | n/a | return 0; |
---|
287 | n/a | } |
---|
288 | n/a | PyErr_Format(PyExc_TypeError, "bad \"quoting\" value"); |
---|
289 | n/a | return -1; |
---|
290 | n/a | } |
---|
291 | n/a | |
---|
292 | n/a | #define D_OFF(x) offsetof(DialectObj, x) |
---|
293 | n/a | |
---|
294 | n/a | static struct PyMemberDef Dialect_memberlist[] = { |
---|
295 | n/a | { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY }, |
---|
296 | n/a | { "doublequote", T_INT, D_OFF(doublequote), READONLY }, |
---|
297 | n/a | { "strict", T_INT, D_OFF(strict), READONLY }, |
---|
298 | n/a | { NULL } |
---|
299 | n/a | }; |
---|
300 | n/a | |
---|
301 | n/a | static PyGetSetDef Dialect_getsetlist[] = { |
---|
302 | n/a | { "delimiter", (getter)Dialect_get_delimiter}, |
---|
303 | n/a | { "escapechar", (getter)Dialect_get_escapechar}, |
---|
304 | n/a | { "lineterminator", (getter)Dialect_get_lineterminator}, |
---|
305 | n/a | { "quotechar", (getter)Dialect_get_quotechar}, |
---|
306 | n/a | { "quoting", (getter)Dialect_get_quoting}, |
---|
307 | n/a | {NULL}, |
---|
308 | n/a | }; |
---|
309 | n/a | |
---|
310 | n/a | static void |
---|
311 | n/a | Dialect_dealloc(DialectObj *self) |
---|
312 | n/a | { |
---|
313 | n/a | Py_XDECREF(self->lineterminator); |
---|
314 | n/a | Py_TYPE(self)->tp_free((PyObject *)self); |
---|
315 | n/a | } |
---|
316 | n/a | |
---|
/* Keyword names accepted by dialect_new(), in argument order. */
static char *dialect_kws[] = {
    "dialect", "delimiter", "doublequote",
    "escapechar", "lineterminator", "quotechar",
    "quoting", "skipinitialspace", "strict",
    NULL
};
329 | n/a | |
---|
330 | n/a | static PyObject * |
---|
331 | n/a | dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) |
---|
332 | n/a | { |
---|
333 | n/a | DialectObj *self; |
---|
334 | n/a | PyObject *ret = NULL; |
---|
335 | n/a | PyObject *dialect = NULL; |
---|
336 | n/a | PyObject *delimiter = NULL; |
---|
337 | n/a | PyObject *doublequote = NULL; |
---|
338 | n/a | PyObject *escapechar = NULL; |
---|
339 | n/a | PyObject *lineterminator = NULL; |
---|
340 | n/a | PyObject *quotechar = NULL; |
---|
341 | n/a | PyObject *quoting = NULL; |
---|
342 | n/a | PyObject *skipinitialspace = NULL; |
---|
343 | n/a | PyObject *strict = NULL; |
---|
344 | n/a | |
---|
345 | n/a | if (!PyArg_ParseTupleAndKeywords(args, kwargs, |
---|
346 | n/a | "|OOOOOOOOO", dialect_kws, |
---|
347 | n/a | &dialect, |
---|
348 | n/a | &delimiter, |
---|
349 | n/a | &doublequote, |
---|
350 | n/a | &escapechar, |
---|
351 | n/a | &lineterminator, |
---|
352 | n/a | "echar, |
---|
353 | n/a | "ing, |
---|
354 | n/a | &skipinitialspace, |
---|
355 | n/a | &strict)) |
---|
356 | n/a | return NULL; |
---|
357 | n/a | |
---|
358 | n/a | if (dialect != NULL) { |
---|
359 | n/a | if (PyUnicode_Check(dialect)) { |
---|
360 | n/a | dialect = get_dialect_from_registry(dialect); |
---|
361 | n/a | if (dialect == NULL) |
---|
362 | n/a | return NULL; |
---|
363 | n/a | } |
---|
364 | n/a | else |
---|
365 | n/a | Py_INCREF(dialect); |
---|
366 | n/a | /* Can we reuse this instance? */ |
---|
367 | n/a | if (PyObject_TypeCheck(dialect, &Dialect_Type) && |
---|
368 | n/a | delimiter == 0 && |
---|
369 | n/a | doublequote == 0 && |
---|
370 | n/a | escapechar == 0 && |
---|
371 | n/a | lineterminator == 0 && |
---|
372 | n/a | quotechar == 0 && |
---|
373 | n/a | quoting == 0 && |
---|
374 | n/a | skipinitialspace == 0 && |
---|
375 | n/a | strict == 0) |
---|
376 | n/a | return dialect; |
---|
377 | n/a | } |
---|
378 | n/a | |
---|
379 | n/a | self = (DialectObj *)type->tp_alloc(type, 0); |
---|
380 | n/a | if (self == NULL) { |
---|
381 | n/a | Py_XDECREF(dialect); |
---|
382 | n/a | return NULL; |
---|
383 | n/a | } |
---|
384 | n/a | self->lineterminator = NULL; |
---|
385 | n/a | |
---|
386 | n/a | Py_XINCREF(delimiter); |
---|
387 | n/a | Py_XINCREF(doublequote); |
---|
388 | n/a | Py_XINCREF(escapechar); |
---|
389 | n/a | Py_XINCREF(lineterminator); |
---|
390 | n/a | Py_XINCREF(quotechar); |
---|
391 | n/a | Py_XINCREF(quoting); |
---|
392 | n/a | Py_XINCREF(skipinitialspace); |
---|
393 | n/a | Py_XINCREF(strict); |
---|
394 | n/a | if (dialect != NULL) { |
---|
395 | n/a | #define DIALECT_GETATTR(v, n) \ |
---|
396 | n/a | if (v == NULL) \ |
---|
397 | n/a | v = PyObject_GetAttrString(dialect, n) |
---|
398 | n/a | DIALECT_GETATTR(delimiter, "delimiter"); |
---|
399 | n/a | DIALECT_GETATTR(doublequote, "doublequote"); |
---|
400 | n/a | DIALECT_GETATTR(escapechar, "escapechar"); |
---|
401 | n/a | DIALECT_GETATTR(lineterminator, "lineterminator"); |
---|
402 | n/a | DIALECT_GETATTR(quotechar, "quotechar"); |
---|
403 | n/a | DIALECT_GETATTR(quoting, "quoting"); |
---|
404 | n/a | DIALECT_GETATTR(skipinitialspace, "skipinitialspace"); |
---|
405 | n/a | DIALECT_GETATTR(strict, "strict"); |
---|
406 | n/a | PyErr_Clear(); |
---|
407 | n/a | } |
---|
408 | n/a | |
---|
409 | n/a | /* check types and convert to C values */ |
---|
410 | n/a | #define DIASET(meth, name, target, src, dflt) \ |
---|
411 | n/a | if (meth(name, target, src, dflt)) \ |
---|
412 | n/a | goto err |
---|
413 | n/a | DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ','); |
---|
414 | n/a | DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1); |
---|
415 | n/a | DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0); |
---|
416 | n/a | DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n"); |
---|
417 | n/a | DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"'); |
---|
418 | n/a | DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL); |
---|
419 | n/a | DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0); |
---|
420 | n/a | DIASET(_set_bool, "strict", &self->strict, strict, 0); |
---|
421 | n/a | |
---|
422 | n/a | /* validate options */ |
---|
423 | n/a | if (dialect_check_quoting(self->quoting)) |
---|
424 | n/a | goto err; |
---|
425 | n/a | if (self->delimiter == 0) { |
---|
426 | n/a | PyErr_SetString(PyExc_TypeError, |
---|
427 | n/a | "\"delimiter\" must be a 1-character string"); |
---|
428 | n/a | goto err; |
---|
429 | n/a | } |
---|
430 | n/a | if (quotechar == Py_None && quoting == NULL) |
---|
431 | n/a | self->quoting = QUOTE_NONE; |
---|
432 | n/a | if (self->quoting != QUOTE_NONE && self->quotechar == 0) { |
---|
433 | n/a | PyErr_SetString(PyExc_TypeError, |
---|
434 | n/a | "quotechar must be set if quoting enabled"); |
---|
435 | n/a | goto err; |
---|
436 | n/a | } |
---|
437 | n/a | if (self->lineterminator == 0) { |
---|
438 | n/a | PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); |
---|
439 | n/a | goto err; |
---|
440 | n/a | } |
---|
441 | n/a | |
---|
442 | n/a | ret = (PyObject *)self; |
---|
443 | n/a | Py_INCREF(self); |
---|
444 | n/a | err: |
---|
445 | n/a | Py_XDECREF(self); |
---|
446 | n/a | Py_XDECREF(dialect); |
---|
447 | n/a | Py_XDECREF(delimiter); |
---|
448 | n/a | Py_XDECREF(doublequote); |
---|
449 | n/a | Py_XDECREF(escapechar); |
---|
450 | n/a | Py_XDECREF(lineterminator); |
---|
451 | n/a | Py_XDECREF(quotechar); |
---|
452 | n/a | Py_XDECREF(quoting); |
---|
453 | n/a | Py_XDECREF(skipinitialspace); |
---|
454 | n/a | Py_XDECREF(strict); |
---|
455 | n/a | return ret; |
---|
456 | n/a | } |
---|
457 | n/a | |
---|
458 | n/a | |
---|
459 | n/a | PyDoc_STRVAR(Dialect_Type_doc, |
---|
460 | n/a | "CSV dialect\n" |
---|
461 | n/a | "\n" |
---|
462 | n/a | "The Dialect type records CSV parsing and generation options.\n"); |
---|
463 | n/a | |
---|
464 | n/a | static PyTypeObject Dialect_Type = { |
---|
465 | n/a | PyVarObject_HEAD_INIT(NULL, 0) |
---|
466 | n/a | "_csv.Dialect", /* tp_name */ |
---|
467 | n/a | sizeof(DialectObj), /* tp_basicsize */ |
---|
468 | n/a | 0, /* tp_itemsize */ |
---|
469 | n/a | /* methods */ |
---|
470 | n/a | (destructor)Dialect_dealloc, /* tp_dealloc */ |
---|
471 | n/a | (printfunc)0, /* tp_print */ |
---|
472 | n/a | (getattrfunc)0, /* tp_getattr */ |
---|
473 | n/a | (setattrfunc)0, /* tp_setattr */ |
---|
474 | n/a | 0, /* tp_reserved */ |
---|
475 | n/a | (reprfunc)0, /* tp_repr */ |
---|
476 | n/a | 0, /* tp_as_number */ |
---|
477 | n/a | 0, /* tp_as_sequence */ |
---|
478 | n/a | 0, /* tp_as_mapping */ |
---|
479 | n/a | (hashfunc)0, /* tp_hash */ |
---|
480 | n/a | (ternaryfunc)0, /* tp_call */ |
---|
481 | n/a | (reprfunc)0, /* tp_str */ |
---|
482 | n/a | 0, /* tp_getattro */ |
---|
483 | n/a | 0, /* tp_setattro */ |
---|
484 | n/a | 0, /* tp_as_buffer */ |
---|
485 | n/a | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
---|
486 | n/a | Dialect_Type_doc, /* tp_doc */ |
---|
487 | n/a | 0, /* tp_traverse */ |
---|
488 | n/a | 0, /* tp_clear */ |
---|
489 | n/a | 0, /* tp_richcompare */ |
---|
490 | n/a | 0, /* tp_weaklistoffset */ |
---|
491 | n/a | 0, /* tp_iter */ |
---|
492 | n/a | 0, /* tp_iternext */ |
---|
493 | n/a | 0, /* tp_methods */ |
---|
494 | n/a | Dialect_memberlist, /* tp_members */ |
---|
495 | n/a | Dialect_getsetlist, /* tp_getset */ |
---|
496 | n/a | 0, /* tp_base */ |
---|
497 | n/a | 0, /* tp_dict */ |
---|
498 | n/a | 0, /* tp_descr_get */ |
---|
499 | n/a | 0, /* tp_descr_set */ |
---|
500 | n/a | 0, /* tp_dictoffset */ |
---|
501 | n/a | 0, /* tp_init */ |
---|
502 | n/a | 0, /* tp_alloc */ |
---|
503 | n/a | dialect_new, /* tp_new */ |
---|
504 | n/a | 0, /* tp_free */ |
---|
505 | n/a | }; |
---|
506 | n/a | |
---|
507 | n/a | /* |
---|
508 | n/a | * Return an instance of the dialect type, given a Python instance or kwarg |
---|
509 | n/a | * description of the dialect |
---|
510 | n/a | */ |
---|
511 | n/a | static PyObject * |
---|
512 | n/a | _call_dialect(PyObject *dialect_inst, PyObject *kwargs) |
---|
513 | n/a | { |
---|
514 | n/a | PyObject *type = (PyObject *)&Dialect_Type; |
---|
515 | n/a | if (dialect_inst) { |
---|
516 | n/a | return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs); |
---|
517 | n/a | } |
---|
518 | n/a | else { |
---|
519 | n/a | return _PyObject_FastCallDict(type, NULL, 0, kwargs); |
---|
520 | n/a | } |
---|
521 | n/a | } |
---|
522 | n/a | |
---|
523 | n/a | /* |
---|
524 | n/a | * READER |
---|
525 | n/a | */ |
---|
526 | n/a | static int |
---|
527 | n/a | parse_save_field(ReaderObj *self) |
---|
528 | n/a | { |
---|
529 | n/a | PyObject *field; |
---|
530 | n/a | |
---|
531 | n/a | field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, |
---|
532 | n/a | (void *) self->field, self->field_len); |
---|
533 | n/a | if (field == NULL) |
---|
534 | n/a | return -1; |
---|
535 | n/a | self->field_len = 0; |
---|
536 | n/a | if (self->numeric_field) { |
---|
537 | n/a | PyObject *tmp; |
---|
538 | n/a | |
---|
539 | n/a | self->numeric_field = 0; |
---|
540 | n/a | tmp = PyNumber_Float(field); |
---|
541 | n/a | Py_DECREF(field); |
---|
542 | n/a | if (tmp == NULL) |
---|
543 | n/a | return -1; |
---|
544 | n/a | field = tmp; |
---|
545 | n/a | } |
---|
546 | n/a | if (PyList_Append(self->fields, field) < 0) { |
---|
547 | n/a | Py_DECREF(field); |
---|
548 | n/a | return -1; |
---|
549 | n/a | } |
---|
550 | n/a | Py_DECREF(field); |
---|
551 | n/a | return 0; |
---|
552 | n/a | } |
---|
553 | n/a | |
---|
554 | n/a | static int |
---|
555 | n/a | parse_grow_buff(ReaderObj *self) |
---|
556 | n/a | { |
---|
557 | n/a | if (self->field_size == 0) { |
---|
558 | n/a | self->field_size = 4096; |
---|
559 | n/a | if (self->field != NULL) |
---|
560 | n/a | PyMem_Free(self->field); |
---|
561 | n/a | self->field = PyMem_New(Py_UCS4, self->field_size); |
---|
562 | n/a | } |
---|
563 | n/a | else { |
---|
564 | n/a | Py_UCS4 *field = self->field; |
---|
565 | n/a | if (self->field_size > PY_SSIZE_T_MAX / 2) { |
---|
566 | n/a | PyErr_NoMemory(); |
---|
567 | n/a | return 0; |
---|
568 | n/a | } |
---|
569 | n/a | self->field_size *= 2; |
---|
570 | n/a | self->field = PyMem_Resize(field, Py_UCS4, self->field_size); |
---|
571 | n/a | } |
---|
572 | n/a | if (self->field == NULL) { |
---|
573 | n/a | PyErr_NoMemory(); |
---|
574 | n/a | return 0; |
---|
575 | n/a | } |
---|
576 | n/a | return 1; |
---|
577 | n/a | } |
---|
578 | n/a | |
---|
579 | n/a | static int |
---|
580 | n/a | parse_add_char(ReaderObj *self, Py_UCS4 c) |
---|
581 | n/a | { |
---|
582 | n/a | if (self->field_len >= _csvstate_global->field_limit) { |
---|
583 | n/a | PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)", |
---|
584 | n/a | _csvstate_global->field_limit); |
---|
585 | n/a | return -1; |
---|
586 | n/a | } |
---|
587 | n/a | if (self->field_len == self->field_size && !parse_grow_buff(self)) |
---|
588 | n/a | return -1; |
---|
589 | n/a | self->field[self->field_len++] = c; |
---|
590 | n/a | return 0; |
---|
591 | n/a | } |
---|
592 | n/a | |
---|
593 | n/a | static int |
---|
594 | n/a | parse_process_char(ReaderObj *self, Py_UCS4 c) |
---|
595 | n/a | { |
---|
596 | n/a | DialectObj *dialect = self->dialect; |
---|
597 | n/a | |
---|
598 | n/a | switch (self->state) { |
---|
599 | n/a | case START_RECORD: |
---|
600 | n/a | /* start of record */ |
---|
601 | n/a | if (c == '\0') |
---|
602 | n/a | /* empty line - return [] */ |
---|
603 | n/a | break; |
---|
604 | n/a | else if (c == '\n' || c == '\r') { |
---|
605 | n/a | self->state = EAT_CRNL; |
---|
606 | n/a | break; |
---|
607 | n/a | } |
---|
608 | n/a | /* normal character - handle as START_FIELD */ |
---|
609 | n/a | self->state = START_FIELD; |
---|
610 | n/a | /* fallthru */ |
---|
611 | n/a | case START_FIELD: |
---|
612 | n/a | /* expecting field */ |
---|
613 | n/a | if (c == '\n' || c == '\r' || c == '\0') { |
---|
614 | n/a | /* save empty field - return [fields] */ |
---|
615 | n/a | if (parse_save_field(self) < 0) |
---|
616 | n/a | return -1; |
---|
617 | n/a | self->state = (c == '\0' ? START_RECORD : EAT_CRNL); |
---|
618 | n/a | } |
---|
619 | n/a | else if (c == dialect->quotechar && |
---|
620 | n/a | dialect->quoting != QUOTE_NONE) { |
---|
621 | n/a | /* start quoted field */ |
---|
622 | n/a | self->state = IN_QUOTED_FIELD; |
---|
623 | n/a | } |
---|
624 | n/a | else if (c == dialect->escapechar) { |
---|
625 | n/a | /* possible escaped character */ |
---|
626 | n/a | self->state = ESCAPED_CHAR; |
---|
627 | n/a | } |
---|
628 | n/a | else if (c == ' ' && dialect->skipinitialspace) |
---|
629 | n/a | /* ignore space at start of field */ |
---|
630 | n/a | ; |
---|
631 | n/a | else if (c == dialect->delimiter) { |
---|
632 | n/a | /* save empty field */ |
---|
633 | n/a | if (parse_save_field(self) < 0) |
---|
634 | n/a | return -1; |
---|
635 | n/a | } |
---|
636 | n/a | else { |
---|
637 | n/a | /* begin new unquoted field */ |
---|
638 | n/a | if (dialect->quoting == QUOTE_NONNUMERIC) |
---|
639 | n/a | self->numeric_field = 1; |
---|
640 | n/a | if (parse_add_char(self, c) < 0) |
---|
641 | n/a | return -1; |
---|
642 | n/a | self->state = IN_FIELD; |
---|
643 | n/a | } |
---|
644 | n/a | break; |
---|
645 | n/a | |
---|
646 | n/a | case ESCAPED_CHAR: |
---|
647 | n/a | if (c == '\n' || c=='\r') { |
---|
648 | n/a | if (parse_add_char(self, c) < 0) |
---|
649 | n/a | return -1; |
---|
650 | n/a | self->state = AFTER_ESCAPED_CRNL; |
---|
651 | n/a | break; |
---|
652 | n/a | } |
---|
653 | n/a | if (c == '\0') |
---|
654 | n/a | c = '\n'; |
---|
655 | n/a | if (parse_add_char(self, c) < 0) |
---|
656 | n/a | return -1; |
---|
657 | n/a | self->state = IN_FIELD; |
---|
658 | n/a | break; |
---|
659 | n/a | |
---|
660 | n/a | case AFTER_ESCAPED_CRNL: |
---|
661 | n/a | if (c == '\0') |
---|
662 | n/a | break; |
---|
663 | n/a | /*fallthru*/ |
---|
664 | n/a | |
---|
665 | n/a | case IN_FIELD: |
---|
666 | n/a | /* in unquoted field */ |
---|
667 | n/a | if (c == '\n' || c == '\r' || c == '\0') { |
---|
668 | n/a | /* end of line - return [fields] */ |
---|
669 | n/a | if (parse_save_field(self) < 0) |
---|
670 | n/a | return -1; |
---|
671 | n/a | self->state = (c == '\0' ? START_RECORD : EAT_CRNL); |
---|
672 | n/a | } |
---|
673 | n/a | else if (c == dialect->escapechar) { |
---|
674 | n/a | /* possible escaped character */ |
---|
675 | n/a | self->state = ESCAPED_CHAR; |
---|
676 | n/a | } |
---|
677 | n/a | else if (c == dialect->delimiter) { |
---|
678 | n/a | /* save field - wait for new field */ |
---|
679 | n/a | if (parse_save_field(self) < 0) |
---|
680 | n/a | return -1; |
---|
681 | n/a | self->state = START_FIELD; |
---|
682 | n/a | } |
---|
683 | n/a | else { |
---|
684 | n/a | /* normal character - save in field */ |
---|
685 | n/a | if (parse_add_char(self, c) < 0) |
---|
686 | n/a | return -1; |
---|
687 | n/a | } |
---|
688 | n/a | break; |
---|
689 | n/a | |
---|
690 | n/a | case IN_QUOTED_FIELD: |
---|
691 | n/a | /* in quoted field */ |
---|
692 | n/a | if (c == '\0') |
---|
693 | n/a | ; |
---|
694 | n/a | else if (c == dialect->escapechar) { |
---|
695 | n/a | /* Possible escape character */ |
---|
696 | n/a | self->state = ESCAPE_IN_QUOTED_FIELD; |
---|
697 | n/a | } |
---|
698 | n/a | else if (c == dialect->quotechar && |
---|
699 | n/a | dialect->quoting != QUOTE_NONE) { |
---|
700 | n/a | if (dialect->doublequote) { |
---|
701 | n/a | /* doublequote; " represented by "" */ |
---|
702 | n/a | self->state = QUOTE_IN_QUOTED_FIELD; |
---|
703 | n/a | } |
---|
704 | n/a | else { |
---|
705 | n/a | /* end of quote part of field */ |
---|
706 | n/a | self->state = IN_FIELD; |
---|
707 | n/a | } |
---|
708 | n/a | } |
---|
709 | n/a | else { |
---|
710 | n/a | /* normal character - save in field */ |
---|
711 | n/a | if (parse_add_char(self, c) < 0) |
---|
712 | n/a | return -1; |
---|
713 | n/a | } |
---|
714 | n/a | break; |
---|
715 | n/a | |
---|
716 | n/a | case ESCAPE_IN_QUOTED_FIELD: |
---|
717 | n/a | if (c == '\0') |
---|
718 | n/a | c = '\n'; |
---|
719 | n/a | if (parse_add_char(self, c) < 0) |
---|
720 | n/a | return -1; |
---|
721 | n/a | self->state = IN_QUOTED_FIELD; |
---|
722 | n/a | break; |
---|
723 | n/a | |
---|
724 | n/a | case QUOTE_IN_QUOTED_FIELD: |
---|
725 | n/a | /* doublequote - seen a quote in a quoted field */ |
---|
726 | n/a | if (dialect->quoting != QUOTE_NONE && |
---|
727 | n/a | c == dialect->quotechar) { |
---|
728 | n/a | /* save "" as " */ |
---|
729 | n/a | if (parse_add_char(self, c) < 0) |
---|
730 | n/a | return -1; |
---|
731 | n/a | self->state = IN_QUOTED_FIELD; |
---|
732 | n/a | } |
---|
733 | n/a | else if (c == dialect->delimiter) { |
---|
734 | n/a | /* save field - wait for new field */ |
---|
735 | n/a | if (parse_save_field(self) < 0) |
---|
736 | n/a | return -1; |
---|
737 | n/a | self->state = START_FIELD; |
---|
738 | n/a | } |
---|
739 | n/a | else if (c == '\n' || c == '\r' || c == '\0') { |
---|
740 | n/a | /* end of line - return [fields] */ |
---|
741 | n/a | if (parse_save_field(self) < 0) |
---|
742 | n/a | return -1; |
---|
743 | n/a | self->state = (c == '\0' ? START_RECORD : EAT_CRNL); |
---|
744 | n/a | } |
---|
745 | n/a | else if (!dialect->strict) { |
---|
746 | n/a | if (parse_add_char(self, c) < 0) |
---|
747 | n/a | return -1; |
---|
748 | n/a | self->state = IN_FIELD; |
---|
749 | n/a | } |
---|
750 | n/a | else { |
---|
751 | n/a | /* illegal */ |
---|
752 | n/a | PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'", |
---|
753 | n/a | dialect->delimiter, |
---|
754 | n/a | dialect->quotechar); |
---|
755 | n/a | return -1; |
---|
756 | n/a | } |
---|
757 | n/a | break; |
---|
758 | n/a | |
---|
759 | n/a | case EAT_CRNL: |
---|
760 | n/a | if (c == '\n' || c == '\r') |
---|
761 | n/a | ; |
---|
762 | n/a | else if (c == '\0') |
---|
763 | n/a | self->state = START_RECORD; |
---|
764 | n/a | else { |
---|
765 | n/a | PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); |
---|
766 | n/a | return -1; |
---|
767 | n/a | } |
---|
768 | n/a | break; |
---|
769 | n/a | |
---|
770 | n/a | } |
---|
771 | n/a | return 0; |
---|
772 | n/a | } |
---|
773 | n/a | |
---|
774 | n/a | static int |
---|
775 | n/a | parse_reset(ReaderObj *self) |
---|
776 | n/a | { |
---|
777 | n/a | Py_XSETREF(self->fields, PyList_New(0)); |
---|
778 | n/a | if (self->fields == NULL) |
---|
779 | n/a | return -1; |
---|
780 | n/a | self->field_len = 0; |
---|
781 | n/a | self->state = START_RECORD; |
---|
782 | n/a | self->numeric_field = 0; |
---|
783 | n/a | return 0; |
---|
784 | n/a | } |
---|
785 | n/a | |
---|
786 | n/a | static PyObject * |
---|
787 | n/a | Reader_iternext(ReaderObj *self) |
---|
788 | n/a | { |
---|
789 | n/a | PyObject *fields = NULL; |
---|
790 | n/a | Py_UCS4 c; |
---|
791 | n/a | Py_ssize_t pos, linelen; |
---|
792 | n/a | unsigned int kind; |
---|
793 | n/a | void *data; |
---|
794 | n/a | PyObject *lineobj; |
---|
795 | n/a | |
---|
796 | n/a | if (parse_reset(self) < 0) |
---|
797 | n/a | return NULL; |
---|
798 | n/a | do { |
---|
799 | n/a | lineobj = PyIter_Next(self->input_iter); |
---|
800 | n/a | if (lineobj == NULL) { |
---|
801 | n/a | /* End of input OR exception */ |
---|
802 | n/a | if (!PyErr_Occurred() && (self->field_len != 0 || |
---|
803 | n/a | self->state == IN_QUOTED_FIELD)) { |
---|
804 | n/a | if (self->dialect->strict) |
---|
805 | n/a | PyErr_SetString(_csvstate_global->error_obj, |
---|
806 | n/a | "unexpected end of data"); |
---|
807 | n/a | else if (parse_save_field(self) >= 0) |
---|
808 | n/a | break; |
---|
809 | n/a | } |
---|
810 | n/a | return NULL; |
---|
811 | n/a | } |
---|
812 | n/a | if (!PyUnicode_Check(lineobj)) { |
---|
813 | n/a | PyErr_Format(_csvstate_global->error_obj, |
---|
814 | n/a | "iterator should return strings, " |
---|
815 | n/a | "not %.200s " |
---|
816 | n/a | "(did you open the file in text mode?)", |
---|
817 | n/a | lineobj->ob_type->tp_name |
---|
818 | n/a | ); |
---|
819 | n/a | Py_DECREF(lineobj); |
---|
820 | n/a | return NULL; |
---|
821 | n/a | } |
---|
822 | n/a | if (PyUnicode_READY(lineobj) == -1) { |
---|
823 | n/a | Py_DECREF(lineobj); |
---|
824 | n/a | return NULL; |
---|
825 | n/a | } |
---|
826 | n/a | ++self->line_num; |
---|
827 | n/a | kind = PyUnicode_KIND(lineobj); |
---|
828 | n/a | data = PyUnicode_DATA(lineobj); |
---|
829 | n/a | pos = 0; |
---|
830 | n/a | linelen = PyUnicode_GET_LENGTH(lineobj); |
---|
831 | n/a | while (linelen--) { |
---|
832 | n/a | c = PyUnicode_READ(kind, data, pos); |
---|
833 | n/a | if (c == '\0') { |
---|
834 | n/a | Py_DECREF(lineobj); |
---|
835 | n/a | PyErr_Format(_csvstate_global->error_obj, |
---|
836 | n/a | "line contains NULL byte"); |
---|
837 | n/a | goto err; |
---|
838 | n/a | } |
---|
839 | n/a | if (parse_process_char(self, c) < 0) { |
---|
840 | n/a | Py_DECREF(lineobj); |
---|
841 | n/a | goto err; |
---|
842 | n/a | } |
---|
843 | n/a | pos++; |
---|
844 | n/a | } |
---|
845 | n/a | Py_DECREF(lineobj); |
---|
846 | n/a | if (parse_process_char(self, 0) < 0) |
---|
847 | n/a | goto err; |
---|
848 | n/a | } while (self->state != START_RECORD); |
---|
849 | n/a | |
---|
850 | n/a | fields = self->fields; |
---|
851 | n/a | self->fields = NULL; |
---|
852 | n/a | err: |
---|
853 | n/a | return fields; |
---|
854 | n/a | } |
---|
855 | n/a | |
---|
856 | n/a | static void |
---|
857 | n/a | Reader_dealloc(ReaderObj *self) |
---|
858 | n/a | { |
---|
859 | n/a | PyObject_GC_UnTrack(self); |
---|
860 | n/a | Py_XDECREF(self->dialect); |
---|
861 | n/a | Py_XDECREF(self->input_iter); |
---|
862 | n/a | Py_XDECREF(self->fields); |
---|
863 | n/a | if (self->field != NULL) |
---|
864 | n/a | PyMem_Free(self->field); |
---|
865 | n/a | PyObject_GC_Del(self); |
---|
866 | n/a | } |
---|
867 | n/a | |
---|
/* tp_traverse for reader objects: report every PyObject reference held
 * by the reader (dialect, input iterator, current field list) to the
 * cyclic garbage collector's visit callback.
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    return 0;
}
876 | n/a | |
---|
/* tp_clear for reader objects: drop the references that Reader_traverse
 * reports, leaving each member NULL.  Always returns 0.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
885 | n/a | |
---|
/* Docstring installed in the tp_doc slot of Reader_Type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
892 | n/a | |
---|
893 | n/a | static struct PyMethodDef Reader_methods[] = { |
---|
894 | n/a | { NULL, NULL } |
---|
895 | n/a | }; |
---|
896 | n/a | #define R_OFF(x) offsetof(ReaderObj, x) |
---|
897 | n/a | |
---|
898 | n/a | static struct PyMemberDef Reader_memberlist[] = { |
---|
899 | n/a | { "dialect", T_OBJECT, R_OFF(dialect), READONLY }, |
---|
900 | n/a | { "line_num", T_ULONG, R_OFF(line_num), READONLY }, |
---|
901 | n/a | { NULL } |
---|
902 | n/a | }; |
---|
903 | n/a | |
---|
904 | n/a | |
---|
905 | n/a | static PyTypeObject Reader_Type = { |
---|
906 | n/a | PyVarObject_HEAD_INIT(NULL, 0) |
---|
907 | n/a | "_csv.reader", /*tp_name*/ |
---|
908 | n/a | sizeof(ReaderObj), /*tp_basicsize*/ |
---|
909 | n/a | 0, /*tp_itemsize*/ |
---|
910 | n/a | /* methods */ |
---|
911 | n/a | (destructor)Reader_dealloc, /*tp_dealloc*/ |
---|
912 | n/a | (printfunc)0, /*tp_print*/ |
---|
913 | n/a | (getattrfunc)0, /*tp_getattr*/ |
---|
914 | n/a | (setattrfunc)0, /*tp_setattr*/ |
---|
915 | n/a | 0, /*tp_reserved*/ |
---|
916 | n/a | (reprfunc)0, /*tp_repr*/ |
---|
917 | n/a | 0, /*tp_as_number*/ |
---|
918 | n/a | 0, /*tp_as_sequence*/ |
---|
919 | n/a | 0, /*tp_as_mapping*/ |
---|
920 | n/a | (hashfunc)0, /*tp_hash*/ |
---|
921 | n/a | (ternaryfunc)0, /*tp_call*/ |
---|
922 | n/a | (reprfunc)0, /*tp_str*/ |
---|
923 | n/a | 0, /*tp_getattro*/ |
---|
924 | n/a | 0, /*tp_setattro*/ |
---|
925 | n/a | 0, /*tp_as_buffer*/ |
---|
926 | n/a | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | |
---|
927 | n/a | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ |
---|
928 | n/a | Reader_Type_doc, /*tp_doc*/ |
---|
929 | n/a | (traverseproc)Reader_traverse, /*tp_traverse*/ |
---|
930 | n/a | (inquiry)Reader_clear, /*tp_clear*/ |
---|
931 | n/a | 0, /*tp_richcompare*/ |
---|
932 | n/a | 0, /*tp_weaklistoffset*/ |
---|
933 | n/a | PyObject_SelfIter, /*tp_iter*/ |
---|
934 | n/a | (getiterfunc)Reader_iternext, /*tp_iternext*/ |
---|
935 | n/a | Reader_methods, /*tp_methods*/ |
---|
936 | n/a | Reader_memberlist, /*tp_members*/ |
---|
937 | n/a | 0, /*tp_getset*/ |
---|
938 | n/a | |
---|
939 | n/a | }; |
---|
940 | n/a | |
---|
941 | n/a | static PyObject * |
---|
942 | n/a | csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) |
---|
943 | n/a | { |
---|
944 | n/a | PyObject * iterator, * dialect = NULL; |
---|
945 | n/a | ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type); |
---|
946 | n/a | |
---|
947 | n/a | if (!self) |
---|
948 | n/a | return NULL; |
---|
949 | n/a | |
---|
950 | n/a | self->dialect = NULL; |
---|
951 | n/a | self->fields = NULL; |
---|
952 | n/a | self->input_iter = NULL; |
---|
953 | n/a | self->field = NULL; |
---|
954 | n/a | self->field_size = 0; |
---|
955 | n/a | self->line_num = 0; |
---|
956 | n/a | |
---|
957 | n/a | if (parse_reset(self) < 0) { |
---|
958 | n/a | Py_DECREF(self); |
---|
959 | n/a | return NULL; |
---|
960 | n/a | } |
---|
961 | n/a | |
---|
962 | n/a | if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) { |
---|
963 | n/a | Py_DECREF(self); |
---|
964 | n/a | return NULL; |
---|
965 | n/a | } |
---|
966 | n/a | self->input_iter = PyObject_GetIter(iterator); |
---|
967 | n/a | if (self->input_iter == NULL) { |
---|
968 | n/a | PyErr_SetString(PyExc_TypeError, |
---|
969 | n/a | "argument 1 must be an iterator"); |
---|
970 | n/a | Py_DECREF(self); |
---|
971 | n/a | return NULL; |
---|
972 | n/a | } |
---|
973 | n/a | self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); |
---|
974 | n/a | if (self->dialect == NULL) { |
---|
975 | n/a | Py_DECREF(self); |
---|
976 | n/a | return NULL; |
---|
977 | n/a | } |
---|
978 | n/a | |
---|
979 | n/a | PyObject_GC_Track(self); |
---|
980 | n/a | return (PyObject *)self; |
---|
981 | n/a | } |
---|
982 | n/a | |
---|
983 | n/a | /* |
---|
984 | n/a | * WRITER |
---|
985 | n/a | */ |
---|
986 | n/a | /* ---------------------------------------------------------------- */ |
---|
987 | n/a | static void |
---|
988 | n/a | join_reset(WriterObj *self) |
---|
989 | n/a | { |
---|
990 | n/a | self->rec_len = 0; |
---|
991 | n/a | self->num_fields = 0; |
---|
992 | n/a | } |
---|
993 | n/a | |
---|
994 | n/a | #define MEM_INCR 32768 |
---|
995 | n/a | |
---|
996 | n/a | /* Calculate new record length or append field to record. Return new |
---|
997 | n/a | * record length. |
---|
998 | n/a | */ |
---|
999 | n/a | static Py_ssize_t |
---|
1000 | n/a | join_append_data(WriterObj *self, unsigned int field_kind, void *field_data, |
---|
1001 | n/a | Py_ssize_t field_len, int *quoted, |
---|
1002 | n/a | int copy_phase) |
---|
1003 | n/a | { |
---|
1004 | n/a | DialectObj *dialect = self->dialect; |
---|
1005 | n/a | int i; |
---|
1006 | n/a | Py_ssize_t rec_len; |
---|
1007 | n/a | |
---|
1008 | n/a | #define INCLEN \ |
---|
1009 | n/a | do {\ |
---|
1010 | n/a | if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \ |
---|
1011 | n/a | goto overflow; \ |
---|
1012 | n/a | } \ |
---|
1013 | n/a | rec_len++; \ |
---|
1014 | n/a | } while(0) |
---|
1015 | n/a | |
---|
1016 | n/a | #define ADDCH(c) \ |
---|
1017 | n/a | do {\ |
---|
1018 | n/a | if (copy_phase) \ |
---|
1019 | n/a | self->rec[rec_len] = c;\ |
---|
1020 | n/a | INCLEN;\ |
---|
1021 | n/a | } while(0) |
---|
1022 | n/a | |
---|
1023 | n/a | rec_len = self->rec_len; |
---|
1024 | n/a | |
---|
1025 | n/a | /* If this is not the first field we need a field separator */ |
---|
1026 | n/a | if (self->num_fields > 0) |
---|
1027 | n/a | ADDCH(dialect->delimiter); |
---|
1028 | n/a | |
---|
1029 | n/a | /* Handle preceding quote */ |
---|
1030 | n/a | if (copy_phase && *quoted) |
---|
1031 | n/a | ADDCH(dialect->quotechar); |
---|
1032 | n/a | |
---|
1033 | n/a | /* Copy/count field data */ |
---|
1034 | n/a | /* If field is null just pass over */ |
---|
1035 | n/a | for (i = 0; field_data && (i < field_len); i++) { |
---|
1036 | n/a | Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i); |
---|
1037 | n/a | int want_escape = 0; |
---|
1038 | n/a | |
---|
1039 | n/a | if (c == dialect->delimiter || |
---|
1040 | n/a | c == dialect->escapechar || |
---|
1041 | n/a | c == dialect->quotechar || |
---|
1042 | n/a | PyUnicode_FindChar( |
---|
1043 | n/a | dialect->lineterminator, c, 0, |
---|
1044 | n/a | PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) { |
---|
1045 | n/a | if (dialect->quoting == QUOTE_NONE) |
---|
1046 | n/a | want_escape = 1; |
---|
1047 | n/a | else { |
---|
1048 | n/a | if (c == dialect->quotechar) { |
---|
1049 | n/a | if (dialect->doublequote) |
---|
1050 | n/a | ADDCH(dialect->quotechar); |
---|
1051 | n/a | else |
---|
1052 | n/a | want_escape = 1; |
---|
1053 | n/a | } |
---|
1054 | n/a | if (!want_escape) |
---|
1055 | n/a | *quoted = 1; |
---|
1056 | n/a | } |
---|
1057 | n/a | if (want_escape) { |
---|
1058 | n/a | if (!dialect->escapechar) { |
---|
1059 | n/a | PyErr_Format(_csvstate_global->error_obj, |
---|
1060 | n/a | "need to escape, but no escapechar set"); |
---|
1061 | n/a | return -1; |
---|
1062 | n/a | } |
---|
1063 | n/a | ADDCH(dialect->escapechar); |
---|
1064 | n/a | } |
---|
1065 | n/a | } |
---|
1066 | n/a | /* Copy field character into record buffer. |
---|
1067 | n/a | */ |
---|
1068 | n/a | ADDCH(c); |
---|
1069 | n/a | } |
---|
1070 | n/a | |
---|
1071 | n/a | if (*quoted) { |
---|
1072 | n/a | if (copy_phase) |
---|
1073 | n/a | ADDCH(dialect->quotechar); |
---|
1074 | n/a | else { |
---|
1075 | n/a | INCLEN; /* starting quote */ |
---|
1076 | n/a | INCLEN; /* ending quote */ |
---|
1077 | n/a | } |
---|
1078 | n/a | } |
---|
1079 | n/a | return rec_len; |
---|
1080 | n/a | |
---|
1081 | n/a | overflow: |
---|
1082 | n/a | PyErr_NoMemory(); |
---|
1083 | n/a | return -1; |
---|
1084 | n/a | #undef ADDCH |
---|
1085 | n/a | #undef INCLEN |
---|
1086 | n/a | } |
---|
1087 | n/a | |
---|
1088 | n/a | static int |
---|
1089 | n/a | join_check_rec_size(WriterObj *self, Py_ssize_t rec_len) |
---|
1090 | n/a | { |
---|
1091 | n/a | |
---|
1092 | n/a | if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) { |
---|
1093 | n/a | PyErr_NoMemory(); |
---|
1094 | n/a | return 0; |
---|
1095 | n/a | } |
---|
1096 | n/a | |
---|
1097 | n/a | if (rec_len > self->rec_size) { |
---|
1098 | n/a | if (self->rec_size == 0) { |
---|
1099 | n/a | self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; |
---|
1100 | n/a | if (self->rec != NULL) |
---|
1101 | n/a | PyMem_Free(self->rec); |
---|
1102 | n/a | self->rec = PyMem_New(Py_UCS4, self->rec_size); |
---|
1103 | n/a | } |
---|
1104 | n/a | else { |
---|
1105 | n/a | Py_UCS4* old_rec = self->rec; |
---|
1106 | n/a | |
---|
1107 | n/a | self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; |
---|
1108 | n/a | self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size); |
---|
1109 | n/a | if (self->rec == NULL) |
---|
1110 | n/a | PyMem_Free(old_rec); |
---|
1111 | n/a | } |
---|
1112 | n/a | if (self->rec == NULL) { |
---|
1113 | n/a | PyErr_NoMemory(); |
---|
1114 | n/a | return 0; |
---|
1115 | n/a | } |
---|
1116 | n/a | } |
---|
1117 | n/a | return 1; |
---|
1118 | n/a | } |
---|
1119 | n/a | |
---|
1120 | n/a | static int |
---|
1121 | n/a | join_append(WriterObj *self, PyObject *field, int quoted) |
---|
1122 | n/a | { |
---|
1123 | n/a | unsigned int field_kind = -1; |
---|
1124 | n/a | void *field_data = NULL; |
---|
1125 | n/a | Py_ssize_t field_len = 0; |
---|
1126 | n/a | Py_ssize_t rec_len; |
---|
1127 | n/a | |
---|
1128 | n/a | if (field != NULL) { |
---|
1129 | n/a | if (PyUnicode_READY(field) == -1) |
---|
1130 | n/a | return 0; |
---|
1131 | n/a | field_kind = PyUnicode_KIND(field); |
---|
1132 | n/a | field_data = PyUnicode_DATA(field); |
---|
1133 | n/a | field_len = PyUnicode_GET_LENGTH(field); |
---|
1134 | n/a | } |
---|
1135 | n/a | rec_len = join_append_data(self, field_kind, field_data, field_len, |
---|
1136 | n/a | "ed, 0); |
---|
1137 | n/a | if (rec_len < 0) |
---|
1138 | n/a | return 0; |
---|
1139 | n/a | |
---|
1140 | n/a | /* grow record buffer if necessary */ |
---|
1141 | n/a | if (!join_check_rec_size(self, rec_len)) |
---|
1142 | n/a | return 0; |
---|
1143 | n/a | |
---|
1144 | n/a | self->rec_len = join_append_data(self, field_kind, field_data, field_len, |
---|
1145 | n/a | "ed, 1); |
---|
1146 | n/a | self->num_fields++; |
---|
1147 | n/a | |
---|
1148 | n/a | return 1; |
---|
1149 | n/a | } |
---|
1150 | n/a | |
---|
1151 | n/a | static int |
---|
1152 | n/a | join_append_lineterminator(WriterObj *self) |
---|
1153 | n/a | { |
---|
1154 | n/a | Py_ssize_t terminator_len, i; |
---|
1155 | n/a | unsigned int term_kind; |
---|
1156 | n/a | void *term_data; |
---|
1157 | n/a | |
---|
1158 | n/a | terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator); |
---|
1159 | n/a | if (terminator_len == -1) |
---|
1160 | n/a | return 0; |
---|
1161 | n/a | |
---|
1162 | n/a | /* grow record buffer if necessary */ |
---|
1163 | n/a | if (!join_check_rec_size(self, self->rec_len + terminator_len)) |
---|
1164 | n/a | return 0; |
---|
1165 | n/a | |
---|
1166 | n/a | term_kind = PyUnicode_KIND(self->dialect->lineterminator); |
---|
1167 | n/a | term_data = PyUnicode_DATA(self->dialect->lineterminator); |
---|
1168 | n/a | for (i = 0; i < terminator_len; i++) |
---|
1169 | n/a | self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i); |
---|
1170 | n/a | self->rec_len += terminator_len; |
---|
1171 | n/a | |
---|
1172 | n/a | return 1; |
---|
1173 | n/a | } |
---|
1174 | n/a | |
---|
1175 | n/a | PyDoc_STRVAR(csv_writerow_doc, |
---|
1176 | n/a | "writerow(iterable)\n" |
---|
1177 | n/a | "\n" |
---|
1178 | n/a | "Construct and write a CSV record from an iterable of fields. Non-string\n" |
---|
1179 | n/a | "elements will be converted to string."); |
---|
1180 | n/a | |
---|
1181 | n/a | static PyObject * |
---|
1182 | n/a | csv_writerow(WriterObj *self, PyObject *seq) |
---|
1183 | n/a | { |
---|
1184 | n/a | DialectObj *dialect = self->dialect; |
---|
1185 | n/a | PyObject *iter, *field, *line, *result; |
---|
1186 | n/a | |
---|
1187 | n/a | iter = PyObject_GetIter(seq); |
---|
1188 | n/a | if (iter == NULL) |
---|
1189 | n/a | return PyErr_Format(_csvstate_global->error_obj, |
---|
1190 | n/a | "iterable expected, not %.200s", |
---|
1191 | n/a | seq->ob_type->tp_name); |
---|
1192 | n/a | |
---|
1193 | n/a | /* Join all fields in internal buffer. |
---|
1194 | n/a | */ |
---|
1195 | n/a | join_reset(self); |
---|
1196 | n/a | while ((field = PyIter_Next(iter))) { |
---|
1197 | n/a | int append_ok; |
---|
1198 | n/a | int quoted; |
---|
1199 | n/a | |
---|
1200 | n/a | switch (dialect->quoting) { |
---|
1201 | n/a | case QUOTE_NONNUMERIC: |
---|
1202 | n/a | quoted = !PyNumber_Check(field); |
---|
1203 | n/a | break; |
---|
1204 | n/a | case QUOTE_ALL: |
---|
1205 | n/a | quoted = 1; |
---|
1206 | n/a | break; |
---|
1207 | n/a | default: |
---|
1208 | n/a | quoted = 0; |
---|
1209 | n/a | break; |
---|
1210 | n/a | } |
---|
1211 | n/a | |
---|
1212 | n/a | if (PyUnicode_Check(field)) { |
---|
1213 | n/a | append_ok = join_append(self, field, quoted); |
---|
1214 | n/a | Py_DECREF(field); |
---|
1215 | n/a | } |
---|
1216 | n/a | else if (field == Py_None) { |
---|
1217 | n/a | append_ok = join_append(self, NULL, quoted); |
---|
1218 | n/a | Py_DECREF(field); |
---|
1219 | n/a | } |
---|
1220 | n/a | else { |
---|
1221 | n/a | PyObject *str; |
---|
1222 | n/a | |
---|
1223 | n/a | str = PyObject_Str(field); |
---|
1224 | n/a | Py_DECREF(field); |
---|
1225 | n/a | if (str == NULL) { |
---|
1226 | n/a | Py_DECREF(iter); |
---|
1227 | n/a | return NULL; |
---|
1228 | n/a | } |
---|
1229 | n/a | append_ok = join_append(self, str, quoted); |
---|
1230 | n/a | Py_DECREF(str); |
---|
1231 | n/a | } |
---|
1232 | n/a | if (!append_ok) { |
---|
1233 | n/a | Py_DECREF(iter); |
---|
1234 | n/a | return NULL; |
---|
1235 | n/a | } |
---|
1236 | n/a | } |
---|
1237 | n/a | Py_DECREF(iter); |
---|
1238 | n/a | if (PyErr_Occurred()) |
---|
1239 | n/a | return NULL; |
---|
1240 | n/a | |
---|
1241 | n/a | if (self->num_fields > 0 && self->rec_size == 0) { |
---|
1242 | n/a | if (dialect->quoting == QUOTE_NONE) { |
---|
1243 | n/a | PyErr_Format(_csvstate_global->error_obj, |
---|
1244 | n/a | "single empty field record must be quoted"); |
---|
1245 | n/a | return NULL; |
---|
1246 | n/a | } |
---|
1247 | n/a | self->num_fields--; |
---|
1248 | n/a | if (!join_append(self, NULL, 1)) |
---|
1249 | n/a | return NULL; |
---|
1250 | n/a | } |
---|
1251 | n/a | |
---|
1252 | n/a | /* Add line terminator. |
---|
1253 | n/a | */ |
---|
1254 | n/a | if (!join_append_lineterminator(self)) |
---|
1255 | n/a | return NULL; |
---|
1256 | n/a | |
---|
1257 | n/a | line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, |
---|
1258 | n/a | (void *) self->rec, self->rec_len); |
---|
1259 | n/a | if (line == NULL) |
---|
1260 | n/a | return NULL; |
---|
1261 | n/a | result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL); |
---|
1262 | n/a | Py_DECREF(line); |
---|
1263 | n/a | return result; |
---|
1264 | n/a | } |
---|
1265 | n/a | |
---|
1266 | n/a | PyDoc_STRVAR(csv_writerows_doc, |
---|
1267 | n/a | "writerows(iterable of iterables)\n" |
---|
1268 | n/a | "\n" |
---|
1269 | n/a | "Construct and write a series of iterables to a csv file. Non-string\n" |
---|
1270 | n/a | "elements will be converted to string."); |
---|
1271 | n/a | |
---|
1272 | n/a | static PyObject * |
---|
1273 | n/a | csv_writerows(WriterObj *self, PyObject *seqseq) |
---|
1274 | n/a | { |
---|
1275 | n/a | PyObject *row_iter, *row_obj, *result; |
---|
1276 | n/a | |
---|
1277 | n/a | row_iter = PyObject_GetIter(seqseq); |
---|
1278 | n/a | if (row_iter == NULL) { |
---|
1279 | n/a | PyErr_SetString(PyExc_TypeError, |
---|
1280 | n/a | "writerows() argument must be iterable"); |
---|
1281 | n/a | return NULL; |
---|
1282 | n/a | } |
---|
1283 | n/a | while ((row_obj = PyIter_Next(row_iter))) { |
---|
1284 | n/a | result = csv_writerow(self, row_obj); |
---|
1285 | n/a | Py_DECREF(row_obj); |
---|
1286 | n/a | if (!result) { |
---|
1287 | n/a | Py_DECREF(row_iter); |
---|
1288 | n/a | return NULL; |
---|
1289 | n/a | } |
---|
1290 | n/a | else |
---|
1291 | n/a | Py_DECREF(result); |
---|
1292 | n/a | } |
---|
1293 | n/a | Py_DECREF(row_iter); |
---|
1294 | n/a | if (PyErr_Occurred()) |
---|
1295 | n/a | return NULL; |
---|
1296 | n/a | Py_RETURN_NONE; |
---|
1297 | n/a | } |
---|
1298 | n/a | |
---|
1299 | n/a | static struct PyMethodDef Writer_methods[] = { |
---|
1300 | n/a | { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc}, |
---|
1301 | n/a | { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc}, |
---|
1302 | n/a | { NULL, NULL } |
---|
1303 | n/a | }; |
---|
1304 | n/a | |
---|
1305 | n/a | #define W_OFF(x) offsetof(WriterObj, x) |
---|
1306 | n/a | |
---|
1307 | n/a | static struct PyMemberDef Writer_memberlist[] = { |
---|
1308 | n/a | { "dialect", T_OBJECT, W_OFF(dialect), READONLY }, |
---|
1309 | n/a | { NULL } |
---|
1310 | n/a | }; |
---|
1311 | n/a | |
---|
1312 | n/a | static void |
---|
1313 | n/a | Writer_dealloc(WriterObj *self) |
---|
1314 | n/a | { |
---|
1315 | n/a | PyObject_GC_UnTrack(self); |
---|
1316 | n/a | Py_XDECREF(self->dialect); |
---|
1317 | n/a | Py_XDECREF(self->writeline); |
---|
1318 | n/a | if (self->rec != NULL) |
---|
1319 | n/a | PyMem_Free(self->rec); |
---|
1320 | n/a | PyObject_GC_Del(self); |
---|
1321 | n/a | } |
---|
1322 | n/a | |
---|
/* tp_traverse for writer objects: report the PyObject references held by
 * the writer (dialect and write callable) to the cyclic garbage
 * collector's visit callback.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1330 | n/a | |
---|
/* tp_clear for writer objects: drop the references that Writer_traverse
 * reports, leaving each member NULL.  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1338 | n/a | |
---|
/* Docstring installed in the tp_doc slot of Writer_Type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1345 | n/a | |
---|
1346 | n/a | static PyTypeObject Writer_Type = { |
---|
1347 | n/a | PyVarObject_HEAD_INIT(NULL, 0) |
---|
1348 | n/a | "_csv.writer", /*tp_name*/ |
---|
1349 | n/a | sizeof(WriterObj), /*tp_basicsize*/ |
---|
1350 | n/a | 0, /*tp_itemsize*/ |
---|
1351 | n/a | /* methods */ |
---|
1352 | n/a | (destructor)Writer_dealloc, /*tp_dealloc*/ |
---|
1353 | n/a | (printfunc)0, /*tp_print*/ |
---|
1354 | n/a | (getattrfunc)0, /*tp_getattr*/ |
---|
1355 | n/a | (setattrfunc)0, /*tp_setattr*/ |
---|
1356 | n/a | 0, /*tp_reserved*/ |
---|
1357 | n/a | (reprfunc)0, /*tp_repr*/ |
---|
1358 | n/a | 0, /*tp_as_number*/ |
---|
1359 | n/a | 0, /*tp_as_sequence*/ |
---|
1360 | n/a | 0, /*tp_as_mapping*/ |
---|
1361 | n/a | (hashfunc)0, /*tp_hash*/ |
---|
1362 | n/a | (ternaryfunc)0, /*tp_call*/ |
---|
1363 | n/a | (reprfunc)0, /*tp_str*/ |
---|
1364 | n/a | 0, /*tp_getattro*/ |
---|
1365 | n/a | 0, /*tp_setattro*/ |
---|
1366 | n/a | 0, /*tp_as_buffer*/ |
---|
1367 | n/a | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | |
---|
1368 | n/a | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ |
---|
1369 | n/a | Writer_Type_doc, |
---|
1370 | n/a | (traverseproc)Writer_traverse, /*tp_traverse*/ |
---|
1371 | n/a | (inquiry)Writer_clear, /*tp_clear*/ |
---|
1372 | n/a | 0, /*tp_richcompare*/ |
---|
1373 | n/a | 0, /*tp_weaklistoffset*/ |
---|
1374 | n/a | (getiterfunc)0, /*tp_iter*/ |
---|
1375 | n/a | (getiterfunc)0, /*tp_iternext*/ |
---|
1376 | n/a | Writer_methods, /*tp_methods*/ |
---|
1377 | n/a | Writer_memberlist, /*tp_members*/ |
---|
1378 | n/a | 0, /*tp_getset*/ |
---|
1379 | n/a | }; |
---|
1380 | n/a | |
---|
1381 | n/a | static PyObject * |
---|
1382 | n/a | csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) |
---|
1383 | n/a | { |
---|
1384 | n/a | PyObject * output_file, * dialect = NULL; |
---|
1385 | n/a | WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type); |
---|
1386 | n/a | _Py_IDENTIFIER(write); |
---|
1387 | n/a | |
---|
1388 | n/a | if (!self) |
---|
1389 | n/a | return NULL; |
---|
1390 | n/a | |
---|
1391 | n/a | self->dialect = NULL; |
---|
1392 | n/a | self->writeline = NULL; |
---|
1393 | n/a | |
---|
1394 | n/a | self->rec = NULL; |
---|
1395 | n/a | self->rec_size = 0; |
---|
1396 | n/a | self->rec_len = 0; |
---|
1397 | n/a | self->num_fields = 0; |
---|
1398 | n/a | |
---|
1399 | n/a | if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) { |
---|
1400 | n/a | Py_DECREF(self); |
---|
1401 | n/a | return NULL; |
---|
1402 | n/a | } |
---|
1403 | n/a | self->writeline = _PyObject_GetAttrId(output_file, &PyId_write); |
---|
1404 | n/a | if (self->writeline == NULL || !PyCallable_Check(self->writeline)) { |
---|
1405 | n/a | PyErr_SetString(PyExc_TypeError, |
---|
1406 | n/a | "argument 1 must have a \"write\" method"); |
---|
1407 | n/a | Py_DECREF(self); |
---|
1408 | n/a | return NULL; |
---|
1409 | n/a | } |
---|
1410 | n/a | self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); |
---|
1411 | n/a | if (self->dialect == NULL) { |
---|
1412 | n/a | Py_DECREF(self); |
---|
1413 | n/a | return NULL; |
---|
1414 | n/a | } |
---|
1415 | n/a | PyObject_GC_Track(self); |
---|
1416 | n/a | return (PyObject *)self; |
---|
1417 | n/a | } |
---|
1418 | n/a | |
---|
1419 | n/a | /* |
---|
1420 | n/a | * DIALECT REGISTRY |
---|
1421 | n/a | */ |
---|
1422 | n/a | static PyObject * |
---|
1423 | n/a | csv_list_dialects(PyObject *module, PyObject *args) |
---|
1424 | n/a | { |
---|
1425 | n/a | return PyDict_Keys(_csvstate_global->dialects); |
---|
1426 | n/a | } |
---|
1427 | n/a | |
---|
1428 | n/a | static PyObject * |
---|
1429 | n/a | csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) |
---|
1430 | n/a | { |
---|
1431 | n/a | PyObject *name_obj, *dialect_obj = NULL; |
---|
1432 | n/a | PyObject *dialect; |
---|
1433 | n/a | |
---|
1434 | n/a | if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj)) |
---|
1435 | n/a | return NULL; |
---|
1436 | n/a | if (!PyUnicode_Check(name_obj)) { |
---|
1437 | n/a | PyErr_SetString(PyExc_TypeError, |
---|
1438 | n/a | "dialect name must be a string"); |
---|
1439 | n/a | return NULL; |
---|
1440 | n/a | } |
---|
1441 | n/a | if (PyUnicode_READY(name_obj) == -1) |
---|
1442 | n/a | return NULL; |
---|
1443 | n/a | dialect = _call_dialect(dialect_obj, kwargs); |
---|
1444 | n/a | if (dialect == NULL) |
---|
1445 | n/a | return NULL; |
---|
1446 | n/a | if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) { |
---|
1447 | n/a | Py_DECREF(dialect); |
---|
1448 | n/a | return NULL; |
---|
1449 | n/a | } |
---|
1450 | n/a | Py_DECREF(dialect); |
---|
1451 | n/a | Py_RETURN_NONE; |
---|
1452 | n/a | } |
---|
1453 | n/a | |
---|
1454 | n/a | static PyObject * |
---|
1455 | n/a | csv_unregister_dialect(PyObject *module, PyObject *name_obj) |
---|
1456 | n/a | { |
---|
1457 | n/a | if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) |
---|
1458 | n/a | return PyErr_Format(_csvstate_global->error_obj, "unknown dialect"); |
---|
1459 | n/a | Py_RETURN_NONE; |
---|
1460 | n/a | } |
---|
1461 | n/a | |
---|
1462 | n/a | static PyObject * |
---|
1463 | n/a | csv_get_dialect(PyObject *module, PyObject *name_obj) |
---|
1464 | n/a | { |
---|
1465 | n/a | return get_dialect_from_registry(name_obj); |
---|
1466 | n/a | } |
---|
1467 | n/a | |
---|
1468 | n/a | static PyObject * |
---|
1469 | n/a | csv_field_size_limit(PyObject *module, PyObject *args) |
---|
1470 | n/a | { |
---|
1471 | n/a | PyObject *new_limit = NULL; |
---|
1472 | n/a | long old_limit = _csvstate_global->field_limit; |
---|
1473 | n/a | |
---|
1474 | n/a | if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit)) |
---|
1475 | n/a | return NULL; |
---|
1476 | n/a | if (new_limit != NULL) { |
---|
1477 | n/a | if (!PyLong_CheckExact(new_limit)) { |
---|
1478 | n/a | PyErr_Format(PyExc_TypeError, |
---|
1479 | n/a | "limit must be an integer"); |
---|
1480 | n/a | return NULL; |
---|
1481 | n/a | } |
---|
1482 | n/a | _csvstate_global->field_limit = PyLong_AsLong(new_limit); |
---|
1483 | n/a | if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) { |
---|
1484 | n/a | _csvstate_global->field_limit = old_limit; |
---|
1485 | n/a | return NULL; |
---|
1486 | n/a | } |
---|
1487 | n/a | } |
---|
1488 | n/a | return PyLong_FromLong(old_limit); |
---|
1489 | n/a | } |
---|
1490 | n/a | |
---|
1491 | n/a | /* |
---|
1492 | n/a | * MODULE |
---|
1493 | n/a | */ |
---|
1494 | n/a | |
---|
/* Module-level documentation string: overview of the csv API, dialect
 * registration and the individual dialect settings.
 * NOTE(review): presumably installed as the module's __doc__; the module
 * definition is outside this part of the file -- confirm.
 */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1552 | n/a | |
---|
1553 | n/a | PyDoc_STRVAR(csv_reader_doc, |
---|
1554 | n/a | " csv_reader = reader(iterable [, dialect='excel']\n" |
---|
1555 | n/a | " [optional keyword args])\n" |
---|
1556 | n/a | " for row in csv_reader:\n" |
---|
1557 | n/a | " process(row)\n" |
---|
1558 | n/a | "\n" |
---|
1559 | n/a | "The \"iterable\" argument can be any object that returns a line\n" |
---|
1560 | n/a | "of input for each iteration, such as a file object or a list. The\n" |
---|
1561 | n/a | "optional \"dialect\" parameter is discussed below. The function\n" |
---|
1562 | n/a | "also accepts optional keyword arguments which override settings\n" |
---|
1563 | n/a | "provided by the dialect.\n" |
---|
1564 | n/a | "\n" |
---|
1565 | n/a | "The returned object is an iterator. Each iteration returns a row\n" |
---|
1566 | n/a | "of the CSV file (which can span multiple input lines).\n"); |
---|
1567 | n/a | |
---|
1568 | n/a | PyDoc_STRVAR(csv_writer_doc, |
---|
1569 | n/a | " csv_writer = csv.writer(fileobj [, dialect='excel']\n" |
---|
1570 | n/a | " [optional keyword args])\n" |
---|
1571 | n/a | " for row in sequence:\n" |
---|
1572 | n/a | " csv_writer.writerow(row)\n" |
---|
1573 | n/a | "\n" |
---|
1574 | n/a | " [or]\n" |
---|
1575 | n/a | "\n" |
---|
1576 | n/a | " csv_writer = csv.writer(fileobj [, dialect='excel']\n" |
---|
1577 | n/a | " [optional keyword args])\n" |
---|
1578 | n/a | " csv_writer.writerows(rows)\n" |
---|
1579 | n/a | "\n" |
---|
1580 | n/a | "The \"fileobj\" argument can be any object that supports the file API.\n"); |
---|
1581 | n/a | |
---|
1582 | n/a | PyDoc_STRVAR(csv_list_dialects_doc, |
---|
1583 | n/a | "Return a list of all know dialect names.\n" |
---|
1584 | n/a | " names = csv.list_dialects()"); |
---|
1585 | n/a | |
---|
1586 | n/a | PyDoc_STRVAR(csv_get_dialect_doc, |
---|
1587 | n/a | "Return the dialect instance associated with name.\n" |
---|
1588 | n/a | " dialect = csv.get_dialect(name)"); |
---|
1589 | n/a | |
---|
1590 | n/a | PyDoc_STRVAR(csv_register_dialect_doc, |
---|
1591 | n/a | "Create a mapping from a string name to a dialect class.\n" |
---|
1592 | n/a | " dialect = csv.register_dialect(name[, dialect[, **fmtparams]])"); |
---|
1593 | n/a | |
---|
1594 | n/a | PyDoc_STRVAR(csv_unregister_dialect_doc, |
---|
1595 | n/a | "Delete the name/dialect mapping associated with a string name.\n" |
---|
1596 | n/a | " csv.unregister_dialect(name)"); |
---|
1597 | n/a | |
---|
1598 | n/a | PyDoc_STRVAR(csv_field_size_limit_doc, |
---|
1599 | n/a | "Sets an upper limit on parsed fields.\n" |
---|
1600 | n/a | " csv.field_size_limit([limit])\n" |
---|
1601 | n/a | "\n" |
---|
1602 | n/a | "Returns old limit. If limit is not given, no new limit is set and\n" |
---|
1603 | n/a | "the old limit is returned"); |
---|
1604 | n/a | |
---|
1605 | n/a | static struct PyMethodDef csv_methods[] = { |
---|
1606 | n/a | { "reader", (PyCFunction)csv_reader, |
---|
1607 | n/a | METH_VARARGS | METH_KEYWORDS, csv_reader_doc}, |
---|
1608 | n/a | { "writer", (PyCFunction)csv_writer, |
---|
1609 | n/a | METH_VARARGS | METH_KEYWORDS, csv_writer_doc}, |
---|
1610 | n/a | { "list_dialects", (PyCFunction)csv_list_dialects, |
---|
1611 | n/a | METH_NOARGS, csv_list_dialects_doc}, |
---|
1612 | n/a | { "register_dialect", (PyCFunction)csv_register_dialect, |
---|
1613 | n/a | METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc}, |
---|
1614 | n/a | { "unregister_dialect", (PyCFunction)csv_unregister_dialect, |
---|
1615 | n/a | METH_O, csv_unregister_dialect_doc}, |
---|
1616 | n/a | { "get_dialect", (PyCFunction)csv_get_dialect, |
---|
1617 | n/a | METH_O, csv_get_dialect_doc}, |
---|
1618 | n/a | { "field_size_limit", (PyCFunction)csv_field_size_limit, |
---|
1619 | n/a | METH_VARARGS, csv_field_size_limit_doc}, |
---|
1620 | n/a | { NULL, NULL } |
---|
1621 | n/a | }; |
---|
1622 | n/a | |
---|
1623 | n/a | static struct PyModuleDef _csvmodule = { |
---|
1624 | n/a | PyModuleDef_HEAD_INIT, |
---|
1625 | n/a | "_csv", |
---|
1626 | n/a | csv_module_doc, |
---|
1627 | n/a | sizeof(_csvstate), |
---|
1628 | n/a | csv_methods, |
---|
1629 | n/a | NULL, |
---|
1630 | n/a | _csv_traverse, |
---|
1631 | n/a | _csv_clear, |
---|
1632 | n/a | _csv_free |
---|
1633 | n/a | }; |
---|
1634 | n/a | |
---|
1635 | n/a | PyMODINIT_FUNC |
---|
1636 | n/a | PyInit__csv(void) |
---|
1637 | n/a | { |
---|
1638 | n/a | PyObject *module; |
---|
1639 | n/a | const StyleDesc *style; |
---|
1640 | n/a | |
---|
1641 | n/a | if (PyType_Ready(&Dialect_Type) < 0) |
---|
1642 | n/a | return NULL; |
---|
1643 | n/a | |
---|
1644 | n/a | if (PyType_Ready(&Reader_Type) < 0) |
---|
1645 | n/a | return NULL; |
---|
1646 | n/a | |
---|
1647 | n/a | if (PyType_Ready(&Writer_Type) < 0) |
---|
1648 | n/a | return NULL; |
---|
1649 | n/a | |
---|
1650 | n/a | /* Create the module and add the functions */ |
---|
1651 | n/a | module = PyModule_Create(&_csvmodule); |
---|
1652 | n/a | if (module == NULL) |
---|
1653 | n/a | return NULL; |
---|
1654 | n/a | |
---|
1655 | n/a | /* Add version to the module. */ |
---|
1656 | n/a | if (PyModule_AddStringConstant(module, "__version__", |
---|
1657 | n/a | MODULE_VERSION) == -1) |
---|
1658 | n/a | return NULL; |
---|
1659 | n/a | |
---|
1660 | n/a | /* Set the field limit */ |
---|
1661 | n/a | _csvstate(module)->field_limit = 128 * 1024; |
---|
1662 | n/a | /* Do I still need to add this var to the Module Dict? */ |
---|
1663 | n/a | |
---|
1664 | n/a | /* Add _dialects dictionary */ |
---|
1665 | n/a | _csvstate(module)->dialects = PyDict_New(); |
---|
1666 | n/a | if (_csvstate(module)->dialects == NULL) |
---|
1667 | n/a | return NULL; |
---|
1668 | n/a | Py_INCREF(_csvstate(module)->dialects); |
---|
1669 | n/a | if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects)) |
---|
1670 | n/a | return NULL; |
---|
1671 | n/a | |
---|
1672 | n/a | /* Add quote styles into dictionary */ |
---|
1673 | n/a | for (style = quote_styles; style->name; style++) { |
---|
1674 | n/a | if (PyModule_AddIntConstant(module, style->name, |
---|
1675 | n/a | style->style) == -1) |
---|
1676 | n/a | return NULL; |
---|
1677 | n/a | } |
---|
1678 | n/a | |
---|
1679 | n/a | /* Add the Dialect type */ |
---|
1680 | n/a | Py_INCREF(&Dialect_Type); |
---|
1681 | n/a | if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type)) |
---|
1682 | n/a | return NULL; |
---|
1683 | n/a | |
---|
1684 | n/a | /* Add the CSV exception object to the module. */ |
---|
1685 | n/a | _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL); |
---|
1686 | n/a | if (_csvstate(module)->error_obj == NULL) |
---|
1687 | n/a | return NULL; |
---|
1688 | n/a | Py_INCREF(_csvstate(module)->error_obj); |
---|
1689 | n/a | PyModule_AddObject(module, "Error", _csvstate(module)->error_obj); |
---|
1690 | n/a | return module; |
---|
1691 | n/a | } |
---|