1 | n/a | /* _bz2 - Low-level Python interface to libbzip2. */ |
---|
2 | n/a | |
---|
3 | n/a | #define PY_SSIZE_T_CLEAN |
---|
4 | n/a | |
---|
5 | n/a | #include "Python.h" |
---|
6 | n/a | #include "structmember.h" |
---|
7 | n/a | |
---|
8 | n/a | #ifdef WITH_THREAD |
---|
9 | n/a | #include "pythread.h" |
---|
10 | n/a | #endif |
---|
11 | n/a | |
---|
12 | n/a | #include <bzlib.h> |
---|
13 | n/a | #include <stdio.h> |
---|
14 | n/a | |
---|
15 | n/a | |
---|
/* When the bzlib.h in use does not define BZ_CONFIG_ERROR, map the
   BZ2_-prefixed streaming API names used throughout this file onto the
   unprefixed names.
   NOTE(review): presumably this targets old libbzip2 releases that exported
   the API without the BZ2_ prefix -- confirm against the bzip2 changelog. */
#ifndef BZ_CONFIG_ERROR
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd
#endif  /* ! BZ_CONFIG_ERROR */
---|
24 | n/a | |
---|
25 | n/a | |
---|
#ifdef WITH_THREAD
/* Acquire obj->lock.  A non-blocking attempt is made first; only when that
   fails is the blocking acquire performed, bracketed by
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so other Python threads can
   run while this one waits. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
/* Release obj->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Built without thread support: both macros expand to nothing. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
---|
38 | n/a | |
---|
39 | n/a | |
---|
/* Instance layout of _bz2.BZ2Compressor objects. */
typedef struct {
    PyObject_HEAD
    /* libbzip2 stream state, set up by BZ2_bzCompressInit() in __init__ and
       torn down by BZ2_bzCompressEnd() in the deallocator. */
    bz_stream bzs;
    /* Set to 1 by flush(); once set, compress() and flush() raise
       ValueError instead of touching the stream. */
    int flushed;
#ifdef WITH_THREAD
    /* Held (via ACQUIRE_LOCK/RELEASE_LOCK) around every compress()/flush(). */
    PyThread_type_lock lock;
#endif
} BZ2Compressor;
---|
48 | n/a | |
---|
/* Instance layout of _bz2.BZ2Decompressor objects. */
typedef struct {
    PyObject_HEAD
    /* libbzip2 stream state.  bzs.next_in doubles as a flag: it is NULL when
       no unconsumed input is pending, otherwise it points at that input. */
    bz_stream bzs;
    /* Set to 1 once BZ2_bzDecompress() reports BZ_STREAM_END. */
    char eof;           /* T_BOOL expects a char */
    /* Bytes object holding whatever input followed the end of the stream. */
    PyObject *unused_data;
    /* Exposed as a T_BOOL member; 1 when all supplied input was consumed and
       the stream has not ended, 0 otherwise. */
    char needs_input;
    /* Buffer allocated with PyMem_Malloc/PyMem_Realloc that keeps unconsumed
       input alive between decompress() calls, and its allocated size. */
    char *input_buffer;
    size_t input_buffer_size;

    /* bzs->avail_in is only 32 bit, so we store the true length
       separately. Conversion and looping is encapsulated in
       decompress_buf() */
    size_t bzs_avail_in_real;
#ifdef WITH_THREAD
    /* Held (via ACQUIRE_LOCK/RELEASE_LOCK) around every decompress(). */
    PyThread_type_lock lock;
#endif
} BZ2Decompressor;
---|
66 | n/a | |
---|
67 | n/a | static PyTypeObject BZ2Compressor_Type; |
---|
68 | n/a | static PyTypeObject BZ2Decompressor_Type; |
---|
69 | n/a | |
---|
70 | n/a | /* Helper functions. */ |
---|
71 | n/a | |
---|
72 | n/a | static int |
---|
73 | n/a | catch_bz2_error(int bzerror) |
---|
74 | n/a | { |
---|
75 | n/a | switch(bzerror) { |
---|
76 | n/a | case BZ_OK: |
---|
77 | n/a | case BZ_RUN_OK: |
---|
78 | n/a | case BZ_FLUSH_OK: |
---|
79 | n/a | case BZ_FINISH_OK: |
---|
80 | n/a | case BZ_STREAM_END: |
---|
81 | n/a | return 0; |
---|
82 | n/a | |
---|
83 | n/a | #ifdef BZ_CONFIG_ERROR |
---|
84 | n/a | case BZ_CONFIG_ERROR: |
---|
85 | n/a | PyErr_SetString(PyExc_SystemError, |
---|
86 | n/a | "libbzip2 was not compiled correctly"); |
---|
87 | n/a | return 1; |
---|
88 | n/a | #endif |
---|
89 | n/a | case BZ_PARAM_ERROR: |
---|
90 | n/a | PyErr_SetString(PyExc_ValueError, |
---|
91 | n/a | "Internal error - " |
---|
92 | n/a | "invalid parameters passed to libbzip2"); |
---|
93 | n/a | return 1; |
---|
94 | n/a | case BZ_MEM_ERROR: |
---|
95 | n/a | PyErr_NoMemory(); |
---|
96 | n/a | return 1; |
---|
97 | n/a | case BZ_DATA_ERROR: |
---|
98 | n/a | case BZ_DATA_ERROR_MAGIC: |
---|
99 | n/a | PyErr_SetString(PyExc_IOError, "Invalid data stream"); |
---|
100 | n/a | return 1; |
---|
101 | n/a | case BZ_IO_ERROR: |
---|
102 | n/a | PyErr_SetString(PyExc_IOError, "Unknown I/O error"); |
---|
103 | n/a | return 1; |
---|
104 | n/a | case BZ_UNEXPECTED_EOF: |
---|
105 | n/a | PyErr_SetString(PyExc_EOFError, |
---|
106 | n/a | "Compressed file ended before the logical " |
---|
107 | n/a | "end-of-stream was detected"); |
---|
108 | n/a | return 1; |
---|
109 | n/a | case BZ_SEQUENCE_ERROR: |
---|
110 | n/a | PyErr_SetString(PyExc_RuntimeError, |
---|
111 | n/a | "Internal error - " |
---|
112 | n/a | "Invalid sequence of commands sent to libbzip2"); |
---|
113 | n/a | return 1; |
---|
114 | n/a | default: |
---|
115 | n/a | PyErr_Format(PyExc_IOError, |
---|
116 | n/a | "Unrecognized error from libbzip2: %d", bzerror); |
---|
117 | n/a | return 1; |
---|
118 | n/a | } |
---|
119 | n/a | } |
---|
120 | n/a | |
---|
/* Initial size of the output bytes object allocated by compress() and
   decompress_buf(): BUFSIZ from <stdio.h>, but never less than 8192. */
#if BUFSIZ < 8192
#define INITIAL_BUFFER_SIZE 8192
#else
#define INITIAL_BUFFER_SIZE BUFSIZ
#endif
---|
126 | n/a | |
---|
127 | n/a | static int |
---|
128 | n/a | grow_buffer(PyObject **buf, Py_ssize_t max_length) |
---|
129 | n/a | { |
---|
130 | n/a | /* Expand the buffer by an amount proportional to the current size, |
---|
131 | n/a | giving us amortized linear-time behavior. Use a less-than-double |
---|
132 | n/a | growth factor to avoid excessive allocation. */ |
---|
133 | n/a | size_t size = PyBytes_GET_SIZE(*buf); |
---|
134 | n/a | size_t new_size = size + (size >> 3) + 6; |
---|
135 | n/a | |
---|
136 | n/a | if (max_length > 0 && new_size > (size_t) max_length) |
---|
137 | n/a | new_size = (size_t) max_length; |
---|
138 | n/a | |
---|
139 | n/a | if (new_size > size) { |
---|
140 | n/a | return _PyBytes_Resize(buf, new_size); |
---|
141 | n/a | } else { /* overflow */ |
---|
142 | n/a | PyErr_SetString(PyExc_OverflowError, |
---|
143 | n/a | "Unable to allocate buffer - output too large"); |
---|
144 | n/a | return -1; |
---|
145 | n/a | } |
---|
146 | n/a | } |
---|
147 | n/a | |
---|
148 | n/a | |
---|
149 | n/a | /* BZ2Compressor class. */ |
---|
150 | n/a | |
---|
/* Run `len` bytes starting at `data` through the compressor stream of `c`
   using the libbzip2 action `action`, and return the compressed output
   produced by this call as a new bytes object.

   The callers in this file pass BZ_RUN (feed data, stop once the input is
   consumed) or BZ_FINISH with no input (loop until libbzip2 reports
   BZ_STREAM_END).

   Returns NULL on failure; catch_bz2_error() and grow_buffer() set the
   exception for libbzip2 errors and buffer overflow respectively. */
static PyObject *
compress(BZ2Compressor *c, char *data, size_t len, int action)
{
    /* Number of output bytes written into `result` so far. */
    size_t data_size = 0;
    PyObject *result;

    result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
    if (result == NULL)
        return NULL;

    /* avail_in starts at 0; the loop below hands the input to libbzip2 in
       chunks, keeping the not-yet-offered remainder in `len`. */
    c->bzs.next_in = data;
    c->bzs.avail_in = 0;
    c->bzs.next_out = PyBytes_AS_STRING(result);
    c->bzs.avail_out = INITIAL_BUFFER_SIZE;
    for (;;) {
        char *this_out;
        int bzerror;

        /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
           Do compression in chunks of no more than UINT_MAX bytes each. */
        if (c->bzs.avail_in == 0 && len > 0) {
            c->bzs.avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
            len -= c->bzs.avail_in;
        }

        /* In regular compression mode, stop when input data is exhausted. */
        if (action == BZ_RUN && c->bzs.avail_in == 0)
            break;

        /* The output window is full: either the whole bytes object is used
           up (grow it), or only the UINT_MAX-limited window was (open the
           next window on the space that is already allocated). */
        if (c->bzs.avail_out == 0) {
            size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
            if (buffer_left == 0) {
                if (grow_buffer(&result, -1) < 0)
                    goto error;
                /* The resize may have moved the object; recompute next_out. */
                c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
                buffer_left = PyBytes_GET_SIZE(result) - data_size;
            }
            c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
        }

        /* The compression call itself runs with the GIL released. */
        Py_BEGIN_ALLOW_THREADS
        this_out = c->bzs.next_out;
        bzerror = BZ2_bzCompress(&c->bzs, action);
        data_size += c->bzs.next_out - this_out;
        Py_END_ALLOW_THREADS
        if (catch_bz2_error(bzerror))
            goto error;

        /* In flushing mode, stop when all buffered data has been flushed. */
        if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
            break;
    }
    /* Trim the bytes object down to the bytes actually produced. */
    if (data_size != (size_t)PyBytes_GET_SIZE(result))
        if (_PyBytes_Resize(&result, data_size) < 0)
            goto error;
    return result;

error:
    /* Py_XDECREF rather than Py_DECREF: `result` is released here whether or
       not it is still non-NULL after a failed resize. */
    Py_XDECREF(result);
    return NULL;
}
---|
212 | n/a | |
---|
213 | n/a | /*[clinic input] |
---|
214 | n/a | module _bz2 |
---|
215 | n/a | class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type" |
---|
216 | n/a | class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type" |
---|
217 | n/a | [clinic start generated code]*/ |
---|
218 | n/a | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/ |
---|
219 | n/a | |
---|
220 | n/a | #include "clinic/_bz2module.c.h" |
---|
221 | n/a | |
---|
222 | n/a | /*[clinic input] |
---|
223 | n/a | _bz2.BZ2Compressor.compress |
---|
224 | n/a | |
---|
225 | n/a | data: Py_buffer |
---|
226 | n/a | / |
---|
227 | n/a | |
---|
228 | n/a | Provide data to the compressor object. |
---|
229 | n/a | |
---|
230 | n/a | Returns a chunk of compressed data if possible, or b'' otherwise. |
---|
231 | n/a | |
---|
232 | n/a | When you have finished providing data to the compressor, call the |
---|
233 | n/a | flush() method to finish the compression process. |
---|
234 | n/a | [clinic start generated code]*/ |
---|
235 | n/a | |
---|
236 | n/a | static PyObject * |
---|
237 | n/a | _bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data) |
---|
238 | n/a | /*[clinic end generated code: output=59365426e941fbcc input=85c963218070fc4c]*/ |
---|
239 | n/a | { |
---|
240 | n/a | PyObject *result = NULL; |
---|
241 | n/a | |
---|
242 | n/a | ACQUIRE_LOCK(self); |
---|
243 | n/a | if (self->flushed) |
---|
244 | n/a | PyErr_SetString(PyExc_ValueError, "Compressor has been flushed"); |
---|
245 | n/a | else |
---|
246 | n/a | result = compress(self, data->buf, data->len, BZ_RUN); |
---|
247 | n/a | RELEASE_LOCK(self); |
---|
248 | n/a | return result; |
---|
249 | n/a | } |
---|
250 | n/a | |
---|
251 | n/a | /*[clinic input] |
---|
252 | n/a | _bz2.BZ2Compressor.flush |
---|
253 | n/a | |
---|
254 | n/a | Finish the compression process. |
---|
255 | n/a | |
---|
256 | n/a | Returns the compressed data left in internal buffers. |
---|
257 | n/a | |
---|
258 | n/a | The compressor object may not be used after this method is called. |
---|
259 | n/a | [clinic start generated code]*/ |
---|
260 | n/a | |
---|
261 | n/a | static PyObject * |
---|
262 | n/a | _bz2_BZ2Compressor_flush_impl(BZ2Compressor *self) |
---|
263 | n/a | /*[clinic end generated code: output=3ef03fc1b092a701 input=d64405d3c6f76691]*/ |
---|
264 | n/a | { |
---|
265 | n/a | PyObject *result = NULL; |
---|
266 | n/a | |
---|
267 | n/a | ACQUIRE_LOCK(self); |
---|
268 | n/a | if (self->flushed) |
---|
269 | n/a | PyErr_SetString(PyExc_ValueError, "Repeated call to flush()"); |
---|
270 | n/a | else { |
---|
271 | n/a | self->flushed = 1; |
---|
272 | n/a | result = compress(self, NULL, 0, BZ_FINISH); |
---|
273 | n/a | } |
---|
274 | n/a | RELEASE_LOCK(self); |
---|
275 | n/a | return result; |
---|
276 | n/a | } |
---|
277 | n/a | |
---|
278 | n/a | static PyObject * |
---|
279 | n/a | BZ2Compressor_getstate(BZ2Compressor *self, PyObject *noargs) |
---|
280 | n/a | { |
---|
281 | n/a | PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object", |
---|
282 | n/a | Py_TYPE(self)->tp_name); |
---|
283 | n/a | return NULL; |
---|
284 | n/a | } |
---|
285 | n/a | |
---|
286 | n/a | static void* |
---|
287 | n/a | BZ2_Malloc(void* ctx, int items, int size) |
---|
288 | n/a | { |
---|
289 | n/a | if (items < 0 || size < 0) |
---|
290 | n/a | return NULL; |
---|
291 | n/a | if ((size_t)items > (size_t)PY_SSIZE_T_MAX / (size_t)size) |
---|
292 | n/a | return NULL; |
---|
293 | n/a | /* PyMem_Malloc() cannot be used: compress() and decompress() |
---|
294 | n/a | release the GIL */ |
---|
295 | n/a | return PyMem_RawMalloc(items * size); |
---|
296 | n/a | } |
---|
297 | n/a | |
---|
/* Deallocation callback installed as bz_stream.bzfree; hands `ptr` to
   PyMem_RawFree(), matching the PyMem_RawMalloc() used by BZ2_Malloc(). */
static void
BZ2_Free(void* ctx, void *ptr)
{
    (void)ctx;  /* the context argument is not used */
    PyMem_RawFree(ptr);
}
---|
303 | n/a | |
---|
304 | n/a | /*[clinic input] |
---|
305 | n/a | _bz2.BZ2Compressor.__init__ |
---|
306 | n/a | |
---|
307 | n/a | compresslevel: int = 9 |
---|
308 | n/a | Compression level, as a number between 1 and 9. |
---|
309 | n/a | / |
---|
310 | n/a | |
---|
311 | n/a | Create a compressor object for compressing data incrementally. |
---|
312 | n/a | |
---|
313 | n/a | For one-shot compression, use the compress() function instead. |
---|
314 | n/a | [clinic start generated code]*/ |
---|
315 | n/a | |
---|
316 | n/a | static int |
---|
317 | n/a | _bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel) |
---|
318 | n/a | /*[clinic end generated code: output=c4e6adfd02963827 input=4e1ff7b8394b6e9a]*/ |
---|
319 | n/a | { |
---|
320 | n/a | int bzerror; |
---|
321 | n/a | |
---|
322 | n/a | if (!(1 <= compresslevel && compresslevel <= 9)) { |
---|
323 | n/a | PyErr_SetString(PyExc_ValueError, |
---|
324 | n/a | "compresslevel must be between 1 and 9"); |
---|
325 | n/a | return -1; |
---|
326 | n/a | } |
---|
327 | n/a | |
---|
328 | n/a | #ifdef WITH_THREAD |
---|
329 | n/a | self->lock = PyThread_allocate_lock(); |
---|
330 | n/a | if (self->lock == NULL) { |
---|
331 | n/a | PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); |
---|
332 | n/a | return -1; |
---|
333 | n/a | } |
---|
334 | n/a | #endif |
---|
335 | n/a | |
---|
336 | n/a | self->bzs.opaque = NULL; |
---|
337 | n/a | self->bzs.bzalloc = BZ2_Malloc; |
---|
338 | n/a | self->bzs.bzfree = BZ2_Free; |
---|
339 | n/a | bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0); |
---|
340 | n/a | if (catch_bz2_error(bzerror)) |
---|
341 | n/a | goto error; |
---|
342 | n/a | |
---|
343 | n/a | return 0; |
---|
344 | n/a | |
---|
345 | n/a | error: |
---|
346 | n/a | #ifdef WITH_THREAD |
---|
347 | n/a | PyThread_free_lock(self->lock); |
---|
348 | n/a | self->lock = NULL; |
---|
349 | n/a | #endif |
---|
350 | n/a | return -1; |
---|
351 | n/a | } |
---|
352 | n/a | |
---|
353 | n/a | static void |
---|
354 | n/a | BZ2Compressor_dealloc(BZ2Compressor *self) |
---|
355 | n/a | { |
---|
356 | n/a | BZ2_bzCompressEnd(&self->bzs); |
---|
357 | n/a | #ifdef WITH_THREAD |
---|
358 | n/a | if (self->lock != NULL) |
---|
359 | n/a | PyThread_free_lock(self->lock); |
---|
360 | n/a | #endif |
---|
361 | n/a | Py_TYPE(self)->tp_free((PyObject *)self); |
---|
362 | n/a | } |
---|
363 | n/a | |
---|
/* Method table for BZ2Compressor.  The compress() and flush() entries come
   from *_METHODDEF macros (presumably defined in the included clinic header);
   __getstate__ is wired to BZ2Compressor_getstate, which always raises
   TypeError. */
static PyMethodDef BZ2Compressor_methods[] = {
    _BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
    _BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
    {"__getstate__", (PyCFunction)BZ2Compressor_getstate, METH_NOARGS},
    {NULL}  /* sentinel */
};
---|
370 | n/a | |
---|
371 | n/a | |
---|
/* Type object for _bz2.BZ2Compressor.  Only the deallocator, flags, doc
   string, method table, tp_init and tp_new slots are filled in; instances
   are created with PyType_GenericNew and initialized by
   _bz2_BZ2Compressor___init__. */
static PyTypeObject BZ2Compressor_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_bz2.BZ2Compressor",               /* tp_name */
    sizeof(BZ2Compressor),              /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)BZ2Compressor_dealloc,  /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash  */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    _bz2_BZ2Compressor___init____doc__,  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    BZ2Compressor_methods,              /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    _bz2_BZ2Compressor___init__,        /* tp_init */
    0,                                  /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
};
---|
412 | n/a | |
---|
413 | n/a | |
---|
414 | n/a | /* BZ2Decompressor class. */ |
---|
415 | n/a | |
---|
/* Decompress data of length d->bzs_avail_in_real in d->bzs.next_in.  The output
   buffer is allocated dynamically and returned.  At most max_length bytes are
   returned, so some of the input may not be consumed. d->bzs.next_in and
   d->bzs_avail_in_real are updated to reflect the consumed input.

   A negative max_length means "no limit".  d->eof is set to 1 when libbzip2
   reports BZ_STREAM_END.  Returns NULL with an exception set on failure. */
static PyObject*
decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
{
    /* data_size is never negative, but because we repeatedly have to
       compare against max_length and PyBytes_GET_SIZE we declare it as
       signed */
    Py_ssize_t data_size = 0;
    PyObject *result;
    bz_stream *bzs = &d->bzs;

    /* Start with INITIAL_BUFFER_SIZE bytes unless the caller's limit is
       smaller than that. */
    if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
        result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
    else
        result = PyBytes_FromStringAndSize(NULL, max_length);
    if (result == NULL)
        return NULL;

    bzs->next_out = PyBytes_AS_STRING(result);
    for (;;) {
        int bzret;
        size_t avail;

        /* On a 64-bit system, buffer length might not fit in avail_out, so we
           do decompression in chunks of no more than UINT_MAX bytes
           each. Note that the expression for `avail` is guaranteed to be
           non-negative, so the cast is safe. */
        avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
        bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
        /* Hand libbzip2 a 32-bit-sized slice of the pending input; the part
           it leaves unconsumed is added back after the call. */
        bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
        d->bzs_avail_in_real -= bzs->avail_in;

        /* The decompression call itself runs with the GIL released. */
        Py_BEGIN_ALLOW_THREADS
        bzret = BZ2_bzDecompress(bzs);
        data_size = bzs->next_out - PyBytes_AS_STRING(result);
        d->bzs_avail_in_real += bzs->avail_in;
        Py_END_ALLOW_THREADS
        if (catch_bz2_error(bzret))
            goto error;
        if (bzret == BZ_STREAM_END) {
            d->eof = 1;
            break;
        } else if (d->bzs_avail_in_real == 0) {
            /* All input consumed. */
            break;
        } else if (bzs->avail_out == 0) {
            /* Output window full: stop at the caller's limit, otherwise grow
               the buffer if it is completely used up. */
            if (data_size == max_length)
                break;
            if (data_size == PyBytes_GET_SIZE(result) &&
                grow_buffer(&result, max_length) == -1)
                goto error;
            /* The resize may have moved the object; recompute next_out. */
            bzs->next_out = PyBytes_AS_STRING(result) + data_size;
        }
    }
    /* Trim the bytes object down to the bytes actually produced. */
    if (data_size != PyBytes_GET_SIZE(result))
        if (_PyBytes_Resize(&result, data_size) == -1)
            goto error;

    return result;

error:
    Py_XDECREF(result);
    return NULL;
}
---|
482 | n/a | |
---|
483 | n/a | |
---|
484 | n/a | static PyObject * |
---|
485 | n/a | decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length) |
---|
486 | n/a | { |
---|
487 | n/a | char input_buffer_in_use; |
---|
488 | n/a | PyObject *result; |
---|
489 | n/a | bz_stream *bzs = &d->bzs; |
---|
490 | n/a | |
---|
491 | n/a | /* Prepend unconsumed input if necessary */ |
---|
492 | n/a | if (bzs->next_in != NULL) { |
---|
493 | n/a | size_t avail_now, avail_total; |
---|
494 | n/a | |
---|
495 | n/a | /* Number of bytes we can append to input buffer */ |
---|
496 | n/a | avail_now = (d->input_buffer + d->input_buffer_size) |
---|
497 | n/a | - (bzs->next_in + d->bzs_avail_in_real); |
---|
498 | n/a | |
---|
499 | n/a | /* Number of bytes we can append if we move existing |
---|
500 | n/a | contents to beginning of buffer (overwriting |
---|
501 | n/a | consumed input) */ |
---|
502 | n/a | avail_total = d->input_buffer_size - d->bzs_avail_in_real; |
---|
503 | n/a | |
---|
504 | n/a | if (avail_total < len) { |
---|
505 | n/a | size_t offset = bzs->next_in - d->input_buffer; |
---|
506 | n/a | char *tmp; |
---|
507 | n/a | size_t new_size = d->input_buffer_size + len - avail_now; |
---|
508 | n/a | |
---|
509 | n/a | /* Assign to temporary variable first, so we don't |
---|
510 | n/a | lose address of allocated buffer if realloc fails */ |
---|
511 | n/a | tmp = PyMem_Realloc(d->input_buffer, new_size); |
---|
512 | n/a | if (tmp == NULL) { |
---|
513 | n/a | PyErr_SetNone(PyExc_MemoryError); |
---|
514 | n/a | return NULL; |
---|
515 | n/a | } |
---|
516 | n/a | d->input_buffer = tmp; |
---|
517 | n/a | d->input_buffer_size = new_size; |
---|
518 | n/a | |
---|
519 | n/a | bzs->next_in = d->input_buffer + offset; |
---|
520 | n/a | } |
---|
521 | n/a | else if (avail_now < len) { |
---|
522 | n/a | memmove(d->input_buffer, bzs->next_in, |
---|
523 | n/a | d->bzs_avail_in_real); |
---|
524 | n/a | bzs->next_in = d->input_buffer; |
---|
525 | n/a | } |
---|
526 | n/a | memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len); |
---|
527 | n/a | d->bzs_avail_in_real += len; |
---|
528 | n/a | input_buffer_in_use = 1; |
---|
529 | n/a | } |
---|
530 | n/a | else { |
---|
531 | n/a | bzs->next_in = data; |
---|
532 | n/a | d->bzs_avail_in_real = len; |
---|
533 | n/a | input_buffer_in_use = 0; |
---|
534 | n/a | } |
---|
535 | n/a | |
---|
536 | n/a | result = decompress_buf(d, max_length); |
---|
537 | n/a | if(result == NULL) { |
---|
538 | n/a | bzs->next_in = NULL; |
---|
539 | n/a | return NULL; |
---|
540 | n/a | } |
---|
541 | n/a | |
---|
542 | n/a | if (d->eof) { |
---|
543 | n/a | d->needs_input = 0; |
---|
544 | n/a | if (d->bzs_avail_in_real > 0) { |
---|
545 | n/a | Py_XSETREF(d->unused_data, |
---|
546 | n/a | PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real)); |
---|
547 | n/a | if (d->unused_data == NULL) |
---|
548 | n/a | goto error; |
---|
549 | n/a | } |
---|
550 | n/a | } |
---|
551 | n/a | else if (d->bzs_avail_in_real == 0) { |
---|
552 | n/a | bzs->next_in = NULL; |
---|
553 | n/a | d->needs_input = 1; |
---|
554 | n/a | } |
---|
555 | n/a | else { |
---|
556 | n/a | d->needs_input = 0; |
---|
557 | n/a | |
---|
558 | n/a | /* If we did not use the input buffer, we now have |
---|
559 | n/a | to copy the tail from the caller's buffer into the |
---|
560 | n/a | input buffer */ |
---|
561 | n/a | if (!input_buffer_in_use) { |
---|
562 | n/a | |
---|
563 | n/a | /* Discard buffer if it's too small |
---|
564 | n/a | (resizing it may needlessly copy the current contents) */ |
---|
565 | n/a | if (d->input_buffer != NULL && |
---|
566 | n/a | d->input_buffer_size < d->bzs_avail_in_real) { |
---|
567 | n/a | PyMem_Free(d->input_buffer); |
---|
568 | n/a | d->input_buffer = NULL; |
---|
569 | n/a | } |
---|
570 | n/a | |
---|
571 | n/a | /* Allocate if necessary */ |
---|
572 | n/a | if (d->input_buffer == NULL) { |
---|
573 | n/a | d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real); |
---|
574 | n/a | if (d->input_buffer == NULL) { |
---|
575 | n/a | PyErr_SetNone(PyExc_MemoryError); |
---|
576 | n/a | goto error; |
---|
577 | n/a | } |
---|
578 | n/a | d->input_buffer_size = d->bzs_avail_in_real; |
---|
579 | n/a | } |
---|
580 | n/a | |
---|
581 | n/a | /* Copy tail */ |
---|
582 | n/a | memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real); |
---|
583 | n/a | bzs->next_in = d->input_buffer; |
---|
584 | n/a | } |
---|
585 | n/a | } |
---|
586 | n/a | |
---|
587 | n/a | return result; |
---|
588 | n/a | |
---|
589 | n/a | error: |
---|
590 | n/a | Py_XDECREF(result); |
---|
591 | n/a | return NULL; |
---|
592 | n/a | } |
---|
593 | n/a | |
---|
594 | n/a | /*[clinic input] |
---|
595 | n/a | _bz2.BZ2Decompressor.decompress |
---|
596 | n/a | |
---|
597 | n/a | data: Py_buffer |
---|
598 | n/a | max_length: Py_ssize_t=-1 |
---|
599 | n/a | |
---|
600 | n/a | Decompress *data*, returning uncompressed data as bytes. |
---|
601 | n/a | |
---|
602 | n/a | If *max_length* is nonnegative, returns at most *max_length* bytes of |
---|
603 | n/a | decompressed data. If this limit is reached and further output can be |
---|
604 | n/a | produced, *self.needs_input* will be set to ``False``. In this case, the next |
---|
605 | n/a | call to *decompress()* may provide *data* as b'' to obtain more of the output. |
---|
606 | n/a | |
---|
607 | n/a | If all of the input data was decompressed and returned (either because this |
---|
608 | n/a | was less than *max_length* bytes, or because *max_length* was negative), |
---|
609 | n/a | *self.needs_input* will be set to True. |
---|
610 | n/a | |
---|
611 | n/a | Attempting to decompress data after the end of stream is reached raises an |
---|
612 | n/a | EOFError. Any data found after the end of the stream is ignored and saved in |
---|
613 | n/a | the unused_data attribute. |
---|
614 | n/a | [clinic start generated code]*/ |
---|
615 | n/a | |
---|
616 | n/a | static PyObject * |
---|
617 | n/a | _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data, |
---|
618 | n/a | Py_ssize_t max_length) |
---|
619 | n/a | /*[clinic end generated code: output=23e41045deb240a3 input=52e1ffc66a8ea624]*/ |
---|
620 | n/a | { |
---|
621 | n/a | PyObject *result = NULL; |
---|
622 | n/a | |
---|
623 | n/a | ACQUIRE_LOCK(self); |
---|
624 | n/a | if (self->eof) |
---|
625 | n/a | PyErr_SetString(PyExc_EOFError, "End of stream already reached"); |
---|
626 | n/a | else |
---|
627 | n/a | result = decompress(self, data->buf, data->len, max_length); |
---|
628 | n/a | RELEASE_LOCK(self); |
---|
629 | n/a | return result; |
---|
630 | n/a | } |
---|
631 | n/a | |
---|
632 | n/a | static PyObject * |
---|
633 | n/a | BZ2Decompressor_getstate(BZ2Decompressor *self, PyObject *noargs) |
---|
634 | n/a | { |
---|
635 | n/a | PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object", |
---|
636 | n/a | Py_TYPE(self)->tp_name); |
---|
637 | n/a | return NULL; |
---|
638 | n/a | } |
---|
639 | n/a | |
---|
640 | n/a | /*[clinic input] |
---|
641 | n/a | _bz2.BZ2Decompressor.__init__ |
---|
642 | n/a | |
---|
643 | n/a | Create a decompressor object for decompressing data incrementally. |
---|
644 | n/a | |
---|
645 | n/a | For one-shot decompression, use the decompress() function instead. |
---|
646 | n/a | [clinic start generated code]*/ |
---|
647 | n/a | |
---|
648 | n/a | static int |
---|
649 | n/a | _bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self) |
---|
650 | n/a | /*[clinic end generated code: output=e4d2b9bb866ab8f1 input=95f6500dcda60088]*/ |
---|
651 | n/a | { |
---|
652 | n/a | int bzerror; |
---|
653 | n/a | |
---|
654 | n/a | #ifdef WITH_THREAD |
---|
655 | n/a | self->lock = PyThread_allocate_lock(); |
---|
656 | n/a | if (self->lock == NULL) { |
---|
657 | n/a | PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); |
---|
658 | n/a | return -1; |
---|
659 | n/a | } |
---|
660 | n/a | #endif |
---|
661 | n/a | |
---|
662 | n/a | self->needs_input = 1; |
---|
663 | n/a | self->bzs_avail_in_real = 0; |
---|
664 | n/a | self->input_buffer = NULL; |
---|
665 | n/a | self->input_buffer_size = 0; |
---|
666 | n/a | self->unused_data = PyBytes_FromStringAndSize(NULL, 0); |
---|
667 | n/a | if (self->unused_data == NULL) |
---|
668 | n/a | goto error; |
---|
669 | n/a | |
---|
670 | n/a | bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); |
---|
671 | n/a | if (catch_bz2_error(bzerror)) |
---|
672 | n/a | goto error; |
---|
673 | n/a | |
---|
674 | n/a | return 0; |
---|
675 | n/a | |
---|
676 | n/a | error: |
---|
677 | n/a | Py_CLEAR(self->unused_data); |
---|
678 | n/a | #ifdef WITH_THREAD |
---|
679 | n/a | PyThread_free_lock(self->lock); |
---|
680 | n/a | self->lock = NULL; |
---|
681 | n/a | #endif |
---|
682 | n/a | return -1; |
---|
683 | n/a | } |
---|
684 | n/a | |
---|
685 | n/a | static void |
---|
686 | n/a | BZ2Decompressor_dealloc(BZ2Decompressor *self) |
---|
687 | n/a | { |
---|
688 | n/a | if(self->input_buffer != NULL) |
---|
689 | n/a | PyMem_Free(self->input_buffer); |
---|
690 | n/a | BZ2_bzDecompressEnd(&self->bzs); |
---|
691 | n/a | Py_CLEAR(self->unused_data); |
---|
692 | n/a | #ifdef WITH_THREAD |
---|
693 | n/a | if (self->lock != NULL) |
---|
694 | n/a | PyThread_free_lock(self->lock); |
---|
695 | n/a | #endif |
---|
696 | n/a | Py_TYPE(self)->tp_free((PyObject *)self); |
---|
697 | n/a | } |
---|
698 | n/a | |
---|
/* Method table for BZ2Decompressor.  The decompress() entry comes from a
   *_METHODDEF macro (presumably defined in the included clinic header);
   __getstate__ is wired to BZ2Decompressor_getstate, which always raises
   TypeError. */
static PyMethodDef BZ2Decompressor_methods[] = {
    _BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
    {"__getstate__", (PyCFunction)BZ2Decompressor_getstate, METH_NOARGS},
    {NULL}  /* sentinel */
};
---|
704 | n/a | |
---|
/* Doc strings for the read-only attributes listed in
   BZ2Decompressor_members below. */
PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
"True if the end-of-stream marker has been reached.");

PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
"Data found after the end of the compressed stream.");

PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
"True if more input is needed before more decompressed data can be produced.");

/* Attributes exposed directly from the BZ2Decompressor struct.  All three
   are READONLY; eof and needs_input are T_BOOL members backed by `char`
   fields, unused_data is an object reference (T_OBJECT_EX). */
static PyMemberDef BZ2Decompressor_members[] = {
    {"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
     READONLY, BZ2Decompressor_eof__doc__},
    {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
     READONLY, BZ2Decompressor_unused_data__doc__},
    {"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
     BZ2Decompressor_needs_input_doc},
    {NULL}  /* sentinel */
};
---|
723 | n/a | |
---|
/* Type object for _bz2.BZ2Decompressor.  Only the deallocator, flags, doc
   string, method and member tables, tp_init and tp_new slots are filled in;
   instances are created with PyType_GenericNew and initialized by
   _bz2_BZ2Decompressor___init__. */
static PyTypeObject BZ2Decompressor_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_bz2.BZ2Decompressor",             /* tp_name */
    sizeof(BZ2Decompressor),            /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_reserved */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash  */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    0,                                  /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    _bz2_BZ2Decompressor___init____doc__,  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    BZ2Decompressor_methods,            /* tp_methods */
    BZ2Decompressor_members,            /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    _bz2_BZ2Decompressor___init__,      /* tp_init */
    0,                                  /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
};
---|
764 | n/a | |
---|
765 | n/a | |
---|
766 | n/a | /* Module initialization. */ |
---|
767 | n/a | |
---|
/* Module definition for "_bz2".  Apart from the name and the -1 size value
   every field is NULL: the module has no doc string and no module-level
   function table; its two types are attached in PyInit__bz2().
   NOTE(review): the meaning of the individual positional fields follows the
   PyModuleDef layout of the targeted Python version -- confirm there. */
static struct PyModuleDef _bz2module = {
    PyModuleDef_HEAD_INIT,
    "_bz2",     /* module name */
    NULL,
    -1,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL
};
---|
779 | n/a | |
---|
780 | n/a | PyMODINIT_FUNC |
---|
781 | n/a | PyInit__bz2(void) |
---|
782 | n/a | { |
---|
783 | n/a | PyObject *m; |
---|
784 | n/a | |
---|
785 | n/a | if (PyType_Ready(&BZ2Compressor_Type) < 0) |
---|
786 | n/a | return NULL; |
---|
787 | n/a | if (PyType_Ready(&BZ2Decompressor_Type) < 0) |
---|
788 | n/a | return NULL; |
---|
789 | n/a | |
---|
790 | n/a | m = PyModule_Create(&_bz2module); |
---|
791 | n/a | if (m == NULL) |
---|
792 | n/a | return NULL; |
---|
793 | n/a | |
---|
794 | n/a | Py_INCREF(&BZ2Compressor_Type); |
---|
795 | n/a | PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type); |
---|
796 | n/a | |
---|
797 | n/a | Py_INCREF(&BZ2Decompressor_Type); |
---|
798 | n/a | PyModule_AddObject(m, "BZ2Decompressor", |
---|
799 | n/a | (PyObject *)&BZ2Decompressor_Type); |
---|
800 | n/a | |
---|
801 | n/a | return m; |
---|
802 | n/a | } |
---|