Python code coverage for Lib/lzma.py

#	count	content
1	n/a	"""Interface to the liblzma compression library.
2	n/a
3	n/a	This module provides a class for reading and writing compressed files,
4	n/a	classes for incremental (de)compression, and convenience functions for
5	n/a	one-shot (de)compression.
6	n/a
7	n/a	These classes and functions support both the XZ and legacy LZMA
8	n/a	container formats, as well as raw compressed data streams.
9	n/a	"""
10	n/a
11	n/a	__all__ = [  # public names exported by "from lzma import *"
12	n/a	"CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256",
13	n/a	"CHECK_ID_MAX", "CHECK_UNKNOWN",
14	n/a	"FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64",
15	n/a	"FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC",
16	n/a	"FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW",
17	n/a	"MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4",
18	n/a	"MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",
19	n/a
20	n/a	"LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
21	n/a	"open", "compress", "decompress", "is_check_supported",
22	n/a	]
23	n/a
24	n/a	import builtins
25	n/a	import io
26	n/a	import os
27	n/a	from _lzma import *
28	n/a	from _lzma import _encode_filter_properties, _decode_filter_properties
29	n/a	import _compression
30	n/a
31	n/a
32	n/a	_MODE_CLOSED = 0  # LZMAFile state: closed (also the state before __init__ finishes)
33	n/a	_MODE_READ = 1  # LZMAFile state: opened for reading
34	n/a	# Value 2 no longer used
35	n/a	_MODE_WRITE = 3  # LZMAFile state: opened for writing ("w", "a" or "x")
36	n/a
37	n/a
class LZMAFile(_compression.BaseStream):

    """A file object providing transparent LZMA (de)compression.

    An LZMAFile can act as a wrapper for an existing file object, or
    refer directly to a named file on disk.

    Note that LZMAFile provides a binary file interface - data read
    is returned as bytes, and data to be written must be given as a
    bytes-like object.
    """

    def __init__(self, filename=None, mode="r", *,
                 format=None, check=-1, preset=None, filters=None):
        """Open an LZMA-compressed file in binary mode.

        filename can be either an actual file name (given as a str,
        bytes, or PathLike object), in which case the named file is
        opened, or it can be an existing file object to read from or
        write to.

        mode can be "r" for reading (default), "w" for (over)writing,
        "x" for creating exclusively, or "a" for appending. These can
        equivalently be given as "rb", "wb", "xb" and "ab" respectively.

        format specifies the container format to use for the file.
        If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the
        default is FORMAT_XZ.

        check specifies the integrity check to use. This argument can
        only be used when opening a file for writing. For FORMAT_XZ,
        the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not
        support integrity checks - for these formats, check must be
        omitted, or be CHECK_NONE.

        When opening a file for reading, the preset argument is not
        meaningful, and should be omitted. The filters argument should
        also be omitted, except when format is FORMAT_RAW (in which case
        it is required).

        When opening a file for writing, the settings used by the
        compressor can be specified either as a preset compression
        level (with the preset argument), or in detail as a custom
        filter chain (with the filters argument). For FORMAT_XZ and
        FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
        level. For FORMAT_RAW, the caller must always specify a filter
        chain; the raw compressor does not support preset compression
        levels.

        preset (if provided) should be an integer in the range 0-9,
        optionally OR-ed with the constant PRESET_EXTREME.

        filters (if provided) should be a sequence of dicts. Each dict
        should have an entry for "id" indicating ID of the filter, plus
        additional entries for options to the filter.

        Raises ValueError for an unrecognised mode or for check/preset
        given together with a read mode, and TypeError if filename is
        neither a path nor an object with a read or write attribute.
        """
        # Start out "closed" so that close() is a harmless no-op if the
        # constructor fails part-way through.
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        if mode in ("r", "rb"):
            if check != -1:
                raise ValueError("Cannot specify an integrity check "
                                 "when opening a file for reading")
            if preset is not None:
                raise ValueError("Cannot specify a preset compression "
                                 "level when opening a file for reading")
            if format is None:
                format = FORMAT_AUTO
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "a", "ab", "x", "xb"):
            if format is None:
                format = FORMAT_XZ
            mode_code = _MODE_WRITE
            self._compressor = LZMACompressor(format=format, check=check,
                                              preset=preset, filters=filters)
            # Number of uncompressed bytes written so far; reported by tell().
            self._pos = 0
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        if isinstance(filename, (str, bytes, os.PathLike)):
            # The underlying file always carries raw compressed bytes.
            if "b" not in mode:
                mode += "b"
            self._fp = builtins.open(filename, mode)
            # We opened it, so we are responsible for closing it.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            # Caller-supplied file object: it is left open by close().
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")

        if self._mode == _MODE_READ:
            raw = _compression.DecompressReader(self._fp, LZMADecompressor,
                trailing_error=LZMAError, format=format, filters=filters)
            self._buffer = io.BufferedReader(raw)

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return
        try:
            if self._mode == _MODE_READ:
                self._buffer.close()
                self._buffer = None
            elif self._mode == _MODE_WRITE:
                # Emit whatever the compressor is still holding back.
                self._fp.write(self._compressor.flush())
                self._compressor = None
        finally:
            # Always release the underlying file and reset the state,
            # even if flushing or closing raised.
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        self._check_can_read()
        # Relies on the undocumented fact that BufferedReader.peek() always
        # returns at least one byte (except at EOF)
        return self._buffer.peek(size)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b"" if the file is already at EOF.
        """
        self._check_can_read()
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b"" if the file is at EOF.
        """
        self._check_can_read()
        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        self._check_can_read()
        return self._buffer.readline(size)

    def write(self, data):
        """Write a bytes-like object to the file.

        Returns the number of uncompressed bytes written, which is
        always the size of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.
        """
        self._check_can_write()
        if isinstance(data, (bytes, bytearray)):
            length = len(data)
        else:
            # Accept any object supporting the buffer protocol. len() on
            # such an object counts items rather than bytes (for example
            # array.array('H')), so take the byte size from a memoryview.
            data = memoryview(data)
            length = data.nbytes

        compressed = self._compressor.compress(data)
        self._fp.write(compressed)
        self._pos += length
        return length

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Possible values for whence are:

            0: start of stream (default): offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        self._check_can_seek()
        return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        self._check_not_closed()
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        # Write mode: count of uncompressed bytes accepted by write().
        return self._pos
261	n/a
262	n/a
def open(filename, mode="rb", *,
         format=None, check=-1, preset=None, filters=None,
         encoding=None, errors=None, newline=None):
    """Open an LZMA-compressed file in binary or text mode.

    filename is either a path (str, bytes or PathLike object) naming the
    file to open, or an already-open file object to read from or write
    to.

    mode is one of "r", "rb" (default), "w", "wb", "x", "xb", "a" or
    "ab" for binary access, or "rt", "wt", "xt" or "at" for text access.

    format, check, preset and filters carry the compression settings
    and have the same meaning as for LZMACompressor, LZMADecompressor
    and LZMAFile.

    In binary mode the result is exactly what LZMAFile(filename, mode,
    ...) returns, and encoding, errors and newline must be left unset
    (a ValueError is raised otherwise).

    In text mode an LZMAFile is created and wrapped in an
    io.TextIOWrapper configured with the given encoding, error handling
    and newline behaviour.
    """
    text_mode = "t" in mode

    # "t" and "b" are mutually exclusive.
    if text_mode and "b" in mode:
        raise ValueError("Invalid mode: %r" % (mode,))

    # The text-layer options make no sense for a binary stream.
    if not text_mode:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # LZMAFile itself only understands the binary mode strings.
    stream = LZMAFile(filename, mode.replace("t", ""),
                      format=format, check=check,
                      preset=preset, filters=filters)

    if not text_mode:
        return stream
    return io.TextIOWrapper(stream, encoding, errors, newline)
308	n/a
309	n/a
def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
    """Compress data in one shot and return the compressed bytes.

    format, check, preset and filters are passed straight to
    LZMACompressor; see its docstring for their meaning.

    Use an LZMACompressor directly when the input arrives in pieces.
    """
    compressor = LZMACompressor(format, check, preset, filters)
    body = compressor.compress(data)
    # flush() emits whatever the compressor is still holding back.
    tail = compressor.flush()
    return body + tail
320	n/a
321	n/a
def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):
    """Decompress a block of data and return the uncompressed bytes.

    format, memlimit and filters are passed straight to
    LZMADecompressor; see its docstring for their meaning.

    data may hold several compressed streams back to back; their
    outputs are concatenated. Trailing bytes that do not form a valid
    stream are ignored once at least one stream has been decoded.

    Raises LZMAError if the first stream is invalid, or if any stream
    ends before its end-of-stream marker.

    Use an LZMADecompressor directly when the input arrives in pieces.
    """
    chunks = []
    remaining = data
    while True:
        # Each stream needs its own decompressor object.
        decoder = LZMADecompressor(format, memlimit, filters)
        try:
            chunk = decoder.decompress(remaining)
        except LZMAError:
            if not chunks:
                raise  # Error on the first iteration; bail out.
            break  # Leftover data is not a valid LZMA/XZ stream; ignore it.
        chunks.append(chunk)
        if not decoder.eof:
            raise LZMAError("Compressed data ended before the "
                            "end-of-stream marker was reached")
        remaining = decoder.unused_data
        if not remaining:
            break
    return b"".join(chunks)