» Core Development > Code coverage > Lib/bz2.py

Python code coverage for Lib/bz2.py

# count | content
"""Interface to the libbzip2 compression library.

This module provides a file interface, classes for incremental
(de)compression, and functions for one-shot (de)compression.
"""

# Names exported by "from bz2 import *"; BZ2Compressor and
# BZ2Decompressor are re-exported from the _bz2 C extension below.
__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
           "open", "compress", "decompress"]

__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"
11n/a
12n/afrom builtins import open as _builtin_open
13n/aimport io
14n/aimport os
15n/aimport warnings
16n/aimport _compression
17n/a
18n/atry:
19n/a from threading import RLock
20n/aexcept ImportError:
21n/a from dummy_threading import RLock
22n/a
23n/afrom _bz2 import BZ2Compressor, BZ2Decompressor
24n/a
25n/a
# Internal states for BZ2File._mode.
_MODE_CLOSED = 0  # file closed; all further operations raise ValueError
_MODE_READ = 1    # opened for reading ("r"/"rb")
# Value 2 no longer used
_MODE_WRITE = 3   # opened for writing, exclusive creation, or appending
30n/a
31n/a
class BZ2File(_compression.BaseStream):

    """A file object providing transparent bzip2 (de)compression.

    A BZ2File can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that BZ2File provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be given as bytes.
    """

    def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str, bytes, or PathLike object, it gives the
        name of the file to be opened. Otherwise, it should be a file
        object, which will be used to read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        'x' for creating exclusively, or 'a' for appending. These can
        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.

        buffering is ignored. Its use is deprecated.

        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.
        """
        # This lock must be recursive, so that BufferedIOBase's
        # writelines() does not deadlock.
        self._lock = RLock()
        self._fp = None        # underlying (compressed) file object
        self._closefp = False  # True only if we opened _fp ourselves
        self._mode = _MODE_CLOSED

        if buffering is not None:
            warnings.warn("Use of 'buffering' argument is deprecated",
                          DeprecationWarning)

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        # Normalize mode to its binary form and pick the internal state.
        # A compressor is only needed for the write-like modes; readers
        # get a decompressing buffer further below instead.
        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
        elif mode in ("w", "wb"):
            mode = "wb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("x", "xb"):
            mode = "xb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("a", "ab"):
            mode = "ab"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: %r" % (mode,))

        # Accept either a path (we open and later close the file) or an
        # existing file-like object (caller retains ownership).
        if isinstance(filename, (str, bytes, os.PathLike)):
            self._fp = _builtin_open(filename, mode)
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")

        if self._mode == _MODE_READ:
            # DecompressReader handles concatenated bzip2 streams;
            # BufferedReader gives us peek()/readline()/seek emulation.
            raw = _compression.DecompressReader(self._fp,
                BZ2Decompressor, trailing_error=OSError)
            self._buffer = io.BufferedReader(raw)
        else:
            # Uncompressed position, reported by tell() in write mode.
            self._pos = 0

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        with self._lock:
            if self._mode == _MODE_CLOSED:
                return
            try:
                if self._mode == _MODE_READ:
                    self._buffer.close()
                elif self._mode == _MODE_WRITE:
                    # Flush any data still buffered inside the compressor.
                    self._fp.write(self._compressor.flush())
                    self._compressor = None
            finally:
                # Even if flushing failed, make sure the underlying file
                # is closed (when owned) and all state is torn down.
                try:
                    if self._closefp:
                        self._fp.close()
                finally:
                    self._fp = None
                    self._closefp = False
                    self._mode = _MODE_CLOSED
                    self._buffer = None

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        with self._lock:
            self._check_can_read()
            # Relies on the undocumented fact that BufferedReader.peek()
            # always returns at least one byte (except at EOF), independent
            # of the value of n
            return self._buffer.peek(n)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
        """
        with self._lock:
            self._check_can_read()
            return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b'' if the file is at EOF.
        """
        with self._lock:
            self._check_can_read()
            # NOTE(review): BufferedReader.read1() expects a concrete
            # size, so a negative request is mapped to one buffer's worth.
            if size < 0:
                size = io.DEFAULT_BUFFER_SIZE
            return self._buffer.read1(size)

    def readinto(self, b):
        """Read bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        with self._lock:
            self._check_can_read()
            return self._buffer.readinto(b)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        # Accept any object implementing __index__ as a size, mirroring
        # the behavior of the io module.
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            return self._buffer.readline(size)

    def readlines(self, size=-1):
        """Read a list of lines of uncompressed bytes from the file.

        size can be specified to control the number of lines read: no
        further lines will be read once the total size of the lines read
        so far equals or exceeds size.
        """
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            return self._buffer.readlines(size)

    def write(self, data):
        """Write a byte string to the file.

        Returns the number of uncompressed bytes written, which is
        always len(data). Note that due to buffering, the file on disk
        may not reflect the data written until close() is called.
        """
        with self._lock:
            self._check_can_write()
            compressed = self._compressor.compress(data)
            self._fp.write(compressed)
            # Track the uncompressed position for tell().
            self._pos += len(data)
            return len(data)

    def writelines(self, seq):
        """Write a sequence of byte strings to the file.

        Returns the number of uncompressed bytes written.
        seq can be any iterable yielding byte strings.

        Line separators are not added between the written byte strings.
        """
        with self._lock:
            # BaseStream.writelines() calls self.write() repeatedly; the
            # recursive lock makes that re-entry safe.
            return _compression.BaseStream.writelines(self, seq)

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Values for whence are:

            0: start of stream (default); offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        with self._lock:
            self._check_can_seek()
            return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        with self._lock:
            self._check_not_closed()
            if self._mode == _MODE_READ:
                return self._buffer.tell()
            return self._pos
287n/a
288n/a
def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a bzip2-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str, bytes, or
    PathLike object), or an existing file object to read from or write
    to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or
    "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode.
    The default mode is "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the BZ2File
    constructor: BZ2File(filename, mode, compresslevel). In this case,
    the encoding, errors and newline arguments must not be provided.

    For text mode, a BZ2File object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error
    handling behavior, and line ending(s).

    """
    text_mode = "t" in mode
    if text_mode:
        # "t" and "b" are mutually exclusive.
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        # The text-layer arguments are meaningless in binary mode.
        for name, value in (("encoding", encoding),
                            ("errors", errors),
                            ("newline", newline)):
            if value is not None:
                raise ValueError(
                    "Argument %r not supported in binary mode" % name)

    binary_file = BZ2File(filename, mode.replace("t", ""),
                          compresslevel=compresslevel)
    if text_mode:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    return binary_file
328n/a
329n/a
def compress(data, compresslevel=9):
    """Compress a block of data.

    compresslevel, if given, must be a number between 1 and 9.

    For incremental compression, use a BZ2Compressor object instead.
    """
    compressor = BZ2Compressor(compresslevel)
    body = compressor.compress(data)
    return body + compressor.flush()
339n/a
340n/a
def decompress(data):
    """Decompress a block of data.

    For incremental decompression, use a BZ2Decompressor object instead.
    """
    chunks = []
    # The input may be several bzip2 streams back to back; decode each
    # one with a fresh decompressor until the data is exhausted.
    while data:
        decompressor = BZ2Decompressor()
        try:
            chunks.append(decompressor.decompress(data))
        except OSError:
            if not chunks:
                raise  # Error on the first iteration; bail out.
            break  # Leftover data is not a valid bzip2 stream; ignore it.
        if not decompressor.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        data = decompressor.unused_data
    return b"".join(chunks)