Core Development » Code coverage » Lib/tarfile.py

Python code coverage for Lib/tarfile.py

#countcontent
1n/a#!/usr/bin/env python3
2n/a#-------------------------------------------------------------------
3n/a# tarfile.py
4n/a#-------------------------------------------------------------------
5n/a# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
6n/a# All rights reserved.
7n/a#
8n/a# Permission is hereby granted, free of charge, to any person
9n/a# obtaining a copy of this software and associated documentation
10n/a# files (the "Software"), to deal in the Software without
11n/a# restriction, including without limitation the rights to use,
12n/a# copy, modify, merge, publish, distribute, sublicense, and/or sell
13n/a# copies of the Software, and to permit persons to whom the
14n/a# Software is furnished to do so, subject to the following
15n/a# conditions:
16n/a#
17n/a# The above copyright notice and this permission notice shall be
18n/a# included in all copies or substantial portions of the Software.
19n/a#
20n/a# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21n/a# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22n/a# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23n/a# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24n/a# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25n/a# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26n/a# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27n/a# OTHER DEALINGS IN THE SOFTWARE.
28n/a#
29n/a"""Read from and write to tar format archives.
30n/a"""
31n/a
32n/aversion = "0.9.0"
33n/a__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
34n/a__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
35n/a__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
36n/a__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
37n/a
38n/a#---------
39n/a# Imports
40n/a#---------
41n/afrom builtins import open as bltn_open
42n/aimport sys
43n/aimport os
44n/aimport io
45n/aimport shutil
46n/aimport stat
47n/aimport time
48n/aimport struct
49n/aimport copy
50n/aimport re
51n/a
52n/atry:
53n/a import pwd
54n/aexcept ImportError:
55n/a pwd = None
56n/atry:
57n/a import grp
58n/aexcept ImportError:
59n/a grp = None
60n/a
61n/a# os.symlink on Windows prior to 6.0 raises NotImplementedError
62n/asymlink_exception = (AttributeError, NotImplementedError)
63n/atry:
64n/a # OSError (winerror=1314) will be raised if the caller does not hold the
65n/a # SeCreateSymbolicLinkPrivilege privilege
66n/a symlink_exception += (OSError,)
67n/aexcept NameError:
68n/a pass
69n/a
70n/a# from tarfile import *
71n/a__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
72n/a "CompressionError", "StreamError", "ExtractError", "HeaderError",
73n/a "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
74n/a "DEFAULT_FORMAT", "open"]
75n/a
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = b"ustar \0"         # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the "type" field of a tar header.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Archive format constants accepted by TarFile(format=...).
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  The value is the conversion applied
# when the pax record is read back.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Default text encoding for header fields: UTF-8 on Windows,
# otherwise whatever the filesystem uses.
if os.name == "nt":
    ENCODING = "utf-8"
else:
    ENCODING = sys.getfilesystemencoding()
155n/a
156n/a#---------------------------------------------------------
157n/a# Some useful functions
158n/a#---------------------------------------------------------
159n/a
def stn(s, length, encoding, errors):
    """Convert a string to a null-padded bytes object of exactly
    *length* bytes, truncating if the encoded form is longer.
    """
    raw = s.encode(encoding, errors)[:length]
    return raw.ljust(length, b"\0")
165n/a
def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.

    Everything from the first NUL byte on is discarded; if there is
    no NUL, the whole buffer is decoded.
    """
    head, _, _ = s.partition(b"\0")
    return head.decode(encoding, errors)
173n/a
def nti(s):
    """Convert a tar number field (bytes) to a Python number.

    Supports the two encodings produced by itn() below: plain
    NUL/space-terminated octal digits (POSIX), and GNU tar's base-256
    form flagged by a leading 0o200 (positive) or 0o377 (negative)
    byte.  Raises InvalidHeaderError on malformed octal input.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] in (0o200, 0o377):
        # GNU base-256: remaining bytes are big-endian "digits".
        n = 0
        for i in range(len(s) - 1):
            n <<= 8
            n += s[i + 1]
        if s[0] == 0o377:
            # Negative values are stored in two's complement.
            n = -(256 ** (len(s) - 1) - n)
    else:
        try:
            s = nts(s, "ascii", "strict")
            # An empty/blank field counts as zero.
            n = int(s.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    return n
193n/a
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of *digits* bytes.

    Raises ValueError if n cannot be represented in the given width
    with the given format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, int(n)), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            # Two's complement over digits bytes.
            n = 256 ** digits + n

        for i in range(digits - 1):
            # Insert each base-256 digit at index 1 (right after the
            # flag byte) so the result comes out big-endian.
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s
221n/a
def calc_chksums(buf):
    """Calculate both checksums of a 512-byte member header.

    The chksum field (bytes 148-155) is skipped, i.e. treated as if it
    were filled with spaces (the 256 constant is 8 spaces' worth).
    According to the GNU tar sources, some tars (Sun and NeXT) sum
    signed chars, which differs whenever a byte has the high bit set,
    so both the unsigned and the signed sum are returned.
    """
    def _checksum(fmt):
        return 256 + sum(struct.unpack_from(fmt, buf))

    return _checksum("148B8x356B"), _checksum("148b8x356b")
234n/a
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy *length* bytes from fileobj src to fileobj dst.

    If length is None the entire remaining content is copied.  If the
    source runs dry before *length* bytes were read, *exception* is
    raised.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        # Unbounded copy: defer to the stdlib implementation.
        shutil.copyfileobj(src, dst, bufsize)
        return

    # Bounded copy: full-size chunks followed by the leftover piece.
    full_chunks, leftover = divmod(length, bufsize)
    sizes = [bufsize] * full_chunks
    if leftover:
        sizes.append(leftover)
    for want in sizes:
        chunk = src.read(want)
        if len(chunk) < want:
            raise exception("unexpected end of data")
        dst.write(chunk)
259n/a
def filemode(mode):
    """Deprecated in this location; use stat.filemode."""
    import warnings
    message = "deprecated in favor of stat.filemode"
    warnings.warn(message, DeprecationWarning, 2)
    return stat.filemode(mode)
266n/a
267n/adef _safe_print(s):
268n/a encoding = getattr(sys.stdout, 'encoding', None)
269n/a if encoding is not None:
270n/a s = s.encode(encoding, 'backslashreplace').decode(encoding)
271n/a print(s, end=' ')
272n/a
273n/a
# Exception hierarchy:
#
#   TarError
#   +-- ExtractError
#   +-- ReadError
#   +-- CompressionError
#   +-- StreamError
#   +-- HeaderError
#       +-- EmptyHeaderError
#       +-- TruncatedHeaderError
#       +-- EOFHeaderError
#       +-- InvalidHeaderError
#       +-- SubsequentHeaderError
class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass
307n/a
308n/a#---------------------------
309n/a# internal stream interface
310n/a#---------------------------
311n/aclass _LowLevelFile:
312n/a """Low-level file object. Supports reading and writing.
313n/a It is used instead of a regular file object for streaming
314n/a access.
315n/a """
316n/a
317n/a def __init__(self, name, mode):
318n/a mode = {
319n/a "r": os.O_RDONLY,
320n/a "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
321n/a }[mode]
322n/a if hasattr(os, "O_BINARY"):
323n/a mode |= os.O_BINARY
324n/a self.fd = os.open(name, mode, 0o666)
325n/a
326n/a def close(self):
327n/a os.close(self.fd)
328n/a
329n/a def read(self, size):
330n/a return os.read(self.fd, size)
331n/a
332n/a def write(self, s):
333n/a os.write(self.fd, s)
334n/a
335n/aclass _Stream:
336n/a """Class that serves as an adapter between TarFile and
337n/a a stream-like object. The stream-like object only
338n/a needs to have a read() or write() method and is accessed
339n/a blockwise. Use of gzip or bzip2 compression is possible.
340n/a A stream-like object could be for example: sys.stdin,
341n/a sys.stdout, a socket, a tape device etc.
342n/a
343n/a _Stream is intended to be used only internally.
344n/a """
345n/a
346n/a def __init__(self, name, mode, comptype, fileobj, bufsize):
347n/a """Construct a _Stream object.
348n/a """
349n/a self._extfileobj = True
350n/a if fileobj is None:
351n/a fileobj = _LowLevelFile(name, mode)
352n/a self._extfileobj = False
353n/a
354n/a if comptype == '*':
355n/a # Enable transparent compression detection for the
356n/a # stream interface
357n/a fileobj = _StreamProxy(fileobj)
358n/a comptype = fileobj.getcomptype()
359n/a
360n/a self.name = name or ""
361n/a self.mode = mode
362n/a self.comptype = comptype
363n/a self.fileobj = fileobj
364n/a self.bufsize = bufsize
365n/a self.buf = b""
366n/a self.pos = 0
367n/a self.closed = False
368n/a
369n/a try:
370n/a if comptype == "gz":
371n/a try:
372n/a import zlib
373n/a except ImportError:
374n/a raise CompressionError("zlib module is not available")
375n/a self.zlib = zlib
376n/a self.crc = zlib.crc32(b"")
377n/a if mode == "r":
378n/a self._init_read_gz()
379n/a self.exception = zlib.error
380n/a else:
381n/a self._init_write_gz()
382n/a
383n/a elif comptype == "bz2":
384n/a try:
385n/a import bz2
386n/a except ImportError:
387n/a raise CompressionError("bz2 module is not available")
388n/a if mode == "r":
389n/a self.dbuf = b""
390n/a self.cmp = bz2.BZ2Decompressor()
391n/a self.exception = OSError
392n/a else:
393n/a self.cmp = bz2.BZ2Compressor()
394n/a
395n/a elif comptype == "xz":
396n/a try:
397n/a import lzma
398n/a except ImportError:
399n/a raise CompressionError("lzma module is not available")
400n/a if mode == "r":
401n/a self.dbuf = b""
402n/a self.cmp = lzma.LZMADecompressor()
403n/a self.exception = lzma.LZMAError
404n/a else:
405n/a self.cmp = lzma.LZMACompressor()
406n/a
407n/a elif comptype != "tar":
408n/a raise CompressionError("unknown compression type %r" % comptype)
409n/a
410n/a except:
411n/a if not self._extfileobj:
412n/a self.fileobj.close()
413n/a self.closed = True
414n/a raise
415n/a
416n/a def __del__(self):
417n/a if hasattr(self, "closed") and not self.closed:
418n/a self.close()
419n/a
420n/a def _init_write_gz(self):
421n/a """Initialize for writing with gzip compression.
422n/a """
423n/a self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
424n/a -self.zlib.MAX_WBITS,
425n/a self.zlib.DEF_MEM_LEVEL,
426n/a 0)
427n/a timestamp = struct.pack("<L", int(time.time()))
428n/a self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
429n/a if self.name.endswith(".gz"):
430n/a self.name = self.name[:-3]
431n/a # RFC1952 says we must use ISO-8859-1 for the FNAME field.
432n/a self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
433n/a
434n/a def write(self, s):
435n/a """Write string s to the stream.
436n/a """
437n/a if self.comptype == "gz":
438n/a self.crc = self.zlib.crc32(s, self.crc)
439n/a self.pos += len(s)
440n/a if self.comptype != "tar":
441n/a s = self.cmp.compress(s)
442n/a self.__write(s)
443n/a
444n/a def __write(self, s):
445n/a """Write string s to the stream if a whole new block
446n/a is ready to be written.
447n/a """
448n/a self.buf += s
449n/a while len(self.buf) > self.bufsize:
450n/a self.fileobj.write(self.buf[:self.bufsize])
451n/a self.buf = self.buf[self.bufsize:]
452n/a
453n/a def close(self):
454n/a """Close the _Stream object. No operation should be
455n/a done on it afterwards.
456n/a """
457n/a if self.closed:
458n/a return
459n/a
460n/a self.closed = True
461n/a try:
462n/a if self.mode == "w" and self.comptype != "tar":
463n/a self.buf += self.cmp.flush()
464n/a
465n/a if self.mode == "w" and self.buf:
466n/a self.fileobj.write(self.buf)
467n/a self.buf = b""
468n/a if self.comptype == "gz":
469n/a self.fileobj.write(struct.pack("<L", self.crc))
470n/a self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
471n/a finally:
472n/a if not self._extfileobj:
473n/a self.fileobj.close()
474n/a
475n/a def _init_read_gz(self):
476n/a """Initialize for reading a gzip compressed fileobj.
477n/a """
478n/a self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
479n/a self.dbuf = b""
480n/a
481n/a # taken from gzip.GzipFile with some alterations
482n/a if self.__read(2) != b"\037\213":
483n/a raise ReadError("not a gzip file")
484n/a if self.__read(1) != b"\010":
485n/a raise CompressionError("unsupported compression method")
486n/a
487n/a flag = ord(self.__read(1))
488n/a self.__read(6)
489n/a
490n/a if flag & 4:
491n/a xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
492n/a self.read(xlen)
493n/a if flag & 8:
494n/a while True:
495n/a s = self.__read(1)
496n/a if not s or s == NUL:
497n/a break
498n/a if flag & 16:
499n/a while True:
500n/a s = self.__read(1)
501n/a if not s or s == NUL:
502n/a break
503n/a if flag & 2:
504n/a self.__read(2)
505n/a
506n/a def tell(self):
507n/a """Return the stream's file pointer position.
508n/a """
509n/a return self.pos
510n/a
511n/a def seek(self, pos=0):
512n/a """Set the stream's file pointer to pos. Negative seeking
513n/a is forbidden.
514n/a """
515n/a if pos - self.pos >= 0:
516n/a blocks, remainder = divmod(pos - self.pos, self.bufsize)
517n/a for i in range(blocks):
518n/a self.read(self.bufsize)
519n/a self.read(remainder)
520n/a else:
521n/a raise StreamError("seeking backwards is not allowed")
522n/a return self.pos
523n/a
524n/a def read(self, size=None):
525n/a """Return the next size number of bytes from the stream.
526n/a If size is not defined, return all bytes of the stream
527n/a up to EOF.
528n/a """
529n/a if size is None:
530n/a t = []
531n/a while True:
532n/a buf = self._read(self.bufsize)
533n/a if not buf:
534n/a break
535n/a t.append(buf)
536n/a buf = "".join(t)
537n/a else:
538n/a buf = self._read(size)
539n/a self.pos += len(buf)
540n/a return buf
541n/a
542n/a def _read(self, size):
543n/a """Return size bytes from the stream.
544n/a """
545n/a if self.comptype == "tar":
546n/a return self.__read(size)
547n/a
548n/a c = len(self.dbuf)
549n/a while c < size:
550n/a buf = self.__read(self.bufsize)
551n/a if not buf:
552n/a break
553n/a try:
554n/a buf = self.cmp.decompress(buf)
555n/a except self.exception:
556n/a raise ReadError("invalid compressed data")
557n/a self.dbuf += buf
558n/a c += len(buf)
559n/a buf = self.dbuf[:size]
560n/a self.dbuf = self.dbuf[size:]
561n/a return buf
562n/a
563n/a def __read(self, size):
564n/a """Return size bytes from stream. If internal buffer is empty,
565n/a read another block from the stream.
566n/a """
567n/a c = len(self.buf)
568n/a while c < size:
569n/a buf = self.fileobj.read(self.bufsize)
570n/a if not buf:
571n/a break
572n/a self.buf += buf
573n/a c += len(buf)
574n/a buf = self.buf[:size]
575n/a self.buf = self.buf[size:]
576n/a return buf
577n/a# class _Stream
578n/a
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        # Read one block ahead so the magic bytes are available for
        # getcomptype() before any data is consumed.
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # First call hands back the buffered block; afterwards this
        # bound method is replaced by the underlying file's read(),
        # so the proxy adds no further overhead.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Guess the compression type from the buffered magic bytes."""
        if self.buf.startswith(b"\x1f\x8b\x08"):
            # gzip magic + deflate method byte.
            return "gz"
        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            # bzip2 stream header followed by the block magic.
            return "bz2"
        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            # legacy lzma or xz container magic.
            return "xz"
        else:
            return "tar"

    def close(self):
        self.fileobj.close()
604n/a# class StreamProxy
605n/a
606n/a#------------------------
607n/a# Extraction file object
608n/a#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.  Supports sparse members: holes described by
       blockinfo read back as NUL bytes.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj      # the enclosing (archive) file object
        self.offset = offset        # start of this member's data in fileobj
        self.size = size            # logical size of the member's data
        self.position = 0           # current logical read position
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            # Non-sparse member: one contiguous data block.
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
        # Each entry is (is_data, logical_start, logical_stop, real_offset);
        # real_offset is None for zero (hole) blocks.
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                # Gap before this data block becomes a zero block.
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            # Trailing hole up to the logical end of the member.
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.

        The resulting position is clamped to [0, self.size].
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.

        Reads at most size bytes (all remaining data if size is
        None), stitching together data blocks from the underlying
        file and NUL runs for sparse holes.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            # Find the map entry covering the current position.  The
            # index wraps around since seek() may have moved backwards.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                # Hole: synthesize zeros.
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        # Only marks this view closed; the underlying archive file
        # object is shared and stays open.
        self.closed = True
711n/a#class _FileInFile
712n/a
class ExFileObject(io.BufferedReader):
    """Buffered, read-only file object for an archive member.

    Wraps a _FileInFile view over the member's data region
    (honoring sparse block info) in io.BufferedReader.
    """

    def __init__(self, tarfile, tarinfo):
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                tarinfo.size, tarinfo.sparse)
        super().__init__(fileobj)
719n/a#class ExFileObject
720n/a
721n/a#------------------
722n/a# Exported Classes
723n/a#------------------
724n/aclass TarInfo(object):
725n/a """Informational class which holds the details about an
726n/a archive member given by a tar header block.
727n/a TarInfo objects are returned by TarFile.getmember(),
728n/a TarFile.getmembers() and TarFile.gettarinfo() and are
729n/a usually created internally.
730n/a """
731n/a
732n/a __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
733n/a "chksum", "type", "linkname", "uname", "gname",
734n/a "devmajor", "devminor",
735n/a "offset", "offset_data", "pax_headers", "sparse",
736n/a "tarfile", "_sparse_structs", "_link_target")
737n/a
    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.  All other attributes get neutral defaults
           describing an empty regular file owned by root.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information
761n/a
    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".  These properties expose the pax
    # spelling as aliases for the ustar attribute names.
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)
775n/a
776n/a def __repr__(self):
777n/a return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
778n/a
    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.

        The mode is masked down to its permission bits, and a
        directory entry's name is guaranteed to end with a slash.
        """
        info = {
            "name": self.name,
            "mode": self.mode & 0o7777,
            "uid": self.uid,
            "gid": self.gid,
            "size": self.size,
            "mtime": self.mtime,
            "chksum": self.chksum,
            "type": self.type,
            "linkname": self.linkname,
            "uname": self.uname,
            "gname": self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info
802n/a
    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a string of 512 byte blocks.

        format selects the header flavor (USTAR_FORMAT, GNU_FORMAT or
        PAX_FORMAT); any other value raises ValueError.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")
816n/a
    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

        A name longer than 100 encoded bytes is split into the
        ustar prefix/name pair; a linkname that does not fit its
        100-byte field raises ValueError.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)

        return self._create_header(info, USTAR_FORMAT, encoding, errors)
829n/a
    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

        Over-long names/linknames are emitted first as extra
        GNUTYPE_LONGNAME/GNUTYPE_LONGLINK pseudo-members, followed
        by the regular header block.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
843n/a
    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.

           Fields that overflow their ustar limits or cannot be encoded
           as ASCII are moved into pax records; the ustar block then
           carries truncated/zeroed placeholders.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        # The ustar block itself is always plain ASCII; anything that
        # needed more has been moved into the pax records above.
        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
892n/a
    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.

        Global records (type XGLTYPE) apply to all subsequent
        members; keys/values are always encoded as UTF-8.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
898n/a
    def _posix_split_name(self, name, encoding, errors):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split can only happen at a "/" boundary; if no split
           point satisfies both the 155-byte prefix and 100-byte name
           limits, ValueError is raised.
        """
        components = name.split("/")
        for i in range(1, len(components)):
            prefix = "/".join(components[:i])
            name = "/".join(components[i:])
            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
                break
        else:
            # No valid split point found (also hit when there is no "/").
            raise ValueError("name is too long")

        return prefix, name
914n/a
    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # Pad the joined fields with NULs to a full 512-byte block.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        # The checksum is computed with the chksum field as spaces,
        # then patched in: bytes 0..147 + "%06o\0" (7 bytes filling
        # offsets 148..154) + the rest from offset 155 on, leaving the
        # original trailing space of the 8-byte field in place.
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf
942n/a
943n/a @staticmethod
944n/a def _create_payload(payload):
945n/a """Return the string payload filled with zero bytes
946n/a up to the next 512 byte border.
947n/a """
948n/a blocks, remainder = divmod(len(payload), BLOCKSIZE)
949n/a if remainder > 0:
950n/a payload += (BLOCKSIZE - remainder) * NUL
951n/a return payload
952n/a
    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.

           The long string is stored as the *data* of an extra
           pseudo-member whose header carries the fixed name
           "././@LongLink", as GNU tar does.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)
969n/a
970n/a @classmethod
971n/a def _create_pax_generic_header(cls, pax_headers, type, encoding):
972n/a """Return a POSIX.1-2008 extended or global header sequence
973n/a that contains a list of keyword, value pairs. The values
974n/a must be strings.
975n/a """
976n/a # Check if one of the fields contains surrogate characters and thereby
977n/a # forces hdrcharset=BINARY, see _proc_pax() for more information.
978n/a binary = False
979n/a for keyword, value in pax_headers.items():
980n/a try:
981n/a value.encode("utf-8", "strict")
982n/a except UnicodeEncodeError:
983n/a binary = True
984n/a break
985n/a
986n/a records = b""
987n/a if binary:
988n/a # Put the hdrcharset field at the beginning of the header.
989n/a records += b"21 hdrcharset=BINARY\n"
990n/a
991n/a for keyword, value in pax_headers.items():
992n/a keyword = keyword.encode("utf-8")
993n/a if binary:
994n/a # Try to restore the original byte representation of `value'.
995n/a # Needless to say, that the encoding must match the string.
996n/a value = value.encode(encoding, "surrogateescape")
997n/a else:
998n/a value = value.encode("utf-8")
999n/a
1000n/a l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1001n/a n = p = 0
1002n/a while True:
1003n/a n = l + len(str(p))
1004n/a if n == p:
1005n/a break
1006n/a p = n
1007n/a records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
1008n/a
1009n/a # We use a hardcoded "././@PaxHeader" name like star does
1010n/a # instead of the one that POSIX recommends.
1011n/a info = {}
1012n/a info["name"] = "././@PaxHeader"
1013n/a info["type"] = type
1014n/a info["size"] = len(records)
1015n/a info["magic"] = POSIX_MAGIC
1016n/a
1017n/a # Create pax header + record blocks.
1018n/a return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
1019n/a cls._create_payload(records)
1020n/a
1021n/a @classmethod
1022n/a def frombuf(cls, buf, encoding, errors):
1023n/a """Construct a TarInfo object from a 512 byte bytes object.
1024n/a """
1025n/a if len(buf) == 0:
1026n/a raise EmptyHeaderError("empty header")
1027n/a if len(buf) != BLOCKSIZE:
1028n/a raise TruncatedHeaderError("truncated header")
1029n/a if buf.count(NUL) == BLOCKSIZE:
1030n/a raise EOFHeaderError("end of file header")
1031n/a
1032n/a chksum = nti(buf[148:156])
1033n/a if chksum not in calc_chksums(buf):
1034n/a raise InvalidHeaderError("bad checksum")
1035n/a
1036n/a obj = cls()
1037n/a obj.name = nts(buf[0:100], encoding, errors)
1038n/a obj.mode = nti(buf[100:108])
1039n/a obj.uid = nti(buf[108:116])
1040n/a obj.gid = nti(buf[116:124])
1041n/a obj.size = nti(buf[124:136])
1042n/a obj.mtime = nti(buf[136:148])
1043n/a obj.chksum = chksum
1044n/a obj.type = buf[156:157]
1045n/a obj.linkname = nts(buf[157:257], encoding, errors)
1046n/a obj.uname = nts(buf[265:297], encoding, errors)
1047n/a obj.gname = nts(buf[297:329], encoding, errors)
1048n/a obj.devmajor = nti(buf[329:337])
1049n/a obj.devminor = nti(buf[337:345])
1050n/a prefix = nts(buf[345:500], encoding, errors)
1051n/a
1052n/a # Old V7 tar format represents a directory as a regular
1053n/a # file with a trailing slash.
1054n/a if obj.type == AREGTYPE and obj.name.endswith("/"):
1055n/a obj.type = DIRTYPE
1056n/a
1057n/a # The old GNU sparse format occupies some of the unused
1058n/a # space in the buffer for up to 4 sparse structures.
1059n/a # Save the them for later processing in _proc_sparse().
1060n/a if obj.type == GNUTYPE_SPARSE:
1061n/a pos = 386
1062n/a structs = []
1063n/a for i in range(4):
1064n/a try:
1065n/a offset = nti(buf[pos:pos + 12])
1066n/a numbytes = nti(buf[pos + 12:pos + 24])
1067n/a except ValueError:
1068n/a break
1069n/a structs.append((offset, numbytes))
1070n/a pos += 24
1071n/a isextended = bool(buf[482])
1072n/a origsize = nti(buf[483:495])
1073n/a obj._sparse_structs = (structs, isextended, origsize)
1074n/a
1075n/a # Remove redundant slashes from directories.
1076n/a if obj.isdir():
1077n/a obj.name = obj.name.rstrip("/")
1078n/a
1079n/a # Reconstruct a ustar longname.
1080n/a if prefix and obj.type not in GNU_TYPES:
1081n/a obj.name = prefix + "/" + obj.name
1082n/a return obj
1083n/a
1084n/a @classmethod
1085n/a def fromtarfile(cls, tarfile):
1086n/a """Return the next TarInfo object from TarFile object
1087n/a tarfile.
1088n/a """
1089n/a buf = tarfile.fileobj.read(BLOCKSIZE)
1090n/a obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1091n/a obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1092n/a return obj._proc_member(tarfile)
1093n/a
1094n/a #--------------------------------------------------------------------------
1095n/a # The following are methods that are called depending on the type of a
1096n/a # member. The entry point is _proc_member() which can be overridden in a
1097n/a # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1098n/a # implement the following
1099n/a # operations:
1100n/a # 1. Set self.offset_data to the position where the data blocks begin,
1101n/a # if there is data that follows.
1102n/a # 2. Set tarfile.offset to the position where the next member's header will
1103n/a # begin.
1104n/a # 3. Return self or another valid TarInfo object.
1105n/a def _proc_member(self, tarfile):
1106n/a """Choose the right processing method depending on
1107n/a the type and call it.
1108n/a """
1109n/a if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1110n/a return self._proc_gnulong(tarfile)
1111n/a elif self.type == GNUTYPE_SPARSE:
1112n/a return self._proc_sparse(tarfile)
1113n/a elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1114n/a return self._proc_pax(tarfile)
1115n/a else:
1116n/a return self._proc_builtin(tarfile)
1117n/a
1118n/a def _proc_builtin(self, tarfile):
1119n/a """Process a builtin type or an unknown type which
1120n/a will be treated as a regular file.
1121n/a """
1122n/a self.offset_data = tarfile.fileobj.tell()
1123n/a offset = self.offset_data
1124n/a if self.isreg() or self.type not in SUPPORTED_TYPES:
1125n/a # Skip the following data blocks.
1126n/a offset += self._block(self.size)
1127n/a tarfile.offset = offset
1128n/a
1129n/a # Patch the TarInfo object with saved global
1130n/a # header information.
1131n/a self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1132n/a
1133n/a return self
1134n/a
1135n/a def _proc_gnulong(self, tarfile):
1136n/a """Process the blocks that hold a GNU longname
1137n/a or longlink member.
1138n/a """
1139n/a buf = tarfile.fileobj.read(self._block(self.size))
1140n/a
1141n/a # Fetch the next header and process it.
1142n/a try:
1143n/a next = self.fromtarfile(tarfile)
1144n/a except HeaderError:
1145n/a raise SubsequentHeaderError("missing or bad subsequent header")
1146n/a
1147n/a # Patch the TarInfo object from the next header with
1148n/a # the longname information.
1149n/a next.offset = self.offset
1150n/a if self.type == GNUTYPE_LONGNAME:
1151n/a next.name = nts(buf, tarfile.encoding, tarfile.errors)
1152n/a elif self.type == GNUTYPE_LONGLINK:
1153n/a next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1154n/a
1155n/a return next
1156n/a
1157n/a def _proc_sparse(self, tarfile):
1158n/a """Process a GNU sparse header plus extra headers.
1159n/a """
1160n/a # We already collected some sparse structures in frombuf().
1161n/a structs, isextended, origsize = self._sparse_structs
1162n/a del self._sparse_structs
1163n/a
1164n/a # Collect sparse structures from extended header blocks.
1165n/a while isextended:
1166n/a buf = tarfile.fileobj.read(BLOCKSIZE)
1167n/a pos = 0
1168n/a for i in range(21):
1169n/a try:
1170n/a offset = nti(buf[pos:pos + 12])
1171n/a numbytes = nti(buf[pos + 12:pos + 24])
1172n/a except ValueError:
1173n/a break
1174n/a if offset and numbytes:
1175n/a structs.append((offset, numbytes))
1176n/a pos += 24
1177n/a isextended = bool(buf[504])
1178n/a self.sparse = structs
1179n/a
1180n/a self.offset_data = tarfile.fileobj.tell()
1181n/a tarfile.offset = self.offset_data + self._block(self.size)
1182n/a self.size = origsize
1183n/a return self
1184n/a
1185n/a def _proc_pax(self, tarfile):
1186n/a """Process an extended or global header as described in
1187n/a POSIX.1-2008.
1188n/a """
1189n/a # Read the header information.
1190n/a buf = tarfile.fileobj.read(self._block(self.size))
1191n/a
1192n/a # A pax header stores supplemental information for either
1193n/a # the following file (extended) or all following files
1194n/a # (global).
1195n/a if self.type == XGLTYPE:
1196n/a pax_headers = tarfile.pax_headers
1197n/a else:
1198n/a pax_headers = tarfile.pax_headers.copy()
1199n/a
1200n/a # Check if the pax header contains a hdrcharset field. This tells us
1201n/a # the encoding of the path, linkpath, uname and gname fields. Normally,
1202n/a # these fields are UTF-8 encoded but since POSIX.1-2008 tar
1203n/a # implementations are allowed to store them as raw binary strings if
1204n/a # the translation to UTF-8 fails.
1205n/a match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1206n/a if match is not None:
1207n/a pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
1208n/a
1209n/a # For the time being, we don't care about anything other than "BINARY".
1210n/a # The only other value that is currently allowed by the standard is
1211n/a # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1212n/a hdrcharset = pax_headers.get("hdrcharset")
1213n/a if hdrcharset == "BINARY":
1214n/a encoding = tarfile.encoding
1215n/a else:
1216n/a encoding = "utf-8"
1217n/a
1218n/a # Parse pax header information. A record looks like that:
1219n/a # "%d %s=%s\n" % (length, keyword, value). length is the size
1220n/a # of the complete record including the length field itself and
1221n/a # the newline. keyword and value are both UTF-8 encoded strings.
1222n/a regex = re.compile(br"(\d+) ([^=]+)=")
1223n/a pos = 0
1224n/a while True:
1225n/a match = regex.match(buf, pos)
1226n/a if not match:
1227n/a break
1228n/a
1229n/a length, keyword = match.groups()
1230n/a length = int(length)
1231n/a value = buf[match.end(2) + 1:match.start(1) + length - 1]
1232n/a
1233n/a # Normally, we could just use "utf-8" as the encoding and "strict"
1234n/a # as the error handler, but we better not take the risk. For
1235n/a # example, GNU tar <= 1.23 is known to store filenames it cannot
1236n/a # translate to UTF-8 as raw strings (unfortunately without a
1237n/a # hdrcharset=BINARY header).
1238n/a # We first try the strict standard encoding, and if that fails we
1239n/a # fall back on the user's encoding and error handler.
1240n/a keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
1241n/a tarfile.errors)
1242n/a if keyword in PAX_NAME_FIELDS:
1243n/a value = self._decode_pax_field(value, encoding, tarfile.encoding,
1244n/a tarfile.errors)
1245n/a else:
1246n/a value = self._decode_pax_field(value, "utf-8", "utf-8",
1247n/a tarfile.errors)
1248n/a
1249n/a pax_headers[keyword] = value
1250n/a pos += length
1251n/a
1252n/a # Fetch the next header.
1253n/a try:
1254n/a next = self.fromtarfile(tarfile)
1255n/a except HeaderError:
1256n/a raise SubsequentHeaderError("missing or bad subsequent header")
1257n/a
1258n/a # Process GNU sparse information.
1259n/a if "GNU.sparse.map" in pax_headers:
1260n/a # GNU extended sparse format version 0.1.
1261n/a self._proc_gnusparse_01(next, pax_headers)
1262n/a
1263n/a elif "GNU.sparse.size" in pax_headers:
1264n/a # GNU extended sparse format version 0.0.
1265n/a self._proc_gnusparse_00(next, pax_headers, buf)
1266n/a
1267n/a elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1268n/a # GNU extended sparse format version 1.0.
1269n/a self._proc_gnusparse_10(next, pax_headers, tarfile)
1270n/a
1271n/a if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1272n/a # Patch the TarInfo object with the extended header info.
1273n/a next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1274n/a next.offset = self.offset
1275n/a
1276n/a if "size" in pax_headers:
1277n/a # If the extended header replaces the size field,
1278n/a # we need to recalculate the offset where the next
1279n/a # header starts.
1280n/a offset = next.offset_data
1281n/a if next.isreg() or next.type not in SUPPORTED_TYPES:
1282n/a offset += next._block(next.size)
1283n/a tarfile.offset = offset
1284n/a
1285n/a return next
1286n/a
1287n/a def _proc_gnusparse_00(self, next, pax_headers, buf):
1288n/a """Process a GNU tar extended sparse header, version 0.0.
1289n/a """
1290n/a offsets = []
1291n/a for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1292n/a offsets.append(int(match.group(1)))
1293n/a numbytes = []
1294n/a for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1295n/a numbytes.append(int(match.group(1)))
1296n/a next.sparse = list(zip(offsets, numbytes))
1297n/a
1298n/a def _proc_gnusparse_01(self, next, pax_headers):
1299n/a """Process a GNU tar extended sparse header, version 0.1.
1300n/a """
1301n/a sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1302n/a next.sparse = list(zip(sparse[::2], sparse[1::2]))
1303n/a
1304n/a def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1305n/a """Process a GNU tar extended sparse header, version 1.0.
1306n/a """
1307n/a fields = None
1308n/a sparse = []
1309n/a buf = tarfile.fileobj.read(BLOCKSIZE)
1310n/a fields, buf = buf.split(b"\n", 1)
1311n/a fields = int(fields)
1312n/a while len(sparse) < fields * 2:
1313n/a if b"\n" not in buf:
1314n/a buf += tarfile.fileobj.read(BLOCKSIZE)
1315n/a number, buf = buf.split(b"\n", 1)
1316n/a sparse.append(int(number))
1317n/a next.offset_data = tarfile.fileobj.tell()
1318n/a next.sparse = list(zip(sparse[::2], sparse[1::2]))
1319n/a
1320n/a def _apply_pax_info(self, pax_headers, encoding, errors):
1321n/a """Replace fields with supplemental information from a previous
1322n/a pax extended or global header.
1323n/a """
1324n/a for keyword, value in pax_headers.items():
1325n/a if keyword == "GNU.sparse.name":
1326n/a setattr(self, "path", value)
1327n/a elif keyword == "GNU.sparse.size":
1328n/a setattr(self, "size", int(value))
1329n/a elif keyword == "GNU.sparse.realsize":
1330n/a setattr(self, "size", int(value))
1331n/a elif keyword in PAX_FIELDS:
1332n/a if keyword in PAX_NUMBER_FIELDS:
1333n/a try:
1334n/a value = PAX_NUMBER_FIELDS[keyword](value)
1335n/a except ValueError:
1336n/a value = 0
1337n/a if keyword == "path":
1338n/a value = value.rstrip("/")
1339n/a setattr(self, keyword, value)
1340n/a
1341n/a self.pax_headers = pax_headers.copy()
1342n/a
1343n/a def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1344n/a """Decode a single field from a pax record.
1345n/a """
1346n/a try:
1347n/a return value.decode(encoding, "strict")
1348n/a except UnicodeDecodeError:
1349n/a return value.decode(fallback_encoding, fallback_errors)
1350n/a
1351n/a def _block(self, count):
1352n/a """Round up a byte count by BLOCKSIZE and return it,
1353n/a e.g. _block(834) => 1024.
1354n/a """
1355n/a blocks, remainder = divmod(count, BLOCKSIZE)
1356n/a if remainder:
1357n/a blocks += 1
1358n/a return blocks * BLOCKSIZE
1359n/a
1360n/a def isreg(self):
1361n/a return self.type in REGULAR_TYPES
1362n/a def isfile(self):
1363n/a return self.isreg()
1364n/a def isdir(self):
1365n/a return self.type == DIRTYPE
1366n/a def issym(self):
1367n/a return self.type == SYMTYPE
1368n/a def islnk(self):
1369n/a return self.type == LNKTYPE
1370n/a def ischr(self):
1371n/a return self.type == CHRTYPE
1372n/a def isblk(self):
1373n/a return self.type == BLKTYPE
1374n/a def isfifo(self):
1375n/a return self.type == FIFOTYPE
1376n/a def issparse(self):
1377n/a return self.sparse is not None
1378n/a def isdev(self):
1379n/a return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1380n/a# class TarInfo
1381n/a
1382n/aclass TarFile(object):
1383n/a """The TarFile Class provides an interface to tar archives.
1384n/a """
1385n/a
1386n/a debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1387n/a
1388n/a dereference = False # If true, add content of linked file to the
1389n/a # tar file, else the link.
1390n/a
1391n/a ignore_zeros = False # If true, skips empty or invalid blocks and
1392n/a # continues processing.
1393n/a
1394n/a errorlevel = 1 # If 0, fatal errors only appear in debug
1395n/a # messages (if debug >= 0). If > 0, errors
1396n/a # are passed to the caller as exceptions.
1397n/a
1398n/a format = DEFAULT_FORMAT # The format to use when creating an archive.
1399n/a
1400n/a encoding = ENCODING # Encoding for 8-bit character strings.
1401n/a
1402n/a errors = None # Error handler for unicode conversion.
1403n/a
1404n/a tarinfo = TarInfo # The default TarInfo class to use.
1405n/a
1406n/a fileobject = ExFileObject # The file-object for extractfile().
1407n/a
1408n/a def __init__(self, name=None, mode="r", fileobj=None, format=None,
1409n/a tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1410n/a errors="surrogateescape", pax_headers=None, debug=None,
1411n/a errorlevel=None, copybufsize=None):
1412n/a """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1413n/a read from an existing archive, 'a' to append data to an existing
1414n/a file or 'w' to create a new file overwriting an existing one. `mode'
1415n/a defaults to 'r'.
1416n/a If `fileobj' is given, it is used for reading or writing data. If it
1417n/a can be determined, `mode' is overridden by `fileobj's mode.
1418n/a `fileobj' is not closed, when TarFile is closed.
1419n/a """
1420n/a modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
1421n/a if mode not in modes:
1422n/a raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1423n/a self.mode = mode
1424n/a self._mode = modes[mode]
1425n/a
1426n/a if not fileobj:
1427n/a if self.mode == "a" and not os.path.exists(name):
1428n/a # Create nonexistent files in append mode.
1429n/a self.mode = "w"
1430n/a self._mode = "wb"
1431n/a fileobj = bltn_open(name, self._mode)
1432n/a self._extfileobj = False
1433n/a else:
1434n/a if (name is None and hasattr(fileobj, "name") and
1435n/a isinstance(fileobj.name, (str, bytes))):
1436n/a name = fileobj.name
1437n/a if hasattr(fileobj, "mode"):
1438n/a self._mode = fileobj.mode
1439n/a self._extfileobj = True
1440n/a self.name = os.path.abspath(name) if name else None
1441n/a self.fileobj = fileobj
1442n/a
1443n/a # Init attributes.
1444n/a if format is not None:
1445n/a self.format = format
1446n/a if tarinfo is not None:
1447n/a self.tarinfo = tarinfo
1448n/a if dereference is not None:
1449n/a self.dereference = dereference
1450n/a if ignore_zeros is not None:
1451n/a self.ignore_zeros = ignore_zeros
1452n/a if encoding is not None:
1453n/a self.encoding = encoding
1454n/a self.errors = errors
1455n/a
1456n/a if pax_headers is not None and self.format == PAX_FORMAT:
1457n/a self.pax_headers = pax_headers
1458n/a else:
1459n/a self.pax_headers = {}
1460n/a
1461n/a if debug is not None:
1462n/a self.debug = debug
1463n/a if errorlevel is not None:
1464n/a self.errorlevel = errorlevel
1465n/a
1466n/a # Init datastructures.
1467n/a self.copybufsize = copybufsize
1468n/a self.closed = False
1469n/a self.members = [] # list of members as TarInfo objects
1470n/a self._loaded = False # flag if all members have been read
1471n/a self.offset = self.fileobj.tell()
1472n/a # current position in the archive file
1473n/a self.inodes = {} # dictionary caching the inodes of
1474n/a # archive members already added
1475n/a
1476n/a try:
1477n/a if self.mode == "r":
1478n/a self.firstmember = None
1479n/a self.firstmember = self.next()
1480n/a
1481n/a if self.mode == "a":
1482n/a # Move to the end of the archive,
1483n/a # before the first empty block.
1484n/a while True:
1485n/a self.fileobj.seek(self.offset)
1486n/a try:
1487n/a tarinfo = self.tarinfo.fromtarfile(self)
1488n/a self.members.append(tarinfo)
1489n/a except EOFHeaderError:
1490n/a self.fileobj.seek(self.offset)
1491n/a break
1492n/a except HeaderError as e:
1493n/a raise ReadError(str(e))
1494n/a
1495n/a if self.mode in ("a", "w", "x"):
1496n/a self._loaded = True
1497n/a
1498n/a if self.pax_headers:
1499n/a buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1500n/a self.fileobj.write(buf)
1501n/a self.offset += len(buf)
1502n/a except:
1503n/a if not self._extfileobj:
1504n/a self.fileobj.close()
1505n/a self.closed = True
1506n/a raise
1507n/a
1508n/a #--------------------------------------------------------------------------
1509n/a # Below are the classmethods which act as alternate constructors to the
1510n/a # TarFile class. The open() method is the only one that is needed for
1511n/a # public use; it is the "super"-constructor and is able to select an
1512n/a # adequate "sub"-constructor for a particular compression using the mapping
1513n/a # from OPEN_METH.
1514n/a #
1515n/a # This concept allows one to subclass TarFile without losing the comfort of
1516n/a # the super-constructor. A sub-constructor is registered and made available
1517n/a # by adding it to the mapping in OPEN_METH.
1518n/a
1519n/a @classmethod
1520n/a def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1521n/a """Open a tar archive for reading, writing or appending. Return
1522n/a an appropriate TarFile class.
1523n/a
1524n/a mode:
1525n/a 'r' or 'r:*' open for reading with transparent compression
1526n/a 'r:' open for reading exclusively uncompressed
1527n/a 'r:gz' open for reading with gzip compression
1528n/a 'r:bz2' open for reading with bzip2 compression
1529n/a 'r:xz' open for reading with lzma compression
1530n/a 'a' or 'a:' open for appending, creating the file if necessary
1531n/a 'w' or 'w:' open for writing without compression
1532n/a 'w:gz' open for writing with gzip compression
1533n/a 'w:bz2' open for writing with bzip2 compression
1534n/a 'w:xz' open for writing with lzma compression
1535n/a
1536n/a 'x' or 'x:' create a tarfile exclusively without compression, raise
1537n/a an exception if the file is already created
1538n/a 'x:gz' create a gzip compressed tarfile, raise an exception
1539n/a if the file is already created
1540n/a 'x:bz2' create a bzip2 compressed tarfile, raise an exception
1541n/a if the file is already created
1542n/a 'x:xz' create an lzma compressed tarfile, raise an exception
1543n/a if the file is already created
1544n/a
1545n/a 'r|*' open a stream of tar blocks with transparent compression
1546n/a 'r|' open an uncompressed stream of tar blocks for reading
1547n/a 'r|gz' open a gzip compressed stream of tar blocks
1548n/a 'r|bz2' open a bzip2 compressed stream of tar blocks
1549n/a 'r|xz' open an lzma compressed stream of tar blocks
1550n/a 'w|' open an uncompressed stream for writing
1551n/a 'w|gz' open a gzip compressed stream for writing
1552n/a 'w|bz2' open a bzip2 compressed stream for writing
1553n/a 'w|xz' open an lzma compressed stream for writing
1554n/a """
1555n/a
1556n/a if not name and not fileobj:
1557n/a raise ValueError("nothing to open")
1558n/a
1559n/a if mode in ("r", "r:*"):
1560n/a # Find out which *open() is appropriate for opening the file.
1561n/a def not_compressed(comptype):
1562n/a return cls.OPEN_METH[comptype] == 'taropen'
1563n/a for comptype in sorted(cls.OPEN_METH, key=not_compressed):
1564n/a func = getattr(cls, cls.OPEN_METH[comptype])
1565n/a if fileobj is not None:
1566n/a saved_pos = fileobj.tell()
1567n/a try:
1568n/a return func(name, "r", fileobj, **kwargs)
1569n/a except (ReadError, CompressionError):
1570n/a if fileobj is not None:
1571n/a fileobj.seek(saved_pos)
1572n/a continue
1573n/a raise ReadError("file could not be opened successfully")
1574n/a
1575n/a elif ":" in mode:
1576n/a filemode, comptype = mode.split(":", 1)
1577n/a filemode = filemode or "r"
1578n/a comptype = comptype or "tar"
1579n/a
1580n/a # Select the *open() function according to
1581n/a # given compression.
1582n/a if comptype in cls.OPEN_METH:
1583n/a func = getattr(cls, cls.OPEN_METH[comptype])
1584n/a else:
1585n/a raise CompressionError("unknown compression type %r" % comptype)
1586n/a return func(name, filemode, fileobj, **kwargs)
1587n/a
1588n/a elif "|" in mode:
1589n/a filemode, comptype = mode.split("|", 1)
1590n/a filemode = filemode or "r"
1591n/a comptype = comptype or "tar"
1592n/a
1593n/a if filemode not in ("r", "w"):
1594n/a raise ValueError("mode must be 'r' or 'w'")
1595n/a
1596n/a stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1597n/a try:
1598n/a t = cls(name, filemode, stream, **kwargs)
1599n/a except:
1600n/a stream.close()
1601n/a raise
1602n/a t._extfileobj = False
1603n/a return t
1604n/a
1605n/a elif mode in ("a", "w", "x"):
1606n/a return cls.taropen(name, mode, fileobj, **kwargs)
1607n/a
1608n/a raise ValueError("undiscernible mode")
1609n/a
1610n/a @classmethod
1611n/a def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1612n/a """Open uncompressed tar archive name for reading or writing.
1613n/a """
1614n/a if mode not in ("r", "a", "w", "x"):
1615n/a raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1616n/a return cls(name, mode, fileobj, **kwargs)
1617n/a
1618n/a @classmethod
1619n/a def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1620n/a """Open gzip compressed tar archive name for reading or writing.
1621n/a Appending is not allowed.
1622n/a """
1623n/a if mode not in ("r", "w", "x"):
1624n/a raise ValueError("mode must be 'r', 'w' or 'x'")
1625n/a
1626n/a try:
1627n/a import gzip
1628n/a gzip.GzipFile
1629n/a except (ImportError, AttributeError):
1630n/a raise CompressionError("gzip module is not available")
1631n/a
1632n/a try:
1633n/a fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
1634n/a except OSError:
1635n/a if fileobj is not None and mode == 'r':
1636n/a raise ReadError("not a gzip file")
1637n/a raise
1638n/a
1639n/a try:
1640n/a t = cls.taropen(name, mode, fileobj, **kwargs)
1641n/a except OSError:
1642n/a fileobj.close()
1643n/a if mode == 'r':
1644n/a raise ReadError("not a gzip file")
1645n/a raise
1646n/a except:
1647n/a fileobj.close()
1648n/a raise
1649n/a t._extfileobj = False
1650n/a return t
1651n/a
1652n/a @classmethod
1653n/a def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1654n/a """Open bzip2 compressed tar archive name for reading or writing.
1655n/a Appending is not allowed.
1656n/a """
1657n/a if mode not in ("r", "w", "x"):
1658n/a raise ValueError("mode must be 'r', 'w' or 'x'")
1659n/a
1660n/a try:
1661n/a import bz2
1662n/a except ImportError:
1663n/a raise CompressionError("bz2 module is not available")
1664n/a
1665n/a fileobj = bz2.BZ2File(fileobj or name, mode,
1666n/a compresslevel=compresslevel)
1667n/a
1668n/a try:
1669n/a t = cls.taropen(name, mode, fileobj, **kwargs)
1670n/a except (OSError, EOFError):
1671n/a fileobj.close()
1672n/a if mode == 'r':
1673n/a raise ReadError("not a bzip2 file")
1674n/a raise
1675n/a except:
1676n/a fileobj.close()
1677n/a raise
1678n/a t._extfileobj = False
1679n/a return t
1680n/a
1681n/a @classmethod
1682n/a def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1683n/a """Open lzma compressed tar archive name for reading or writing.
1684n/a Appending is not allowed.
1685n/a """
1686n/a if mode not in ("r", "w", "x"):
1687n/a raise ValueError("mode must be 'r', 'w' or 'x'")
1688n/a
1689n/a try:
1690n/a import lzma
1691n/a except ImportError:
1692n/a raise CompressionError("lzma module is not available")
1693n/a
1694n/a fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)
1695n/a
1696n/a try:
1697n/a t = cls.taropen(name, mode, fileobj, **kwargs)
1698n/a except (lzma.LZMAError, EOFError):
1699n/a fileobj.close()
1700n/a if mode == 'r':
1701n/a raise ReadError("not an lzma file")
1702n/a raise
1703n/a except:
1704n/a fileobj.close()
1705n/a raise
1706n/a t._extfileobj = False
1707n/a return t
1708n/a
1709n/a # All *open() methods are registered here.
1710n/a OPEN_METH = {
1711n/a "tar": "taropen", # uncompressed tar
1712n/a "gz": "gzopen", # gzip compressed tar
1713n/a "bz2": "bz2open", # bzip2 compressed tar
1714n/a "xz": "xzopen" # lzma compressed tar
1715n/a }
1716n/a
1717n/a #--------------------------------------------------------------------------
1718n/a # The public methods which TarFile provides:
1719n/a
1720n/a def close(self):
1721n/a """Close the TarFile. In write-mode, two finishing zero blocks are
1722n/a appended to the archive.
1723n/a """
1724n/a if self.closed:
1725n/a return
1726n/a
1727n/a self.closed = True
1728n/a try:
1729n/a if self.mode in ("a", "w", "x"):
1730n/a self.fileobj.write(NUL * (BLOCKSIZE * 2))
1731n/a self.offset += (BLOCKSIZE * 2)
1732n/a # fill up the end with zero-blocks
1733n/a # (like option -b20 for tar does)
1734n/a blocks, remainder = divmod(self.offset, RECORDSIZE)
1735n/a if remainder > 0:
1736n/a self.fileobj.write(NUL * (RECORDSIZE - remainder))
1737n/a finally:
1738n/a if not self._extfileobj:
1739n/a self.fileobj.close()
1740n/a
1741n/a def getmember(self, name):
1742n/a """Return a TarInfo object for member `name'. If `name' can not be
1743n/a found in the archive, KeyError is raised. If a member occurs more
1744n/a than once in the archive, its last occurrence is assumed to be the
1745n/a most up-to-date version.
1746n/a """
1747n/a tarinfo = self._getmember(name)
1748n/a if tarinfo is None:
1749n/a raise KeyError("filename %r not found" % name)
1750n/a return tarinfo
1751n/a
1752n/a def getmembers(self):
1753n/a """Return the members of the archive as a list of TarInfo objects. The
1754n/a list has the same order as the members in the archive.
1755n/a """
1756n/a self._check()
1757n/a if not self._loaded: # if we want to obtain a list of
1758n/a self._load() # all members, we first have to
1759n/a # scan the whole archive.
1760n/a return self.members
1761n/a
1762n/a def getnames(self):
1763n/a """Return the members of the archive as a list of their names. It has
1764n/a the same order as the list returned by getmembers().
1765n/a """
1766n/a return [tarinfo.name for tarinfo in self.getmembers()]
1767n/a
1768n/a def gettarinfo(self, name=None, arcname=None, fileobj=None):
1769n/a """Create a TarInfo object from the result of os.stat or equivalent
1770n/a on an existing file. The file is either named by `name', or
1771n/a specified as a file object `fileobj' with a file descriptor. If
1772n/a given, `arcname' specifies an alternative name for the file in the
1773n/a archive, otherwise, the name is taken from the 'name' attribute of
1774n/a 'fileobj', or the 'name' argument. The name should be a text
1775n/a string.
1776n/a """
1777n/a self._check("awx")
1778n/a
1779n/a # When fileobj is given, replace name by
1780n/a # fileobj's real name.
1781n/a if fileobj is not None:
1782n/a name = fileobj.name
1783n/a
1784n/a # Building the name of the member in the archive.
1785n/a # Backward slashes are converted to forward slashes,
1786n/a # Absolute paths are turned to relative paths.
1787n/a if arcname is None:
1788n/a arcname = name
1789n/a drv, arcname = os.path.splitdrive(arcname)
1790n/a arcname = arcname.replace(os.sep, "/")
1791n/a arcname = arcname.lstrip("/")
1792n/a
1793n/a # Now, fill the TarInfo object with
1794n/a # information specific for the file.
1795n/a tarinfo = self.tarinfo()
1796n/a tarinfo.tarfile = self # Not needed
1797n/a
1798n/a # Use os.stat or os.lstat, depending on platform
1799n/a # and if symlinks shall be resolved.
1800n/a if fileobj is None:
1801n/a if hasattr(os, "lstat") and not self.dereference:
1802n/a statres = os.lstat(name)
1803n/a else:
1804n/a statres = os.stat(name)
1805n/a else:
1806n/a statres = os.fstat(fileobj.fileno())
1807n/a linkname = ""
1808n/a
1809n/a stmd = statres.st_mode
1810n/a if stat.S_ISREG(stmd):
1811n/a inode = (statres.st_ino, statres.st_dev)
1812n/a if not self.dereference and statres.st_nlink > 1 and \
1813n/a inode in self.inodes and arcname != self.inodes[inode]:
1814n/a # Is it a hardlink to an already
1815n/a # archived file?
1816n/a type = LNKTYPE
1817n/a linkname = self.inodes[inode]
1818n/a else:
1819n/a # The inode is added only if its valid.
1820n/a # For win32 it is always 0.
1821n/a type = REGTYPE
1822n/a if inode[0]:
1823n/a self.inodes[inode] = arcname
1824n/a elif stat.S_ISDIR(stmd):
1825n/a type = DIRTYPE
1826n/a elif stat.S_ISFIFO(stmd):
1827n/a type = FIFOTYPE
1828n/a elif stat.S_ISLNK(stmd):
1829n/a type = SYMTYPE
1830n/a linkname = os.readlink(name)
1831n/a elif stat.S_ISCHR(stmd):
1832n/a type = CHRTYPE
1833n/a elif stat.S_ISBLK(stmd):
1834n/a type = BLKTYPE
1835n/a else:
1836n/a return None
1837n/a
1838n/a # Fill the TarInfo object with all
1839n/a # information we can get.
1840n/a tarinfo.name = arcname
1841n/a tarinfo.mode = stmd
1842n/a tarinfo.uid = statres.st_uid
1843n/a tarinfo.gid = statres.st_gid
1844n/a if type == REGTYPE:
1845n/a tarinfo.size = statres.st_size
1846n/a else:
1847n/a tarinfo.size = 0
1848n/a tarinfo.mtime = statres.st_mtime
1849n/a tarinfo.type = type
1850n/a tarinfo.linkname = linkname
1851n/a if pwd:
1852n/a try:
1853n/a tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1854n/a except KeyError:
1855n/a pass
1856n/a if grp:
1857n/a try:
1858n/a tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1859n/a except KeyError:
1860n/a pass
1861n/a
1862n/a if type in (CHRTYPE, BLKTYPE):
1863n/a if hasattr(os, "major") and hasattr(os, "minor"):
1864n/a tarinfo.devmajor = os.major(statres.st_rdev)
1865n/a tarinfo.devminor = os.minor(statres.st_rdev)
1866n/a return tarinfo
1867n/a
1868n/a def list(self, verbose=True, *, members=None):
1869n/a """Print a table of contents to sys.stdout. If `verbose' is False, only
1870n/a the names of the members are printed. If it is True, an `ls -l'-like
1871n/a output is produced. `members' is optional and must be a subset of the
1872n/a list returned by getmembers().
1873n/a """
1874n/a self._check()
1875n/a
1876n/a if members is None:
1877n/a members = self
1878n/a for tarinfo in members:
1879n/a if verbose:
1880n/a _safe_print(stat.filemode(tarinfo.mode))
1881n/a _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
1882n/a tarinfo.gname or tarinfo.gid))
1883n/a if tarinfo.ischr() or tarinfo.isblk():
1884n/a _safe_print("%10s" %
1885n/a ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
1886n/a else:
1887n/a _safe_print("%10d" % tarinfo.size)
1888n/a _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
1889n/a % time.localtime(tarinfo.mtime)[:6])
1890n/a
1891n/a _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
1892n/a
1893n/a if verbose:
1894n/a if tarinfo.issym():
1895n/a _safe_print("-> " + tarinfo.linkname)
1896n/a if tarinfo.islnk():
1897n/a _safe_print("link to " + tarinfo.linkname)
1898n/a print()
1899n/a
1900n/a def add(self, name, arcname=None, recursive=True, *, filter=None):
1901n/a """Add the file `name' to the archive. `name' may be any type of file
1902n/a (directory, fifo, symbolic link, etc.). If given, `arcname'
1903n/a specifies an alternative name for the file in the archive.
1904n/a Directories are added recursively by default. This can be avoided by
1905n/a setting `recursive' to False. `filter' is a function
1906n/a that expects a TarInfo object argument and returns the changed
1907n/a TarInfo object, if it returns None the TarInfo object will be
1908n/a excluded from the archive.
1909n/a """
1910n/a self._check("awx")
1911n/a
1912n/a if arcname is None:
1913n/a arcname = name
1914n/a
1915n/a # Skip if somebody tries to archive the archive...
1916n/a if self.name is not None and os.path.abspath(name) == self.name:
1917n/a self._dbg(2, "tarfile: Skipped %r" % name)
1918n/a return
1919n/a
1920n/a self._dbg(1, name)
1921n/a
1922n/a # Create a TarInfo object from the file.
1923n/a tarinfo = self.gettarinfo(name, arcname)
1924n/a
1925n/a if tarinfo is None:
1926n/a self._dbg(1, "tarfile: Unsupported type %r" % name)
1927n/a return
1928n/a
1929n/a # Change or exclude the TarInfo object.
1930n/a if filter is not None:
1931n/a tarinfo = filter(tarinfo)
1932n/a if tarinfo is None:
1933n/a self._dbg(2, "tarfile: Excluded %r" % name)
1934n/a return
1935n/a
1936n/a # Append the tar header and data to the archive.
1937n/a if tarinfo.isreg():
1938n/a with bltn_open(name, "rb") as f:
1939n/a self.addfile(tarinfo, f)
1940n/a
1941n/a elif tarinfo.isdir():
1942n/a self.addfile(tarinfo)
1943n/a if recursive:
1944n/a for f in os.listdir(name):
1945n/a self.add(os.path.join(name, f), os.path.join(arcname, f),
1946n/a recursive, filter=filter)
1947n/a
1948n/a else:
1949n/a self.addfile(tarinfo)
1950n/a
1951n/a def addfile(self, tarinfo, fileobj=None):
1952n/a """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1953n/a given, it should be a binary file, and tarinfo.size bytes are read
1954n/a from it and added to the archive. You can create TarInfo objects
1955n/a directly, or by using gettarinfo().
1956n/a """
1957n/a self._check("awx")
1958n/a
1959n/a tarinfo = copy.copy(tarinfo)
1960n/a
1961n/a buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
1962n/a self.fileobj.write(buf)
1963n/a self.offset += len(buf)
1964n/a bufsize=self.copybufsize
1965n/a # If there's data to follow, append it.
1966n/a if fileobj is not None:
1967n/a copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
1968n/a blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1969n/a if remainder > 0:
1970n/a self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1971n/a blocks += 1
1972n/a self.offset += blocks * BLOCKSIZE
1973n/a
1974n/a self.members.append(tarinfo)
1975n/a
1976n/a def extractall(self, path=".", members=None, *, numeric_owner=False):
1977n/a """Extract all members from the archive to the current working
1978n/a directory and set owner, modification time and permissions on
1979n/a directories afterwards. `path' specifies a different directory
1980n/a to extract to. `members' is optional and must be a subset of the
1981n/a list returned by getmembers(). If `numeric_owner` is True, only
1982n/a the numbers for user/group names are used and not the names.
1983n/a """
1984n/a directories = []
1985n/a
1986n/a if members is None:
1987n/a members = self
1988n/a
1989n/a for tarinfo in members:
1990n/a if tarinfo.isdir():
1991n/a # Extract directories with a safe mode.
1992n/a directories.append(tarinfo)
1993n/a tarinfo = copy.copy(tarinfo)
1994n/a tarinfo.mode = 0o700
1995n/a # Do not set_attrs directories, as we will do that further down
1996n/a self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
1997n/a numeric_owner=numeric_owner)
1998n/a
1999n/a # Reverse sort directories.
2000n/a directories.sort(key=lambda a: a.name)
2001n/a directories.reverse()
2002n/a
2003n/a # Set correct owner, mtime and filemode on directories.
2004n/a for tarinfo in directories:
2005n/a dirpath = os.path.join(path, tarinfo.name)
2006n/a try:
2007n/a self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
2008n/a self.utime(tarinfo, dirpath)
2009n/a self.chmod(tarinfo, dirpath)
2010n/a except ExtractError as e:
2011n/a if self.errorlevel > 1:
2012n/a raise
2013n/a else:
2014n/a self._dbg(1, "tarfile: %s" % e)
2015n/a
2016n/a def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
2017n/a """Extract a member from the archive to the current working directory,
2018n/a using its full name. Its file information is extracted as accurately
2019n/a as possible. `member' may be a filename or a TarInfo object. You can
2020n/a specify a different directory using `path'. File attributes (owner,
2021n/a mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2022n/a is True, only the numbers for user/group names are used and not
2023n/a the names.
2024n/a """
2025n/a self._check("r")
2026n/a
2027n/a if isinstance(member, str):
2028n/a tarinfo = self.getmember(member)
2029n/a else:
2030n/a tarinfo = member
2031n/a
2032n/a # Prepare the link target for makelink().
2033n/a if tarinfo.islnk():
2034n/a tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2035n/a
2036n/a try:
2037n/a self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2038n/a set_attrs=set_attrs,
2039n/a numeric_owner=numeric_owner)
2040n/a except OSError as e:
2041n/a if self.errorlevel > 0:
2042n/a raise
2043n/a else:
2044n/a if e.filename is None:
2045n/a self._dbg(1, "tarfile: %s" % e.strerror)
2046n/a else:
2047n/a self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2048n/a except ExtractError as e:
2049n/a if self.errorlevel > 1:
2050n/a raise
2051n/a else:
2052n/a self._dbg(1, "tarfile: %s" % e)
2053n/a
2054n/a def extractfile(self, member):
2055n/a """Extract a member from the archive as a file object. `member' may be
2056n/a a filename or a TarInfo object. If `member' is a regular file or a
2057n/a link, an io.BufferedReader object is returned. Otherwise, None is
2058n/a returned.
2059n/a """
2060n/a self._check("r")
2061n/a
2062n/a if isinstance(member, str):
2063n/a tarinfo = self.getmember(member)
2064n/a else:
2065n/a tarinfo = member
2066n/a
2067n/a if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2068n/a # Members with unknown types are treated as regular files.
2069n/a return self.fileobject(self, tarinfo)
2070n/a
2071n/a elif tarinfo.islnk() or tarinfo.issym():
2072n/a if isinstance(self.fileobj, _Stream):
2073n/a # A small but ugly workaround for the case that someone tries
2074n/a # to extract a (sym)link as a file-object from a non-seekable
2075n/a # stream of tar blocks.
2076n/a raise StreamError("cannot extract (sym)link as file object")
2077n/a else:
2078n/a # A (sym)link's file object is its target's file object.
2079n/a return self.extractfile(self._find_link_target(tarinfo))
2080n/a else:
2081n/a # If there's no data associated with the member (directory, chrdev,
2082n/a # blkdev, etc.), return None instead of a file object.
2083n/a return None
2084n/a
2085n/a def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2086n/a numeric_owner=False):
2087n/a """Extract the TarInfo object tarinfo to a physical
2088n/a file called targetpath.
2089n/a """
2090n/a # Fetch the TarInfo object for the given name
2091n/a # and build the destination pathname, replacing
2092n/a # forward slashes to platform specific separators.
2093n/a targetpath = targetpath.rstrip("/")
2094n/a targetpath = targetpath.replace("/", os.sep)
2095n/a
2096n/a # Create all upper directories.
2097n/a upperdirs = os.path.dirname(targetpath)
2098n/a if upperdirs and not os.path.exists(upperdirs):
2099n/a # Create directories that are not part of the archive with
2100n/a # default permissions.
2101n/a os.makedirs(upperdirs)
2102n/a
2103n/a if tarinfo.islnk() or tarinfo.issym():
2104n/a self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2105n/a else:
2106n/a self._dbg(1, tarinfo.name)
2107n/a
2108n/a if tarinfo.isreg():
2109n/a self.makefile(tarinfo, targetpath)
2110n/a elif tarinfo.isdir():
2111n/a self.makedir(tarinfo, targetpath)
2112n/a elif tarinfo.isfifo():
2113n/a self.makefifo(tarinfo, targetpath)
2114n/a elif tarinfo.ischr() or tarinfo.isblk():
2115n/a self.makedev(tarinfo, targetpath)
2116n/a elif tarinfo.islnk() or tarinfo.issym():
2117n/a self.makelink(tarinfo, targetpath)
2118n/a elif tarinfo.type not in SUPPORTED_TYPES:
2119n/a self.makeunknown(tarinfo, targetpath)
2120n/a else:
2121n/a self.makefile(tarinfo, targetpath)
2122n/a
2123n/a if set_attrs:
2124n/a self.chown(tarinfo, targetpath, numeric_owner)
2125n/a if not tarinfo.issym():
2126n/a self.chmod(tarinfo, targetpath)
2127n/a self.utime(tarinfo, targetpath)
2128n/a
2129n/a #--------------------------------------------------------------------------
2130n/a # Below are the different file methods. They are called via
2131n/a # _extract_member() when extract() is called. They can be replaced in a
2132n/a # subclass to implement other functionality.
2133n/a
2134n/a def makedir(self, tarinfo, targetpath):
2135n/a """Make a directory called targetpath.
2136n/a """
2137n/a try:
2138n/a # Use a safe mode for the directory, the real mode is set
2139n/a # later in _extract_member().
2140n/a os.mkdir(targetpath, 0o700)
2141n/a except FileExistsError:
2142n/a pass
2143n/a
2144n/a def makefile(self, tarinfo, targetpath):
2145n/a """Make a file called targetpath.
2146n/a """
2147n/a source = self.fileobj
2148n/a source.seek(tarinfo.offset_data)
2149n/a bufsize = self.copybufsize
2150n/a with bltn_open(targetpath, "wb") as target:
2151n/a if tarinfo.sparse is not None:
2152n/a for offset, size in tarinfo.sparse:
2153n/a target.seek(offset)
2154n/a copyfileobj(source, target, size, ReadError, bufsize)
2155n/a target.seek(tarinfo.size)
2156n/a target.truncate()
2157n/a else:
2158n/a copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2159n/a
2160n/a def makeunknown(self, tarinfo, targetpath):
2161n/a """Make a file from a TarInfo object with an unknown type
2162n/a at targetpath.
2163n/a """
2164n/a self.makefile(tarinfo, targetpath)
2165n/a self._dbg(1, "tarfile: Unknown file type %r, " \
2166n/a "extracted as regular file." % tarinfo.type)
2167n/a
2168n/a def makefifo(self, tarinfo, targetpath):
2169n/a """Make a fifo called targetpath.
2170n/a """
2171n/a if hasattr(os, "mkfifo"):
2172n/a os.mkfifo(targetpath)
2173n/a else:
2174n/a raise ExtractError("fifo not supported by system")
2175n/a
2176n/a def makedev(self, tarinfo, targetpath):
2177n/a """Make a character or block device called targetpath.
2178n/a """
2179n/a if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2180n/a raise ExtractError("special devices not supported by system")
2181n/a
2182n/a mode = tarinfo.mode
2183n/a if tarinfo.isblk():
2184n/a mode |= stat.S_IFBLK
2185n/a else:
2186n/a mode |= stat.S_IFCHR
2187n/a
2188n/a os.mknod(targetpath, mode,
2189n/a os.makedev(tarinfo.devmajor, tarinfo.devminor))
2190n/a
2191n/a def makelink(self, tarinfo, targetpath):
2192n/a """Make a (symbolic) link called targetpath. If it cannot be created
2193n/a (platform limitation), we try to make a copy of the referenced file
2194n/a instead of a link.
2195n/a """
2196n/a try:
2197n/a # For systems that support symbolic and hard links.
2198n/a if tarinfo.issym():
2199n/a os.symlink(tarinfo.linkname, targetpath)
2200n/a else:
2201n/a # See extract().
2202n/a if os.path.exists(tarinfo._link_target):
2203n/a os.link(tarinfo._link_target, targetpath)
2204n/a else:
2205n/a self._extract_member(self._find_link_target(tarinfo),
2206n/a targetpath)
2207n/a except symlink_exception:
2208n/a try:
2209n/a self._extract_member(self._find_link_target(tarinfo),
2210n/a targetpath)
2211n/a except KeyError:
2212n/a raise ExtractError("unable to resolve link inside archive")
2213n/a
2214n/a def chown(self, tarinfo, targetpath, numeric_owner):
2215n/a """Set owner of targetpath according to tarinfo. If numeric_owner
2216n/a is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
2217n/a is False, fall back to .gid/.uid when the search based on name
2218n/a fails.
2219n/a """
2220n/a if hasattr(os, "geteuid") and os.geteuid() == 0:
2221n/a # We have to be root to do so.
2222n/a g = tarinfo.gid
2223n/a u = tarinfo.uid
2224n/a if not numeric_owner:
2225n/a try:
2226n/a if grp:
2227n/a g = grp.getgrnam(tarinfo.gname)[2]
2228n/a except KeyError:
2229n/a pass
2230n/a try:
2231n/a if pwd:
2232n/a u = pwd.getpwnam(tarinfo.uname)[2]
2233n/a except KeyError:
2234n/a pass
2235n/a try:
2236n/a if tarinfo.issym() and hasattr(os, "lchown"):
2237n/a os.lchown(targetpath, u, g)
2238n/a else:
2239n/a os.chown(targetpath, u, g)
2240n/a except OSError:
2241n/a raise ExtractError("could not change owner")
2242n/a
2243n/a def chmod(self, tarinfo, targetpath):
2244n/a """Set file permissions of targetpath according to tarinfo.
2245n/a """
2246n/a if hasattr(os, 'chmod'):
2247n/a try:
2248n/a os.chmod(targetpath, tarinfo.mode)
2249n/a except OSError:
2250n/a raise ExtractError("could not change mode")
2251n/a
2252n/a def utime(self, tarinfo, targetpath):
2253n/a """Set modification time of targetpath according to tarinfo.
2254n/a """
2255n/a if not hasattr(os, 'utime'):
2256n/a return
2257n/a try:
2258n/a os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2259n/a except OSError:
2260n/a raise ExtractError("could not change modification time")
2261n/a
2262n/a #--------------------------------------------------------------------------
2263n/a def next(self):
2264n/a """Return the next member of the archive as a TarInfo object, when
2265n/a TarFile is opened for reading. Return None if there is no more
2266n/a available.
2267n/a """
2268n/a self._check("ra")
2269n/a if self.firstmember is not None:
2270n/a m = self.firstmember
2271n/a self.firstmember = None
2272n/a return m
2273n/a
2274n/a # Advance the file pointer.
2275n/a if self.offset != self.fileobj.tell():
2276n/a self.fileobj.seek(self.offset - 1)
2277n/a if not self.fileobj.read(1):
2278n/a raise ReadError("unexpected end of data")
2279n/a
2280n/a # Read the next block.
2281n/a tarinfo = None
2282n/a while True:
2283n/a try:
2284n/a tarinfo = self.tarinfo.fromtarfile(self)
2285n/a except EOFHeaderError as e:
2286n/a if self.ignore_zeros:
2287n/a self._dbg(2, "0x%X: %s" % (self.offset, e))
2288n/a self.offset += BLOCKSIZE
2289n/a continue
2290n/a except InvalidHeaderError as e:
2291n/a if self.ignore_zeros:
2292n/a self._dbg(2, "0x%X: %s" % (self.offset, e))
2293n/a self.offset += BLOCKSIZE
2294n/a continue
2295n/a elif self.offset == 0:
2296n/a raise ReadError(str(e))
2297n/a except EmptyHeaderError:
2298n/a if self.offset == 0:
2299n/a raise ReadError("empty file")
2300n/a except TruncatedHeaderError as e:
2301n/a if self.offset == 0:
2302n/a raise ReadError(str(e))
2303n/a except SubsequentHeaderError as e:
2304n/a raise ReadError(str(e))
2305n/a break
2306n/a
2307n/a if tarinfo is not None:
2308n/a self.members.append(tarinfo)
2309n/a else:
2310n/a self._loaded = True
2311n/a
2312n/a return tarinfo
2313n/a
2314n/a #--------------------------------------------------------------------------
2315n/a # Little helper methods:
2316n/a
2317n/a def _getmember(self, name, tarinfo=None, normalize=False):
2318n/a """Find an archive member by name from bottom to top.
2319n/a If tarinfo is given, it is used as the starting point.
2320n/a """
2321n/a # Ensure that all members have been loaded.
2322n/a members = self.getmembers()
2323n/a
2324n/a # Limit the member search list up to tarinfo.
2325n/a if tarinfo is not None:
2326n/a members = members[:members.index(tarinfo)]
2327n/a
2328n/a if normalize:
2329n/a name = os.path.normpath(name)
2330n/a
2331n/a for member in reversed(members):
2332n/a if normalize:
2333n/a member_name = os.path.normpath(member.name)
2334n/a else:
2335n/a member_name = member.name
2336n/a
2337n/a if name == member_name:
2338n/a return member
2339n/a
2340n/a def _load(self):
2341n/a """Read through the entire archive file and look for readable
2342n/a members.
2343n/a """
2344n/a while True:
2345n/a tarinfo = self.next()
2346n/a if tarinfo is None:
2347n/a break
2348n/a self._loaded = True
2349n/a
2350n/a def _check(self, mode=None):
2351n/a """Check if TarFile is still open, and if the operation's mode
2352n/a corresponds to TarFile's mode.
2353n/a """
2354n/a if self.closed:
2355n/a raise OSError("%s is closed" % self.__class__.__name__)
2356n/a if mode is not None and self.mode not in mode:
2357n/a raise OSError("bad operation for mode %r" % self.mode)
2358n/a
2359n/a def _find_link_target(self, tarinfo):
2360n/a """Find the target member of a symlink or hardlink member in the
2361n/a archive.
2362n/a """
2363n/a if tarinfo.issym():
2364n/a # Always search the entire archive.
2365n/a linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
2366n/a limit = None
2367n/a else:
2368n/a # Search the archive before the link, because a hard link is
2369n/a # just a reference to an already archived file.
2370n/a linkname = tarinfo.linkname
2371n/a limit = tarinfo
2372n/a
2373n/a member = self._getmember(linkname, tarinfo=limit, normalize=True)
2374n/a if member is None:
2375n/a raise KeyError("linkname %r not found" % linkname)
2376n/a return member
2377n/a
2378n/a def __iter__(self):
2379n/a """Provide an iterator object.
2380n/a """
2381n/a if self._loaded:
2382n/a yield from self.members
2383n/a return
2384n/a
2385n/a # Yield items using TarFile's next() method.
2386n/a # When all members have been read, set TarFile as _loaded.
2387n/a index = 0
2388n/a # Fix for SF #1100429: Under rare circumstances it can
2389n/a # happen that getmembers() is called during iteration,
2390n/a # which will have already exhausted the next() method.
2391n/a if self.firstmember is not None:
2392n/a tarinfo = self.next()
2393n/a index += 1
2394n/a yield tarinfo
2395n/a
2396n/a while True:
2397n/a if index < len(self.members):
2398n/a tarinfo = self.members[index]
2399n/a elif not self._loaded:
2400n/a tarinfo = self.next()
2401n/a if not tarinfo:
2402n/a self._loaded = True
2403n/a return
2404n/a else:
2405n/a return
2406n/a index += 1
2407n/a yield tarinfo
2408n/a
2409n/a def _dbg(self, level, msg):
2410n/a """Write debugging output to sys.stderr.
2411n/a """
2412n/a if level <= self.debug:
2413n/a print(msg, file=sys.stderr)
2414n/a
2415n/a def __enter__(self):
2416n/a self._check()
2417n/a return self
2418n/a
2419n/a def __exit__(self, type, value, traceback):
2420n/a if type is None:
2421n/a self.close()
2422n/a else:
2423n/a # An exception occurred. We must not call close() because
2424n/a # it would try to write end-of-archive blocks and padding.
2425n/a if not self._extfileobj:
2426n/a self.fileobj.close()
2427n/a self.closed = True
2428n/a
2429n/a#--------------------
2430n/a# exported functions
2431n/a#--------------------
2432n/adef is_tarfile(name):
2433n/a """Return True if name points to a tar archive that we
2434n/a are able to handle, else return False.
2435n/a """
2436n/a try:
2437n/a t = open(name)
2438n/a t.close()
2439n/a return True
2440n/a except TarError:
2441n/a return False
2442n/a
2443n/aopen = TarFile.open
2444n/a
2445n/a
2446n/adef main():
2447n/a import argparse
2448n/a
2449n/a description = 'A simple command line interface for tarfile module.'
2450n/a parser = argparse.ArgumentParser(description=description)
2451n/a parser.add_argument('-v', '--verbose', action='store_true', default=False,
2452n/a help='Verbose output')
2453n/a group = parser.add_mutually_exclusive_group()
2454n/a group.add_argument('-l', '--list', metavar='<tarfile>',
2455n/a help='Show listing of a tarfile')
2456n/a group.add_argument('-e', '--extract', nargs='+',
2457n/a metavar=('<tarfile>', '<output_dir>'),
2458n/a help='Extract tarfile into target dir')
2459n/a group.add_argument('-c', '--create', nargs='+',
2460n/a metavar=('<name>', '<file>'),
2461n/a help='Create tarfile from sources')
2462n/a group.add_argument('-t', '--test', metavar='<tarfile>',
2463n/a help='Test if a tarfile is valid')
2464n/a args = parser.parse_args()
2465n/a
2466n/a if args.test:
2467n/a src = args.test
2468n/a if is_tarfile(src):
2469n/a with open(src, 'r') as tar:
2470n/a tar.getmembers()
2471n/a print(tar.getmembers(), file=sys.stderr)
2472n/a if args.verbose:
2473n/a print('{!r} is a tar archive.'.format(src))
2474n/a else:
2475n/a parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2476n/a
2477n/a elif args.list:
2478n/a src = args.list
2479n/a if is_tarfile(src):
2480n/a with TarFile.open(src, 'r:*') as tf:
2481n/a tf.list(verbose=args.verbose)
2482n/a else:
2483n/a parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2484n/a
2485n/a elif args.extract:
2486n/a if len(args.extract) == 1:
2487n/a src = args.extract[0]
2488n/a curdir = os.curdir
2489n/a elif len(args.extract) == 2:
2490n/a src, curdir = args.extract
2491n/a else:
2492n/a parser.exit(1, parser.format_help())
2493n/a
2494n/a if is_tarfile(src):
2495n/a with TarFile.open(src, 'r:*') as tf:
2496n/a tf.extractall(path=curdir)
2497n/a if args.verbose:
2498n/a if curdir == '.':
2499n/a msg = '{!r} file is extracted.'.format(src)
2500n/a else:
2501n/a msg = ('{!r} file is extracted '
2502n/a 'into {!r} directory.').format(src, curdir)
2503n/a print(msg)
2504n/a else:
2505n/a parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2506n/a
2507n/a elif args.create:
2508n/a tar_name = args.create.pop(0)
2509n/a _, ext = os.path.splitext(tar_name)
2510n/a compressions = {
2511n/a # gz
2512n/a '.gz': 'gz',
2513n/a '.tgz': 'gz',
2514n/a # xz
2515n/a '.xz': 'xz',
2516n/a '.txz': 'xz',
2517n/a # bz2
2518n/a '.bz2': 'bz2',
2519n/a '.tbz': 'bz2',
2520n/a '.tbz2': 'bz2',
2521n/a '.tb2': 'bz2',
2522n/a }
2523n/a tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
2524n/a tar_files = args.create
2525n/a
2526n/a with TarFile.open(tar_name, tar_mode) as tf:
2527n/a for file_name in tar_files:
2528n/a tf.add(file_name)
2529n/a
2530n/a if args.verbose:
2531n/a print('{!r} file created.'.format(tar_name))
2532n/a
2533n/a else:
2534n/a parser.exit(1, parser.format_help())
2535n/a
2536n/aif __name__ == '__main__':
2537n/a main()