» Core Development > Code coverage > Lib/mimetypes.py

Python code coverage for Lib/mimetypes.py

#countcontent
1n/a"""Guess the MIME type of a file.
2n/a
3n/aThis module defines two useful functions:
4n/a
5n/aguess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
6n/a
7n/aguess_extension(type, strict=True) -- guess the extension for a given MIME type.
8n/a
9n/aIt also contains the following, for tuning the behavior:
10n/a
11n/aData:
12n/a
13n/aknownfiles -- list of files to parse
14n/ainited -- flag set when init() has been called
15n/asuffix_map -- dictionary mapping suffixes to suffixes
16n/aencodings_map -- dictionary mapping suffixes to encodings
17n/atypes_map -- dictionary mapping suffixes to types
18n/a
19n/aFunctions:
20n/a
21n/ainit([files]) -- parse a list of files, default knownfiles (on Windows, the
22n/a default values are taken from the registry)
23n/aread_mime_types(file) -- parse one file, return a dictionary or None
24n/a"""
25n/a
26n/aimport os
27n/aimport sys
28n/aimport posixpath
29n/aimport urllib.parse
30n/atry:
31n/a import winreg as _winreg
32n/aexcept ImportError:
33n/a _winreg = None
34n/a
35n/a__all__ = [
36n/a "knownfiles", "inited", "MimeTypes",
37n/a "guess_type", "guess_all_extensions", "guess_extension",
38n/a "add_type", "init", "read_mime_types",
39n/a "suffix_map", "encodings_map", "types_map", "common_types"
40n/a]
41n/a
# Candidate mime.types files.  init() parses every one that exists, in this
# order, so a mapping found in a later file replaces one from an earlier file.
# Each path is listed once: "/usr/local/etc/httpd/conf/mime.types" used to
# appear twice, which only caused the same file to be parsed twice.  The later
# position is the one kept, so its precedence over the netscape file is
# unchanged.
knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",                    # Mac OS X
    "/etc/httpd/conf/mime.types",               # Apache
    "/etc/apache/mime.types",                   # Apache 1
    "/etc/apache2/mime.types",                  # Apache 2
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
]

# Set to True by init(); MimeTypes.__init__() checks it to avoid re-entering
# init().
inited = False
# The shared MimeTypes instance behind the module-level functions; created by
# init().
_db = None
56n/a
57n/a
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.
    """

    def __init__(self, filenames=(), strict=True):
        """Create a datastore seeded from the module-level default tables.

        The module defaults (encodings_map, suffix_map, types_map and
        common_types) are copied in, then every file named in `filenames'
        is read in order with the given `strict' flag.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {})  # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for ext, type in types_map.items():
            self.add_type(type, ext, True)
        for ext, type in common_types.items():
            self.add_type(type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        # `strict' doubles as the index into the (non-strict, strict) pairs.
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    @staticmethod
    def _split_scheme(url):
        """Split 'scheme:rest' into (scheme, rest), lower-casing the scheme.

        A scheme is recognised only when the text before the first ':' is
        non-empty and contains no '/'; otherwise (None, url) is returned.
        This reproduces urllib.parse.splittype(), which is deprecated,
        without calling it.
        """
        colon = url.find(':')
        if colon > 0 and '/' not in url[:colon]:
            return url[:colon].lower(), url[colon + 1:]
        return None, url

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = self._split_scheme(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                type = url[:semi]
            else:
                type = url[:comma]
            if '=' in type or '/' not in type:
                type = 'text/plain'
            return type, None  # never compressed, so encoding is None

        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. '.tgz' -> '.tar.gz') until none is left.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None

        # The strict table is always consulted first; the non-strict table
        # is a fallback only when the caller asked for it.
        if strict:
            tables = (self.types_map[True],)
        else:
            tables = (self.types_map[True], self.types_map[False])
        lowered = ext.lower()
        for table in tables:
            if ext in table:
                return table[ext], encoding
            if lowered in table:
                return table[lowered], encoding
        return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().  The
        list is empty when no extension is known, and is always a new list
        that the caller may modify freely.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Copy: appending the non-strict extensions to the stored list would
        # leak them into every later strict lookup.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        The file is opened as UTF-8 text.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        with open(filename, encoding='utf-8') as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' only needs a readline() method.  Each line is split on
        whitespace; everything from the first word that starts with '#'
        is ignored, the first remaining word is the MIME type and the
        others are extensions given without their leading dot.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while True:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # Drop a trailing comment, if any.
            for i, word in enumerate(words):
                if word[0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(type, '.' + suff, strict)

    def read_windows_registry(self, strict=True):
        """
        Load the MIME types database from Windows registry.

        Does nothing when the winreg module is not available.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """

        # Windows only
        if not _winreg:
            return

        def enum_types(mimedb):
            # Yield sub-key names until EnumKey reports there are no more;
            # names containing a NUL character are skipped.
            i = 0
            while True:
                try:
                    ctype = _winreg.EnumKey(mimedb, i)
                except OSError:
                    break
                else:
                    if '\0' not in ctype:
                        yield ctype
                i += 1

        with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
            for subkeyname in enum_types(hkcr):
                # Only check file extensions; test the name before opening
                # the key so non-extension keys are never opened.
                if not subkeyname.startswith("."):
                    continue
                try:
                    with _winreg.OpenKey(hkcr, subkeyname) as subkey:
                        # raises OSError if no 'Content Type' value
                        mimetype, datatype = _winreg.QueryValueEx(
                            subkey, 'Content Type')
                        if datatype != _winreg.REG_SZ:
                            continue
                        self.add_type(mimetype, subkeyname, strict)
                except OSError:
                    continue
270n/a
def guess_type(url, strict=True):
    """Guess the MIME type and encoding of a file from its URL.

    Returns a (type, encoding) tuple.  `type' is a 'type/subtype' string
    suitable for a MIME Content-type header, or None when the suffix is
    missing or unknown.  `encoding' names the program used to encode the
    data (e.g. compress or gzip), or is None when there is no encoding.
    All lookups are table driven: encoding suffixes are matched case
    sensitively, type suffixes case sensitively first and then case
    insensitively.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all treated as
    ".tar.gz" by way of the suffix_map dictionary.

    With `strict' false, a number of commonly found but non-standard types
    are recognised as well.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    database = _db
    return database.guess_type(url, strict)
292n/a
293n/a
def guess_all_extensions(type, strict=True):
    """Guess every filename extension associated with a MIME type.

    Returns a list of extension strings, each including its leading
    dot ('.').  The extensions are not guaranteed to have been associated
    with any particular data stream, but guess_type() would map each of
    them to the MIME type `type'.  The result comes straight from
    MimeTypes.guess_all_extensions(), which returns a list.

    With `strict' false, a number of commonly found but non-standard types
    are considered as well.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    database = _db
    return database.guess_all_extensions(type, strict)
310n/a
def guess_extension(type, strict=True):
    """Guess a single filename extension for a MIME type.

    Returns an extension string including its leading dot ('.'), or None
    when no extension can be guessed for `type'.  The extension is not
    guaranteed to have been associated with any particular data stream,
    but guess_type() would map it to the MIME type `type'.

    With `strict' false, a number of commonly found but non-standard types
    are considered as well.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    database = _db
    return database.guess_extension(type, strict)
326n/a
def add_type(type, ext, strict=True):
    """Register a mapping between a MIME type and an extension.

    An extension that is already known gets `type' as its new type,
    replacing the old one.  A type that is already known gets `ext'
    added to its list of known extensions.

    With `strict' true the mapping goes into the list of standard types,
    otherwise into the list of non-standard types.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    database = _db
    return database.add_type(type, ext, strict)
342n/a
343n/a
def init(files=None):
    """Build the shared MIME database and publish it in the module globals.

    A fresh MimeTypes instance is created.  When `files' is None, the
    Windows registry is read first (if winreg is available) and the
    module-level `knownfiles' list is used as the list of files.  Every
    entry that is an existing file is then read.  Finally the
    module-level encodings_map, suffix_map, types_map and common_types
    are rebound to the new instance's tables.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    # Flag first, so that MimeTypes.__init__() doesn't call us again.
    inited = True
    database = MimeTypes()
    if files is None:
        if _winreg:
            database.read_windows_registry()
        files = knownfiles
    # filter() is lazy, so each path is checked right before it is read.
    for path in filter(os.path.isfile, files):
        database.read(path)
    encodings_map, suffix_map = database.encodings_map, database.suffix_map
    # The instance stores its tables as a (non-strict, strict) pair.
    common_types, types_map = database.types_map
    # Make the DB a global variable now that it is fully initialized
    _db = database
362n/a
363n/a
def read_mime_types(file):
    """Parse one mime.types-format file and return its mappings.

    `file' is the path of the file to read.  It is opened as UTF-8 text,
    the same encoding MimeTypes.read() uses.

    Returns a dictionary mapping extensions (with their leading dot) to
    MIME types, or None when the file cannot be opened.  The dictionary
    is the strict table of a new MimeTypes instance, so it also holds the
    default standard types that instance starts with.
    """
    try:
        f = open(file, encoding='utf-8')
    except OSError:
        return None
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]
373n/a
374n/a
def _default_mime_types():
    """(Re)create the module-level default lookup tables.

    Rebinds four module globals:

    suffix_map    -- shorthand suffix -> the suffixes it stands for
    encodings_map -- suffix -> name of the encoding program
    types_map     -- suffix -> standard MIME type
    common_types  -- suffix -> non-standard but commonly found MIME type
    """
    global suffix_map
    global encodings_map
    global types_map
    global common_types

    suffix_map = {
        '.svgz': '.svg.gz',
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
        '.txz': '.tar.xz',
    }

    encodings_map = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
        '.xz': 'xz',
    }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.iana.org/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted!
    #
    # Each key appears exactly once.  '.cdf' and '.xls' used to be listed
    # twice ('application/x-cdf' and 'application/excel' came first); in a
    # dict literal the later value wins, so only the effective entries are
    # kept here.
    types_map = {
        '.a': 'application/octet-stream',
        '.ai': 'application/postscript',
        '.aif': 'audio/x-aiff',
        '.aifc': 'audio/x-aiff',
        '.aiff': 'audio/x-aiff',
        '.au': 'audio/basic',
        '.avi': 'video/x-msvideo',
        '.bat': 'text/plain',
        '.bcpio': 'application/x-bcpio',
        '.bin': 'application/octet-stream',
        '.bmp': 'image/x-ms-bmp',
        '.c': 'text/plain',
        '.cdf': 'application/x-netcdf',
        '.cpio': 'application/x-cpio',
        '.csh': 'application/x-csh',
        '.css': 'text/css',
        '.csv': 'text/csv',
        '.dll': 'application/octet-stream',
        '.doc': 'application/msword',
        '.dot': 'application/msword',
        '.dvi': 'application/x-dvi',
        '.eml': 'message/rfc822',
        '.eps': 'application/postscript',
        '.etx': 'text/x-setext',
        '.exe': 'application/octet-stream',
        '.gif': 'image/gif',
        '.gtar': 'application/x-gtar',
        '.h': 'text/plain',
        '.hdf': 'application/x-hdf',
        '.htm': 'text/html',
        '.html': 'text/html',
        '.ico': 'image/vnd.microsoft.icon',
        '.ief': 'image/ief',
        '.jpe': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.jpg': 'image/jpeg',
        '.js': 'application/javascript',
        '.ksh': 'text/plain',
        '.latex': 'application/x-latex',
        '.m1v': 'video/mpeg',
        '.m3u': 'application/vnd.apple.mpegurl',
        '.m3u8': 'application/vnd.apple.mpegurl',
        '.man': 'application/x-troff-man',
        '.me': 'application/x-troff-me',
        '.mht': 'message/rfc822',
        '.mhtml': 'message/rfc822',
        '.mif': 'application/x-mif',
        '.mov': 'video/quicktime',
        '.movie': 'video/x-sgi-movie',
        '.mp2': 'audio/mpeg',
        '.mp3': 'audio/mpeg',
        '.mp4': 'video/mp4',
        '.mpa': 'video/mpeg',
        '.mpe': 'video/mpeg',
        '.mpeg': 'video/mpeg',
        '.mpg': 'video/mpeg',
        '.ms': 'application/x-troff-ms',
        '.nc': 'application/x-netcdf',
        '.nws': 'message/rfc822',
        '.o': 'application/octet-stream',
        '.obj': 'application/octet-stream',
        '.oda': 'application/oda',
        '.p12': 'application/x-pkcs12',
        '.p7c': 'application/pkcs7-mime',
        '.pbm': 'image/x-portable-bitmap',
        '.pdf': 'application/pdf',
        '.pfx': 'application/x-pkcs12',
        '.pgm': 'image/x-portable-graymap',
        '.pl': 'text/plain',
        '.png': 'image/png',
        '.pnm': 'image/x-portable-anymap',
        '.pot': 'application/vnd.ms-powerpoint',
        '.ppa': 'application/vnd.ms-powerpoint',
        '.ppm': 'image/x-portable-pixmap',
        '.pps': 'application/vnd.ms-powerpoint',
        '.ppt': 'application/vnd.ms-powerpoint',
        '.ps': 'application/postscript',
        '.pwz': 'application/vnd.ms-powerpoint',
        '.py': 'text/x-python',
        '.pyc': 'application/x-python-code',
        '.pyo': 'application/x-python-code',
        '.qt': 'video/quicktime',
        '.ra': 'audio/x-pn-realaudio',
        '.ram': 'application/x-pn-realaudio',
        '.ras': 'image/x-cmu-raster',
        '.rdf': 'application/xml',
        '.rgb': 'image/x-rgb',
        '.roff': 'application/x-troff',
        '.rtx': 'text/richtext',
        '.sgm': 'text/x-sgml',
        '.sgml': 'text/x-sgml',
        '.sh': 'application/x-sh',
        '.shar': 'application/x-shar',
        '.snd': 'audio/basic',
        '.so': 'application/octet-stream',
        '.src': 'application/x-wais-source',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc': 'application/x-sv4crc',
        '.svg': 'image/svg+xml',
        '.swf': 'application/x-shockwave-flash',
        '.t': 'application/x-troff',
        '.tar': 'application/x-tar',
        '.tcl': 'application/x-tcl',
        '.tex': 'application/x-tex',
        '.texi': 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.tif': 'image/tiff',
        '.tiff': 'image/tiff',
        '.tr': 'application/x-troff',
        '.tsv': 'text/tab-separated-values',
        '.txt': 'text/plain',
        '.ustar': 'application/x-ustar',
        '.vcf': 'text/x-vcard',
        '.wav': 'audio/x-wav',
        '.webm': 'video/webm',
        '.wiz': 'application/msword',
        '.wsdl': 'application/xml',
        '.xbm': 'image/x-xbitmap',
        '.xlb': 'application/vnd.ms-excel',
        '.xls': 'application/vnd.ms-excel',
        '.xml': 'text/xml',
        '.xpdl': 'application/xml',
        '.xpm': 'image/x-xpixmap',
        '.xsl': 'application/xml',
        '.xwd': 'image/x-xwindowdump',
        '.zip': 'application/zip',
    }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=0 flag is given to the API methods.

    # Please sort these too
    common_types = {
        '.jpg': 'image/jpg',
        '.mid': 'audio/midi',
        '.midi': 'audio/midi',
        '.pct': 'image/pict',
        '.pic': 'image/pict',
        '.pict': 'image/pict',
        '.rtf': 'application/rtf',
        '.xul': 'text/xul',
    }
549n/a
550n/a
551n/a_default_mime_types()
552n/a
553n/a
if __name__ == '__main__':
    # Command-line front end: guess the type (or, with -e, the extension)
    # for each argument and print the result.
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
 --help / -h -- print this message and exit
 --lenient / -l -- additionally search of some common, but non-standard
 types.
 --extension / -e -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        """Print the usage text, then `msg' if given, and exit with `code'."""
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hle', ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    # Defaults: strict lookup, guess the type rather than the extension.
    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if guess:
                print(guess)
            else:
                print("I don't know anything about type", gtype)
        else:
            guess, encoding = guess_type(gtype, strict)
            if guess:
                print('type:', guess, 'encoding:', encoding)
            else:
                print("I don't know anything about type", gtype)