»Core Development>Code coverage>Lib/encodings/__init__.py

Python code coverage for Lib/encodings/__init__.py

# count content
1n/a""" Standard "encodings" Package
2n/a
3n/a Standard Python encoding modules are stored in this package
4n/a directory.
5n/a
6n/a Codec modules must have names corresponding to normalized encoding
7n/a names as defined in the normalize_encoding() function below, e.g.
8n/a 'utf-8' must be implemented by the module 'utf_8.py'.
9n/a
10n/a Each codec module must export the following interface:
11n/a
12n/a * getregentry() -> codecs.CodecInfo object
13n/a The getregentry() API must return a CodecInfo object with encoder, decoder,
14n/a incrementalencoder, incrementaldecoder, streamwriter and streamreader
15n/a attributes which adhere to the Python Codec Interface Standard.
16n/a
17n/a In addition, a module may optionally also define the following
18n/a APIs which are then used by the package's codec search function:
19n/a
20n/a * getaliases() -> sequence of encoding name strings to use as aliases
21n/a
22n/a Alias names returned by getaliases() must be normalized encoding
23n/a names as defined by normalize_encoding().
24n/a
25n/aWritten by Marc-Andre Lemburg (mal@lemburg.com).
26n/a
27n/a(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
28n/a
29n/a"""#"
30n/a
31n/aimport codecs
32n/aimport sys
33n/afrom . import aliases
34n/a
# Maps each encoding name passed to search_function() to the CodecInfo it
# resolved to, or to None when no codec module was found (negative cache).
_cache = {}
# Sentinel default for _cache.get(); lets a cached None be told apart from
# a name that has never been looked up.
_unknown = '--unknown--'
# Non-empty fromlist handed to __import__() so that it returns the
# 'encodings.<name>' submodule itself rather than the top-level package.
_import_tail = ['*']
# Alias table from the sibling 'aliases' module; search_function() also adds
# aliases reported by codec modules to it.
_aliases = aliases.aliases
39n/a
class CodecRegistryError(LookupError, SystemError):
    """Raised when a codec module returns an unusable registry entry.

    search_function() raises it when getregentry() returns a non-CodecInfo
    value of the wrong length or with non-callable codec slots.  It derives
    from both LookupError and SystemError, so handlers for either catch it.
    """
42n/a
def normalize_encoding(encoding):
    """Normalize an encoding name.

    Every run of characters that are neither alphanumeric nor a dot (the
    dot is kept because it is used for Python package names) is collapsed
    into a single underscore, e.g. ' -;#' becomes '_'.  Leading and
    trailing runs are dropped, so the result never starts or ends with an
    underscore produced by normalization.

    *encoding* may be a str or a bytes object; bytes are decoded as ASCII
    first, so non-ASCII bytes raise UnicodeDecodeError.

    Note that encoding names should be ASCII only; if they do use
    non-ASCII characters, these must be Latin-1 compatible.

    Returns the normalized name as a str.
    """
    if isinstance(encoding, bytes):
        encoding = str(encoding, "ascii")

    chars = []
    punct = False  # True while inside a run of separator characters
    for c in encoding:
        if c.isalnum() or c == '.':
            # Emit one '_' for the separator run that just ended, unless
            # nothing has been kept yet (i.e. the run was leading).
            if punct and chars:
                chars.append('_')
            chars.append(c)
            punct = False
        else:
            punct = True
    return ''.join(chars)
70n/a
def search_function(encoding):
    """Codec search function for the 'encodings' package.

    Resolves *encoding* to a codec module inside the 'encodings' package
    and returns its codecs.CodecInfo, or None if no usable module exists.
    Both outcomes are stored in _cache under the name exactly as passed in.

    Raises CodecRegistryError if the module's getregentry() returns a
    non-CodecInfo value that does not have 4 to 7 items or whose codec
    slots are not callable (or None where that is permitted).
    """
    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding
    # name and lookup the module using the aliased name, then try to
    # lookup the module using the standard import scheme, i.e. first
    # try in the encodings package, then at top-level.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        # Empty names and names containing a dot are never imported.
        if not modname or '.' in modname:
            continue
        try:
            # Import is absolute to prevent the possibly malicious import of a
            # module with side-effects that is not in the 'encodings' package.
            mod = __import__('encodings.' + modname, fromlist=_import_tail,
                             level=0)
        except ImportError:
            # ImportError may occur because 'encodings.(modname)' does not exist,
            # or because it imports a name that does not exist (see mbcs and oem)
            pass
        else:
            break
    else:
        # No candidate could be imported.
        mod = None

    try:
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module (this also covers mod being None)
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = getregentry()
    if not isinstance(entry, codecs.CodecInfo):
        # Legacy sequence-style entry: validate it, then wrap it.
        if not 4 <= len(entry) <= 7:
            raise CodecRegistryError('module "%s" (%s) failed to register'
                                     % (mod.__name__, mod.__file__))
        if not callable(entry[0]) or not callable(entry[1]) or \
           (entry[2] is not None and not callable(entry[2])) or \
           (entry[3] is not None and not callable(entry[3])) or \
           (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
           (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
            raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
                                     % (mod.__name__, mod.__file__))
        if len(entry) < 7 or entry[6] is None:
            # Pad the optional slots with None and put the module's short
            # name in the name slot (index 6).  The first six items are
            # sliced so that a 7-item entry whose name is None has that
            # slot replaced; appending would give CodecInfo eight
            # positional arguments and raise TypeError.
            entry = (tuple(entry[:6])
                     + (None,) * (6 - len(entry))
                     + (mod.__name__.split(".", 1)[1],))
        entry = codecs.CodecInfo(*entry)

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        # getaliases() is optional for codec modules.
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                _aliases[alias] = modname

    # Return the registry entry
    return entry
154n/a
# Register the search_function in the Python codec registry
codecs.register(search_function)
157n/a
if sys.platform == 'win32':
    def _alias_mbcs(encoding):
        """Codec search function that maps the preferred encoding to 'mbcs'.

        Returns the registry entry of encodings.mbcs when *encoding* equals
        the value of _bootlocale.getpreferredencoding(False); otherwise, or
        when one of the imports fails, returns None.
        """
        try:
            import _bootlocale
            if encoding == _bootlocale.getpreferredencoding(False):
                import encodings.mbcs
                return encodings.mbcs.getregentry()
        except ImportError:
            # Imports may fail while we are shutting down
            pass
        return None

    codecs.register(_alias_mbcs)