» Core Development > Code coverage > Lib/gettext.py

Python code coverage for Lib/gettext.py

# count content
1n/a"""Internationalization and localization support.
2n/a
3n/aThis module provides internationalization (I18N) and localization (L10N)
4n/asupport for your Python programs by providing an interface to the GNU gettext
5n/amessage catalog library.
6n/a
7n/aI18N refers to the operation by which a program is made aware of multiple
8n/alanguages. L10N refers to the adaptation of your program, once
9n/ainternationalized, to the local language and cultural habits.
10n/a
11n/a"""
12n/a
13n/a# This module represents the integration of work, contributions, feedback, and
14n/a# suggestions from the following people:
15n/a#
16n/a# Martin von Loewis, who wrote the initial implementation of the underlying
17n/a# C-based libintlmodule (later renamed _gettext), along with a skeletal
18n/a# gettext.py implementation.
19n/a#
20n/a# Peter Funk, who wrote fintl.py, a fairly complete wrapper around intlmodule,
21n/a# which also included a pure-Python implementation to read .mo files if
22n/a# intlmodule wasn't available.
23n/a#
24n/a# James Henstridge, who also wrote a gettext.py module, which has some
25n/a# interesting, but currently unsupported experimental features: the notion of
26n/a# a Catalog class and instances, and the ability to add to a catalog file via
27n/a# a Python API.
28n/a#
29n/a# Barry Warsaw integrated these modules, wrote the .install() API and code,
30n/a# and conformed all C and Python code to Python's coding standards.
31n/a#
32n/a# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
33n/a# module.
34n/a#
35n/a# J. David Ibanez implemented plural forms. Bruno Haible fixed some bugs.
36n/a#
37n/a# TODO:
38n/a# - Lazy loading of .mo files. Currently the entire catalog is loaded into
39n/a# memory, but that's probably bad for large translated programs. Instead,
40n/a# the lexical sort of original strings in GNU .mo files should be exploited
41n/a# to do binary searches and lazy initializations. Or you might want to use
42n/a# the undocumented double-hash algorithm for .mo files with hash tables, but
43n/a# you'll need to study the GNU gettext code to do this.
44n/a#
45n/a# - Support Solaris .mo file formats. Unfortunately, we've been unable to
46n/a# find this format documented anywhere.
47n/a
48n/a
49n/aimport locale, copy, io, os, re, struct, sys
50n/afrom errno import ENOENT
51n/a
52n/a
# Names exported by ``from gettext import *``.
__all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
           'find', 'translation', 'install', 'textdomain', 'bindtextdomain',
           'bind_textdomain_codeset',
           'dgettext', 'dngettext', 'gettext', 'lgettext', 'ldgettext',
           'ldngettext', 'lngettext', 'ngettext',
           ]

# Catalog directory used by find() and bindtextdomain() when the caller does
# not name one.
_default_localedir = os.path.join(sys.base_prefix, 'share', 'locale')
61n/a
62n/a# Expression parsing for plural form selection.
63n/a#
64n/a# The gettext library supports a small subset of C syntax. The only
65n/a# incompatible difference is that integer literals starting with zero are
66n/a# decimal.
67n/a#
68n/a# https://www.gnu.org/software/gettext/manual/gettext.html#Plural-forms
69n/a# http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-runtime/intl/plural.y
70n/a
_token_pattern = re.compile(r"""
        (?P<WHITESPACES>[ \t]+)                    | # spaces and horizontal tabs
        (?P<NUMBER>[0-9]+\b)                       | # decimal integer
        (?P<NAME>n\b)                              | # only n is allowed
        (?P<PARENTHESIS>[()])                      |
        (?P<OPERATOR>[-*/%+?:]|[><!]=?|==|&&|\|\|) | # !, *, /, %, +, -, <, >,
                                                     # <=, >=, ==, !=, &&, ||,
                                                     # ? :
                                                     # unary and bitwise ops
                                                     # not allowed
        (?P<INVALID>\w+|.)                           # invalid token
    """, re.VERBOSE|re.DOTALL)

def _tokenize(plural):
    """Yield the tokens of the C plural expression *plural*.

    Whitespace is skipped.  After the last real token an empty string is
    yielded as an end-of-input marker.  Raises ValueError as soon as a
    piece of text that is not a valid token is reached.
    """
    for match in _token_pattern.finditer(plural):
        kind = match.lastgroup
        if kind == 'WHITESPACES':
            continue
        token = match.group(kind)
        if kind == 'INVALID':
            raise ValueError('invalid token in plural form: %s' % token)
        yield token
    # Sentinel telling the parser that the input is exhausted.
    yield ''
94n/a
def _error(value):
    """Return (without raising) the ValueError for an unexpected token.

    *value* is the offending token; an empty value stands for the end of
    the expression.
    """
    if not value:
        return ValueError('unexpected end of plural form')
    return ValueError('unexpected token in plural form: %s' % value)
100n/a
# Binary operators grouped by precedence level, loosest-binding group first.
_binary_ops = (
    ('||',),
    ('&&',),
    ('==', '!='),
    ('<', '>', '<=', '>='),
    ('+', '-'),
    ('*', '/', '%'),
)
# Rebind the name to a mapping from each operator to its 1-based precedence
# level (the position of its group in the tuple above).
_binary_ops = {op: i for i, ops in enumerate(_binary_ops, 1) for op in ops}
# C operators whose Python spelling differs; all others are emitted unchanged.
_c2py_ops = {'||': 'or', '&&': 'and', '/': '//'}
111n/a
def _parse(tokens, priority=-1):
    """Translate a C plural expression into Python expression source.

    *tokens* is an iterator of token strings terminated by '' (as produced
    by _tokenize()).  *priority* is the lowest precedence level of binary
    operator this call is allowed to consume; the conditional operator is
    only consumed when *priority* is 0 or less.

    Returns a tuple (python_source, next_token) where next_token is the
    first token this call did not consume.  Raises ValueError on a syntax
    error.
    """
    result = ''
    nexttok = next(tokens)
    # Any number of leading logical negations.
    while nexttok == '!':
        result += 'not '
        nexttok = next(tokens)

    # Operand: a parenthesised sub-expression, the variable n, or a number.
    if nexttok == '(':
        sub, nexttok = _parse(tokens)
        result = '%s(%s)' % (result, sub)
        if nexttok != ')':
            raise ValueError('unbalanced parenthesis in plural form')
    elif nexttok == 'n':
        result = '%s%s' % (result, nexttok)
    else:
        try:
            # Base 10 is forced, so a leading zero does not mean octal.
            value = int(nexttok, 10)
        except ValueError:
            raise _error(nexttok) from None
        result = '%s%d' % (result, value)
    nexttok = next(tokens)

    # j remembers the level of the previously consumed operator; 100 is a
    # start value that matches no real level.
    j = 100
    while nexttok in _binary_ops:
        i = _binary_ops[nexttok]
        if i < priority:
            break
        # Break chained comparisons
        if i in (3, 4) and j in (3, 4):  # '==', '!=', '<', '>', '<=', '>='
            result = '(%s)' % result
        # Replace some C operators by their Python equivalents
        op = _c2py_ops.get(nexttok, nexttok)
        # The right operand may only contain tighter-binding operators.
        right, nexttok = _parse(tokens, i + 1)
        result = '%s %s %s' % (result, op, right)
        j = i
    # A relational expression parsed at priority 4 is parenthesised so that
    # it cannot form a Python chained comparison with the operator to its
    # left.
    if j == priority == 4:  # '<', '>', '<=', '>='
        result = '(%s)' % result

    # Conditional operator: C's "cond ? a : b" becomes "a if cond else b".
    if nexttok == '?' and priority <= 0:
        if_true, nexttok = _parse(tokens, 0)
        if nexttok != ':':
            raise _error(nexttok)
        if_false, nexttok = _parse(tokens)
        result = '%s if %s else %s' % (if_true, result, if_false)
        # A conditional nested as the middle operand of another one is
        # wrapped in parentheses.
        if priority == 0:
            result = '(%s)' % result

    return result, nexttok
160n/a
def _as_int(n):
    """Check that *n* can be rounded and return it unchanged.

    Raises TypeError, naming the offending type, when round() rejects *n*.
    The rounded value itself is not used.
    """
    try:
        round(n)  # called only for its TypeError on non-numeric input
    except TypeError:
        type_name = n.__class__.__name__
        raise TypeError('Plural value must be an integer, got %s' %
                        (type_name,)) from None
    return n
168n/a
def c2py(plural):
    """Compile a C plural-form expression into a Python function.

    *plural* is the expression as written in a PO file's Plural-Forms
    header.  The returned function takes the count n and returns the
    expression's value as an int.  Raises ValueError when the expression
    is too long, syntactically invalid or too deeply nested.
    """

    if len(plural) > 1000:
        raise ValueError('plural form expression is too long')
    try:
        expr, leftover = _parse(_tokenize(plural))
        if leftover:
            # Tokens remain after a complete expression.
            raise _error(leftover)

        nesting = 0
        for ch in expr:
            if ch == '(':
                nesting += 1
                if nesting > 20:
                    # Python compiler limit is about 90.
                    # The most complex example has 2.
                    raise ValueError('plural form expression is too complex')
            elif ch == ')':
                nesting -= 1

        # "if True:" lets the indented template compile as-is.
        source = '''if True:
            def func(n):
                if not isinstance(n, int):
                    n = _as_int(n)
                return int(%s)
            ''' % expr
        namespace = {'_as_int': _as_int}
        exec(source, namespace)
        return namespace['func']
    except RecursionError:
        # Recursion error can be raised in _parse() or exec().
        raise ValueError('plural form expression is too complex')
203n/a
204n/a
def _expand_lang(loc):
    """Return the fallback spellings of locale *loc*, most specific first.

    The name is normalized with locale.normalize() and split into language,
    territory ("_XX"), codeset (".enc") and modifier ("@mod").  Every
    combination of the parts that are present is produced; the last entry
    is the bare language.
    """
    loc = locale.normalize(loc)
    CODESET, TERRITORY, MODIFIER = 1 << 0, 1 << 1, 1 << 2

    def cut(text, sep):
        # Split at the first *sep*; the tail keeps the separator and is
        # empty when *sep* does not occur.
        head, found, tail = text.partition(sep)
        return head, found + tail

    # split up the locale into its base components
    loc, modifier = cut(loc, '@')
    loc, codeset = cut(loc, '.')
    language, territory = cut(loc, '_')

    mask = 0
    if modifier:
        mask |= MODIFIER
    if codeset:
        mask |= CODESET
    if territory:
        mask |= TERRITORY

    # Walk the bit combinations from the fullest down to the bare language.
    variants = []
    for combo in range(mask, -1, -1):
        if combo & ~mask:
            continue  # asks for a component this locale does not have
        name = language
        if combo & TERRITORY:
            name += territory
        if combo & CODESET:
            name += codeset
        if combo & MODIFIER:
            name += modifier
        variants.append(name)
    return variants
244n/a
245n/a
246n/a
class NullTranslations:
    """Translations object that returns messages untranslated.

    Every lookup is first offered to the fallback object, when one has been
    added with add_fallback().  Subclasses supply real catalogs by
    overriding _parse().
    """

    def __init__(self, fp=None):
        """Set up empty state and, if *fp* is given, hand it to _parse()."""
        self._info = {}
        self._charset = None
        self._output_charset = None
        self._fallback = None
        if fp is not None:
            self._parse(fp)

    def _parse(self, fp):
        """Hook for subclasses; this class ignores *fp*."""
        pass

    def add_fallback(self, fallback):
        """Append *fallback* to the end of the fallback chain."""
        if not self._fallback:
            self._fallback = fallback
            return
        self._fallback.add_fallback(fallback)

    def gettext(self, message):
        """Return the fallback's translation of *message*, else *message*."""
        chained = self._fallback
        return chained.gettext(message) if chained else message

    def lgettext(self, message):
        """Like gettext(), but delegates to the fallback's lgettext()."""
        chained = self._fallback
        return chained.lgettext(message) if chained else message

    def ngettext(self, msgid1, msgid2, n):
        """Return the fallback's plural lookup, else msgid1 when n == 1 and
        msgid2 otherwise."""
        if self._fallback:
            return self._fallback.ngettext(msgid1, msgid2, n)
        return msgid1 if n == 1 else msgid2

    def lngettext(self, msgid1, msgid2, n):
        """Like ngettext(), but delegates to the fallback's lngettext()."""
        if self._fallback:
            return self._fallback.lngettext(msgid1, msgid2, n)
        return msgid1 if n == 1 else msgid2

    def info(self):
        """Return the catalog metadata dictionary."""
        return self._info

    def charset(self):
        """Return the stored catalog charset (None until one is set)."""
        return self._charset

    def output_charset(self):
        """Return the charset set with set_output_charset(), or None."""
        return self._output_charset

    def set_output_charset(self, charset):
        """Remember *charset* as the output charset."""
        self._output_charset = charset

    def install(self, names=None):
        """Bind self.gettext to ``_`` in the builtins namespace.

        If *names* supports ``in``, each of "gettext", "ngettext",
        "lgettext" and "lngettext" that it contains is installed as well.
        """
        import builtins
        namespace = builtins.__dict__
        namespace['_'] = self.gettext
        if not hasattr(names, "__contains__"):
            return
        if "gettext" in names:
            namespace['gettext'] = namespace['_']
        for name in ("ngettext", "lgettext", "lngettext"):
            if name in names:
                namespace[name] = getattr(self, name)
315n/a
316n/a
class GNUTranslations(NullTranslations):
    """Translations read from a GNU gettext .mo catalog.

    The catalog is parsed by _parse() from the file object passed to the
    constructor.  Lookups that miss the catalog go to the fallback chain if
    one was added, and otherwise return the untranslated message.
    """

    # Magic number of .mo files
    LE_MAGIC = 0x950412de
    BE_MAGIC = 0xde120495

    # Acceptable .mo versions
    VERSIONS = (0, 1)

    def _get_versions(self, version):
        """Returns a tuple of major version, minor version"""
        # Major version is the high 16 bits, minor version the low 16 bits.
        return (version >> 16, version & 0xffff)

    def _parse(self, fp):
        """Override this method to support alternative .mo formats.

        Reads all of *fp*, checks the header and fills self._catalog with
        the decoded messages.  The entry with an empty msgid is treated as
        catalog metadata and also fills self._info, self._charset and
        self.plural.

        Raises OSError when the magic number is unknown, the major version
        is not in VERSIONS, or an index entry points outside the file.
        """
        unpack = struct.unpack
        filename = getattr(fp, 'name', '')
        # Parse the .mo file header, which consists of 5 32 bit words:
        # magic number, version, number of messages, and the offsets of the
        # original-string and translation index tables.
        self._catalog = catalog = {}
        self.plural = lambda n: int(n != 1) # germanic plural by default
        buf = fp.read()
        buflen = len(buf)
        # Are we big endian or little endian?
        magic = unpack('<I', buf[:4])[0]
        if magic == self.LE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('<4I', buf[4:20])
            ii = '<II'
        elif magic == self.BE_MAGIC:
            version, msgcount, masteridx, transidx = unpack('>4I', buf[4:20])
            ii = '>II'
        else:
            raise OSError(0, 'Bad magic number', filename)

        major_version, minor_version = self._get_versions(version)

        # Only the major version is checked; minor_version is not used.
        if major_version not in self.VERSIONS:
            raise OSError(0, 'Bad version number ' + str(major_version), filename)

        # Now put all messages from the .mo file buffer into the catalog
        # dictionary.
        for i in range(0, msgcount):
            # Each index entry is a (length, offset) pair.
            mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
            mend = moff + mlen
            tlen, toff = unpack(ii, buf[transidx:transidx+8])
            tend = toff + tlen
            if mend < buflen and tend < buflen:
                msg = buf[moff:mend]
                tmsg = buf[toff:tend]
            else:
                raise OSError(0, 'File is corrupt', filename)
            # See if we're looking at GNU .mo conventions for metadata
            if mlen == 0:
                # Catalog description
                lastk = None
                for b_item in tmsg.split('\n'.encode("ascii")):
                    item = b_item.decode().strip()
                    if not item:
                        continue
                    k = v = None
                    if ':' in item:
                        k, v = item.split(':', 1)
                        k = k.strip().lower()
                        v = v.strip()
                        self._info[k] = v
                        lastk = k
                    elif lastk:
                        # A line without a colon continues the previous
                        # header's value.
                        self._info[lastk] += '\n' + item
                    if k == 'content-type':
                        # NOTE(review): raises IndexError if the value has
                        # no "charset=" part.
                        self._charset = v.split('charset=')[1]
                    elif k == 'plural-forms':
                        # The expression is taken from the second
                        # ';'-separated field, after "plural=".
                        v = v.split(';')
                        plural = v[1].split('plural=')[1]
                        self.plural = c2py(plural)
            # Note: we unconditionally convert both msgids and msgstrs to
            # Unicode using the character encoding specified in the charset
            # parameter of the Content-Type header.  The gettext documentation
            # strongly encourages msgids to be us-ascii, but some applications
            # require alternative encodings (e.g. Zope's ZCML and ZPT).  For
            # traditional gettext applications, the msgid conversion will
            # cause no problems since us-ascii should always be a subset of
            # the charset encoding.  We may want to fall back to 8-bit msgids
            # if the Unicode conversion fails.
            charset = self._charset or 'ascii'
            if b'\x00' in msg:
                # Plural forms
                # Only msgid1 keys the catalog; msgid2 is not stored.
                msgid1, msgid2 = msg.split(b'\x00')
                tmsg = tmsg.split(b'\x00')
                msgid1 = str(msgid1, charset)
                # This rebinds the outer loop's i, which the outer loop body
                # never reads.
                for i, x in enumerate(tmsg):
                    catalog[(msgid1, i)] = str(x, charset)
            else:
                catalog[str(msg, charset)] = str(tmsg, charset)
            # advance to next entry in the seek tables
            masteridx += 8
            transidx += 8

    def lgettext(self, message):
        """Return the translation of *message* encoded to bytes.

        The output charset is used when set, otherwise
        locale.getpreferredencoding().  On a catalog miss the fallback's
        lgettext() result, or *message* itself unencoded, is returned.
        """
        # A unique sentinel, so that any stored value counts as a hit.
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.lgettext(message)
            return message
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        return tmsg.encode(locale.getpreferredencoding())

    def lngettext(self, msgid1, msgid2, n):
        """Return the plural translation for count *n* encoded to bytes.

        The catalog key is (msgid1, self.plural(n)).  On a miss the
        fallback's lngettext() result is returned, or msgid1 when n == 1
        and msgid2 otherwise (unencoded).
        """
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
            if self._output_charset:
                return tmsg.encode(self._output_charset)
            return tmsg.encode(locale.getpreferredencoding())
        except KeyError:
            if self._fallback:
                return self._fallback.lngettext(msgid1, msgid2, n)
            if n == 1:
                return msgid1
            else:
                return msgid2

    def gettext(self, message):
        """Return the translation of *message* as a str.

        On a catalog miss the fallback's gettext() result, or *message*
        itself, is returned.
        """
        missing = object()
        tmsg = self._catalog.get(message, missing)
        if tmsg is missing:
            if self._fallback:
                return self._fallback.gettext(message)
            return message
        return tmsg

    def ngettext(self, msgid1, msgid2, n):
        """Return the plural translation for count *n* as a str.

        The catalog key is (msgid1, self.plural(n)).  On a miss the
        fallback's ngettext() result is returned, or msgid1 when n == 1
        and msgid2 otherwise.
        """
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                return self._fallback.ngettext(msgid1, msgid2, n)
            if n == 1:
                tmsg = msgid1
            else:
                tmsg = msgid2
        return tmsg
458n/a
459n/a
460n/a# Locate a .mo file using the gettext strategy
def find(domain, localedir=None, languages=None, all=False):
    """Locate the .mo file(s) for *domain* using the gettext strategy.

    domain    -- catalog name; the file looked for is "<domain>.mo".
    localedir -- directory holding the per-language subdirectories;
                 defaults to _default_localedir.
    languages -- iterable of language codes to try in order.  When None,
                 the first non-empty value among the LANGUAGE, LC_ALL,
                 LC_MESSAGES and LANG environment variables is split on
                 ':' and used.  The caller's object is never modified.
    all       -- if true, return a list of every matching path instead of
                 only the first.

    Returns the first existing path (or None), or with *all* a possibly
    empty list.  The search stops when the 'C' language is reached.
    """
    # Get some reasonable defaults for arguments that were not supplied
    if localedir is None:
        localedir = _default_localedir
    if languages is None:
        languages = []
        for envar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            val = os.environ.get(envar)
            if val:
                languages = val.split(':')
                break
    else:
        # Work on a private copy: 'C' may be appended below, and that must
        # not leak into the caller's list (nor fail on a tuple).
        languages = list(languages)
    if 'C' not in languages:
        languages.append('C')
    # now normalize and expand the languages
    nelangs = []
    for lang in languages:
        for nelang in _expand_lang(lang):
            if nelang not in nelangs:
                nelangs.append(nelang)
    # select a language
    result = [] if all else None
    for lang in nelangs:
        if lang == 'C':
            break
        mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
        if os.path.exists(mofile):
            if not all:
                return mofile
            result.append(mofile)
    return result
495n/a
496n/a
497n/a
# Cache of parsed catalogs, keyed by (translation class, absolute .mo path).
_translations = {}

def translation(domain, localedir=None, languages=None,
                class_=None, fallback=False, codeset=None):
    """Return a translations object for *domain*.

    All .mo files found by find() are loaded with *class_* (GNUTranslations
    by default); the first is returned and the rest are chained to it as
    fallbacks.  If *codeset* is true it is set as the output charset of
    every object.  When no file is found, a NullTranslations is returned if
    *fallback* is true and OSError(ENOENT) is raised otherwise.
    """
    if class_ is None:
        class_ = GNUTranslations
    mofiles = find(domain, localedir, languages, all=True)
    if not mofiles:
        if not fallback:
            raise OSError(ENOENT, 'No translation file found for domain', domain)
        return NullTranslations()
    # Avoid opening, reading, and parsing the .mo file after it's been done
    # once.
    head = None
    for path in mofiles:
        cache_key = (class_, os.path.abspath(path))
        cached = _translations.get(cache_key)
        if cached is None:
            with open(path, 'rb') as fp:
                cached = _translations.setdefault(cache_key, class_(fp))
        # Hand out a shallow copy so that fallbacks and the output charset
        # set here do not end up on the cached object; all other instance
        # data stays shared with it.
        entry = copy.copy(cached)
        if codeset:
            entry.set_output_charset(codeset)
        if head is None:
            head = entry
        else:
            head.add_fallback(entry)
    return head
530n/a
531n/a
def install(domain, localedir=None, codeset=None, names=None):
    """Look up the translations for *domain* and install them in builtins.

    The lookup uses fallback=True, so a missing catalog does not raise.
    *names* is passed on to the translations object's install() method.
    """
    translation(domain, localedir, fallback=True, codeset=codeset).install(names)
535n/a
536n/a
537n/a
# a mapping between domains and locale directories
_localedirs = {}
# a mapping between domains and codesets
_localecodesets = {}
# current global domain, `messages' used for compatibility with GNU gettext
_current_domain = 'messages'
544n/a
545n/a
def textdomain(domain=None):
    """Return the current global domain, first changing it to *domain*
    when *domain* is not None."""
    global _current_domain
    if domain is None:
        return _current_domain
    _current_domain = domain
    return domain
551n/a
552n/a
def bindtextdomain(domain, localedir=None):
    """Return the locale directory bound to *domain*.

    When *localedir* is not None it is bound to *domain* first.  A domain
    without a binding reports _default_localedir.
    """
    if localedir is None:
        return _localedirs.get(domain, _default_localedir)
    _localedirs[domain] = localedir
    return localedir
558n/a
559n/a
def bind_textdomain_codeset(domain, codeset=None):
    """Return the codeset bound to *domain*, or None if there is none.

    When *codeset* is not None it is bound to *domain* first.
    """
    if codeset is None:
        return _localecodesets.get(domain)
    _localecodesets[domain] = codeset
    return codeset
565n/a
566n/a
def dgettext(domain, message):
    """Translate *message* using the catalog of *domain*.

    The directory and codeset bound to the domain are used for the lookup.
    *message* is returned unchanged when translation() raises OSError.
    """
    try:
        catalog = translation(domain, _localedirs.get(domain),
                              codeset=_localecodesets.get(domain))
    except OSError:
        return message
    else:
        return catalog.gettext(message)
574n/a
def ldgettext(domain, message):
    """Like dgettext(), but calls the catalog's lgettext() method.

    *message* is returned unchanged when translation() raises OSError.
    """
    try:
        catalog = translation(domain, _localedirs.get(domain),
                              codeset=_localecodesets.get(domain))
    except OSError:
        return message
    else:
        return catalog.lgettext(message)
582n/a
def dngettext(domain, msgid1, msgid2, n):
    """Plural-aware translation using the catalog of *domain*.

    When translation() raises OSError, msgid1 is returned for n == 1 and
    msgid2 otherwise.
    """
    try:
        catalog = translation(domain, _localedirs.get(domain),
                              codeset=_localecodesets.get(domain))
    except OSError:
        return msgid1 if n == 1 else msgid2
    return catalog.ngettext(msgid1, msgid2, n)
593n/a
def ldngettext(domain, msgid1, msgid2, n):
    """Like dngettext(), but calls the catalog's lngettext() method.

    When translation() raises OSError, msgid1 is returned for n == 1 and
    msgid2 otherwise.
    """
    try:
        catalog = translation(domain, _localedirs.get(domain),
                              codeset=_localecodesets.get(domain))
    except OSError:
        return msgid1 if n == 1 else msgid2
    return catalog.lngettext(msgid1, msgid2, n)
604n/a
def gettext(message):
    """Translate *message* in the current global domain via dgettext()."""
    return dgettext(_current_domain, message)
607n/a
def lgettext(message):
    """Translate *message* in the current global domain via ldgettext()."""
    return ldgettext(_current_domain, message)
610n/a
def ngettext(msgid1, msgid2, n):
    """Plural-aware translation in the current global domain via
    dngettext()."""
    return dngettext(_current_domain, msgid1, msgid2, n)
613n/a
def lngettext(msgid1, msgid2, n):
    """Plural-aware translation in the current global domain via
    ldngettext()."""
    return ldngettext(_current_domain, msgid1, msgid2, n)
616n/a
617n/a# dcgettext() has been deemed unnecessary and is not implemented.
618n/a
# James Henstridge's Catalog constructor from GNOME gettext.  Documented usage
# was:
#
#    import gettext
#    cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
#    _ = cat.gettext
#    print _('Hello World')

# The resulting catalog object currently doesn't support access through a
# dictionary API, which was supported (but apparently unused) in GNOME
# gettext.

# Catalog is simply another name for translation().
Catalog = translation