»Core Development>Code coverage>Lib/locale.py

Python code coverage for Lib/locale.py

#countcontent
1n/a"""Locale support module.
2n/a
3n/aThe module provides low-level access to the C lib's locale APIs and adds high
4n/alevel number formatting APIs as well as a locale aliasing engine to complement
5n/athese.
6n/a
7n/aThe aliasing engine includes support for many commonly used locale names and
8n/amaps them to values suitable for passing to the C lib's setlocale() function. It
9n/aalso includes default encodings for all supported locale names.
10n/a
11n/a"""
12n/a
13n/aimport sys
14n/aimport encodings
15n/aimport encodings.aliases
16n/aimport re
17n/aimport collections
18n/afrom builtins import str as _builtin_str
19n/aimport functools
20n/a
21n/a# Try importing the _locale module.
22n/a#
23n/a# If this fails, fall back on a basic 'C' locale emulation.
24n/a
# Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
# trying the import.  So __all__ is also fiddled at the end of the file.
#
# Names exported by "from locale import *".
__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
           "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
           "str", "atof", "atoi", "format", "format_string", "currency",
           "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
           "LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
32n/a
33n/adef _strcoll(a,b):
34n/a """ strcoll(string,string) -> int.
35n/a Compares two strings according to the locale.
36n/a """
37n/a return (a > b) - (a < b)
38n/a
39n/adef _strxfrm(s):
40n/a """ strxfrm(string) -> string.
41n/a Returns a string that behaves for cmp locale-aware.
42n/a """
43n/a return s
44n/a
# Pull every public name from the C extension into this namespace; if the
# extension cannot be imported, define a minimal stand-in below that only
# knows the 'C' locale.
try:

    from _locale import *

except ImportError:

    # Locale emulation

    # Category identifiers and limits used by the emulation.
    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    # Without the extension there is no dedicated exception type, so
    # ValueError is used under the name Error.
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.
        """
        # 'C' locale default values
        # (numeric fields are set to 127, the same value as CHAR_MAX above)
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation accepts only None, '' or 'C' as value and always
            returns 'C'; any other value raises Error.  The category
            argument is not inspected.
        """
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'
94n/a
# These may or may not exist in _locale, so be sure to set them.
# When the star import above did not provide them, fall back to the
# pure-Python _strxfrm/_strcoll defined earlier in this module.
if 'strxfrm' not in globals():
    strxfrm = _strxfrm
if 'strcoll' not in globals():
    strcoll = _strcoll
100n/a
101n/a
# Keep a handle on the underlying implementation; the public name is
# rebound to the wrapper defined below.
_localeconv = localeconv

# Items stored in this dict replace the matching entries of localeconv()'s
# result.  This is useful for testing purposes.
_override_localeconv = {}

@functools.wraps(_localeconv)
def localeconv():
    conventions = _localeconv()
    if not _override_localeconv:
        return conventions
    # Overlay the test overrides on top of the real values.
    conventions.update(_override_localeconv)
    return conventions
114n/a
115n/a
116n/a### Number formatting APIs
117n/a
118n/a# Author: Martin von Loewis
119n/a# improved by Georg Brandl
120n/a
121n/a# Iterate over grouping intervals
122n/adef _grouping_intervals(grouping):
123n/a last_interval = None
124n/a for interval in grouping:
125n/a # if grouping is -1, we are done
126n/a if interval == CHAR_MAX:
127n/a return
128n/a # 0: re-use last group ad infinitum
129n/a if interval == 0:
130n/a if last_interval is None:
131n/a raise ValueError("invalid grouping")
132n/a while True:
133n/a yield last_interval
134n/a yield interval
135n/a last_interval = interval
136n/a
137n/a#perform the grouping from right to left
def _group(s, monetary=False):
    """Insert the locale's thousands separator into the digit string s.

    Returns a tuple (grouped string, number of characters added by the
    separators).  When the locale defines no grouping, s is returned
    unchanged together with 0.  monetary selects the 'mon_*' settings.
    """
    conv = localeconv()
    separator = conv['mon_thousands_sep' if monetary else 'thousands_sep']
    grouping = conv['mon_grouping' if monetary else 'grouping']
    if not grouping:
        return (s, 0)

    # Set trailing padding aside so grouping starts at the last digit.
    trailing = ''
    if s[-1] == ' ':
        trimmed = s.rstrip()
        trailing = s[len(trimmed):]
        s = trimmed

    leading = ''
    chunks = []
    # Peel groups off the right-hand end, one interval at a time.
    for width in _grouping_intervals(grouping):
        if not s or s[-1] not in "0123456789":
            # only non-digit characters remain (sign, spaces)
            leading = s
            s = ''
            break
        chunks.append(s[-width:])
        s = s[:-width]
    if s:
        # the intervals ran out before the string did
        chunks.append(s)

    grouped = separator.join(reversed(chunks))
    added = len(separator) * (len(chunks) - 1)
    return (leading + grouped + trailing, added)
167n/a
168n/a# Strip a given amount of excess padding from the given string
169n/adef _strip_padding(s, amount):
170n/a lpos = 0
171n/a while amount and s[lpos] == ' ':
172n/a lpos += 1
173n/a amount -= 1
174n/a rpos = len(s) - 1
175n/a while amount and s[rpos] == ' ':
176n/a rpos -= 1
177n/a amount -= 1
178n/a return s[lpos:rpos+1]
179n/a
# Matches one %-style conversion specifier: '%', an optional parenthesised
# mapping key (group 'key'), optional modifier characters (group
# 'modifiers') and a single conversion character, which may itself be '%'.
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
182n/a
def format(percent, value, grouping=False, monetary=False, *additional):
    """Format value with the single %-specifier percent, locale-aware.

    percent must consist of exactly one conversion specifier; anything
    else raises ValueError.  additional supplies the extra arguments
    consumed by '*' modifiers in the specifier.
    """
    # The regex has to match, and the match has to span the whole string.
    match = _percent_re.match(percent)
    is_single_specifier = match and len(match.group()) == len(percent)
    if not is_single_specifier:
        raise ValueError(("format() must be given exactly one %%char "
                         "format specifier, %s not valid") % repr(percent))
    return _format(percent, value, grouping, monetary, *additional)
195n/a
196n/adef _format(percent, value, grouping=False, monetary=False, *additional):
197n/a if additional:
198n/a formatted = percent % ((value,) + additional)
199n/a else:
200n/a formatted = percent % value
201n/a # floats and decimal ints need special action!
202n/a if percent[-1] in 'eEfFgG':
203n/a seps = 0
204n/a parts = formatted.split('.')
205n/a if grouping:
206n/a parts[0], seps = _group(parts[0], monetary=monetary)
207n/a decimal_point = localeconv()[monetary and 'mon_decimal_point'
208n/a or 'decimal_point']
209n/a formatted = decimal_point.join(parts)
210n/a if seps:
211n/a formatted = _strip_padding(formatted, seps)
212n/a elif percent[-1] in 'diu':
213n/a seps = 0
214n/a if grouping:
215n/a formatted, seps = _group(formatted, monetary=monetary)
216n/a if seps:
217n/a formatted = _strip_padding(formatted, seps)
218n/a return formatted
219n/a
def format_string(f, val, grouping=False, monetary=False):
    """Formats a string in the same way that the % formatting would use,
    but takes the current locale into account.

    f is a %-format string; val is a single value, a tuple of values or a
    mapping, exactly as for the % operator.
    Grouping is applied if the third parameter is true.
    Conversion uses monetary thousands separator and grouping strings if
    the fourth parameter monetary is true.

    Returns the formatted string."""
    # Imported here rather than via the collections module: the
    # collections.Mapping alias was removed in Python 3.10.
    from collections.abc import Mapping

    percents = list(_percent_re.finditer(f))
    # Every specifier is pre-rendered below, so the template only needs
    # plain %s placeholders.
    new_f = _percent_re.sub('%s', f)

    if isinstance(val, Mapping):
        new_val = []
        for perc in percents:
            if perc.group()[-1]=='%':
                # literal percent sign
                new_val.append('%')
            else:
                # the specifier carries its own "(key)", so the whole
                # mapping is handed to the % operator
                new_val.append(_format(perc.group(), val, grouping, monetary))
    else:
        if not isinstance(val, tuple):
            val = (val,)
        new_val = []
        i = 0
        for perc in percents:
            if perc.group()[-1]=='%':
                new_val.append('%')
            else:
                # each '*' modifier consumes one extra positional value
                starcount = perc.group('modifiers').count('*')
                new_val.append(_format(perc.group(),
                                      val[i],
                                      grouping,
                                      monetary,
                                      *val[i+1:i+1+starcount]))
                i += (1 + starcount)
    val = tuple(new_val)

    return new_f % val
253n/a
def currency(val, symbol=True, grouping=False, international=False):
    """Formats val according to the currency settings
    in the current locale.

    symbol controls whether the currency symbol is included, grouping
    whether thousands separators are inserted, and international selects
    the 'int_*' settings.  Raises ValueError when the locale reports 127
    for the number of fractional digits."""
    conv = localeconv()

    # check for illegal values
    digits = conv['int_frac_digits' if international else 'frac_digits']
    if digits == 127:
        raise ValueError("Currency formatting is not possible using "
                         "the 'C' locale.")

    s = format('%%.%if' % digits, abs(val), grouping, monetary=True)
    # '<' and '>' are markers if the sign must be inserted between symbol and value
    s = '<' + s + '>'

    negative = val < 0

    if symbol:
        smb = conv['int_curr_symbol' if international else 'currency_symbol']
        precedes = conv['n_cs_precedes' if negative else 'p_cs_precedes']
        separated = conv['n_sep_by_space' if negative else 'p_sep_by_space']
        gap = ' ' if separated else ''

        if precedes:
            s = smb + gap + s
        else:
            s = s + gap + smb

    sign_pos = conv['n_sign_posn' if negative else 'p_sign_posn']
    sign = conv['negative_sign' if negative else 'positive_sign']

    if sign_pos == 0:
        s = '(' + s + ')'
    elif sign_pos == 2:
        s = s + sign
    elif sign_pos == 3:
        s = s.replace('<', sign)
    elif sign_pos == 4:
        s = s.replace('>', sign)
    else:
        # sign_pos == 1, and also the default if nothing specified;
        # this should be the most fitting sign position
        s = sign + s

    # drop whichever markers were not consumed by the sign
    return s.replace('<', '').replace('>', '')
298n/a
def str(val):
    """Return val rendered with the "%.12g" specifier via format(),
    so the result follows the current locale."""
    text = format("%.12g", val)
    return text
302n/a
def delocalize(string):
    "Return string with locale-specific number punctuation normalized."

    conv = localeconv()

    # Grouping separators carry no value: remove them.
    thousands_sep = conv['thousands_sep']
    if thousands_sep:
        string = string.replace(thousands_sep, '')

    # The locale's decimal point becomes a plain dot.
    decimal_point = conv['decimal_point']
    if decimal_point:
        string = string.replace(decimal_point, '.')
    return string
318n/a
def atof(string, func=float):
    "Delocalize string and convert it with func (float by default)."
    normalized = delocalize(string)
    return func(normalized)
322n/a
def atoi(string):
    "Delocalize string and convert it to an integer."
    normalized = delocalize(string)
    return int(normalized)
326n/a
def _test():
    """Print a grouped integer and a formatted float next to the values
    parsed back from them, after switching LC_ALL to the user's default."""
    setlocale(LC_ALL, "")
    # with grouping
    grouped = format("%d", 123456789,1)
    print(grouped, "is", atoi(grouped))
    # standard formatting
    plain = str(3.14)
    print(plain, "is", atof(plain))
335n/a
336n/a### Locale name aliasing engine
337n/a
338n/a# Author: Marc-Andre Lemburg, mal@lemburg.com
339n/a# Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
340n/a
# Store away the low-level version of setlocale: the name setlocale is
# redefined further down in this module, and that wrapper (as well as
# getlocale and resetlocale) calls the low-level function through this alias.
_setlocale = setlocale
344n/a
def _replace_encoding(code, encoding):
    """Return code with its encoding part replaced by encoding.

    The encoding is first normalized through the encodings package and
    then mapped through locale_encoding_alias; if no alias is found the
    normalized codec name is used as is.
    """
    # Everything before the first dot is the language part.
    langname = code.partition('.')[0]

    # Convert the encoding to a C lib compatible encoding string
    canonical = encodings.normalize_encoding(encoding)
    canonical = encodings.aliases.aliases.get(canonical.lower(), canonical)

    # Look the name up as is, then with '_' and '-' removed.
    lowered = canonical.lower()
    squeezed = lowered.replace('_', '').replace('-', '')
    for candidate in (lowered, squeezed):
        if candidate in locale_encoding_alias:
            resolved = locale_encoding_alias[candidate]
            break
    else:
        resolved = canonical
    return langname + '.' + resolved
367n/a
368n/adef _append_modifier(code, modifier):
369n/a if modifier == 'euro':
370n/a if '.' not in code:
371n/a return code + '.ISO8859-15'
372n/a _, _, encoding = code.partition('.')
373n/a if encoding in ('ISO8859-15', 'UTF-8'):
374n/a return code
375n/a if encoding == 'ISO8859-1':
376n/a return _replace_encoding(code, 'ISO8859-15')
377n/a return code + '@' + modifier
378n/a
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

        The name is lower-cased and split into language, encoding and
        modifier; locale_alias is then consulted with progressively
        less specific keys until one of them is found.

    """
    # Normalize the locale name and extract the encoding and modifier
    code = localename.lower()
    if ':' in code:
        # ':' is sometimes used as encoding delimiter.
        code = code.replace(':', '.')
    if '@' in code:
        code, modifier = code.split('@', 1)
    else:
        modifier = ''
    if '.' in code:
        # anything after a second '.' is discarded
        langname, encoding = code.split('.')[:2]
    else:
        langname = code
        encoding = ''

    # First lookup: fullname (possibly with encoding and modifier)
    lang_enc = langname
    if encoding:
        # table keys are stored without '-' and '_' in the encoding part
        norm_encoding = encoding.replace('-', '')
        norm_encoding = norm_encoding.replace('_', '')
        lang_enc += '.' + norm_encoding
    lookup_name = lang_enc
    if modifier:
        lookup_name += '@' + modifier
    code = locale_alias.get(lookup_name, None)
    if code is not None:
        return code

    if modifier:
        # Second try: fullname without modifier (possibly with encoding)
        code = locale_alias.get(lang_enc, None)
        if code is not None:
            if '@' not in code:
                return _append_modifier(code, modifier)
            # the alias already carries a modifier: accept it only if it
            # is the one that was asked for
            if code.split('@', 1)[1].lower() == modifier:
                return code

    if encoding:
        # Third try: langname (without encoding, possibly with modifier)
        lookup_name = langname
        if modifier:
            lookup_name += '@' + modifier
        code = locale_alias.get(lookup_name, None)
        if code is not None:
            if '@' not in code:
                return _replace_encoding(code, encoding)
            # swap the encoding in, then re-attach the alias's modifier
            code, modifier = code.split('@', 1)
            return _replace_encoding(code, encoding) + '@' + modifier

        if modifier:
            # Fourth try: langname (without encoding and modifier)
            code = locale_alias.get(langname, None)
            if code is not None:
                if '@' not in code:
                    code = _replace_encoding(code, encoding)
                    return _append_modifier(code, modifier)
                code, defmod = code.split('@', 1)
                if defmod.lower() == modifier:
                    return _replace_encoding(code, encoding) + '@' + defmod

    # Nothing matched: give the caller back exactly what was passed in.
    return localename
461n/a
def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

        Special cases: 'C' yields (None, None) and a bare 'UTF-8'
        yields (None, 'UTF-8').

    """
    code = normalize(localename)
    if '@' in code:
        # Deal with locale modifiers
        code, modifier = code.split('@', 1)
        if modifier == 'euro' and '.' not in code:
            # Assume Latin-9 for @euro locales. This is bogus,
            # since some systems may use other encodings for these
            # locales. Also, we ignore other modifiers.
            return code, 'iso-8859-15'

    if '.' in code:
        # "language.encoding[.more]": keep the first two components
        return tuple(code.split('.')[:2])
    elif code == 'C':
        return None, None
    elif code == 'UTF-8':
        # On macOS "LC_CTYPE=UTF-8" is a valid locale setting that
        # carries an encoding but no language; report it instead of
        # failing with "unknown locale".
        return None, 'UTF-8'
    raise ValueError('unknown locale: %s' % localename)
491n/a
492n/adef _build_localename(localetuple):
493n/a
494n/a """ Builds a locale code from the given tuple (language code,
495n/a encoding).
496n/a
497n/a No aliasing or normalizing takes place.
498n/a
499n/a """
500n/a try:
501n/a language, encoding = localetuple
502n/a
503n/a if language is None:
504n/a language = 'C'
505n/a if encoding is None:
506n/a return language
507n/a else:
508n/a return language + '.' + encoding
509n/a except (TypeError, ValueError):
510n/a raise TypeError('Locale must be None, a string, or an iterable of two strings -- language code, encoding.')
511n/a
def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first found to be defined will be
        used. envvars defaults to the search path used in GNU gettext;
        it must always contain the variable name 'LANG'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """

    try:
        # Prefer the answer of the _locale module when it offers one.
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        is_windows_id = (sys.platform == "win32" and code
                         and code[:2] == "0x")
        if is_windows_id:
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    localename = 'C'
    for variable in envvars:
        candidate = os.environ.get(variable, None)
        if not candidate:
            continue
        if variable == 'LANGUAGE':
            # LANGUAGE may hold a colon-separated list; use its first entry
            candidate = candidate.split(':')[0]
        localename = candidate
        break
    return _parse_localename(localename)
563n/a
564n/a
def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* values except LC_ALL.  It
        defaults to LC_CTYPE.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

        Raises TypeError when category is LC_ALL and the reported
        setting contains ';'.

    """
    current = _setlocale(category)
    if category == LC_ALL:
        if ';' in current:
            raise TypeError('category LC_ALL is not supported')
    return _parse_localename(current)
582n/a
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, an iterable of two strings (language code and encoding),
        or None.

        Iterables are converted to strings using the locale aliasing
        engine.  Locale strings are passed directly to the C lib.

        category may be given as one of the LC_* values.

        Returns whatever the low-level setlocale returns.

    """
    needs_conversion = bool(locale) and not isinstance(locale, _builtin_str)
    if needs_conversion:
        # (language code, encoding) pair -> normalized locale string
        built = _build_localename(locale)
        locale = normalize(built)
    return _setlocale(category, locale)
599n/a
def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        The default setting is determined by calling
        getdefaultlocale(). category defaults to LC_ALL.

    """
    default_name = _build_localename(getdefaultlocale())
    _setlocale(category, default_name)
609n/a
# getpreferredencoding() is defined once, in a platform-specific variant
# chosen when the module is imported.
if sys.platform.startswith("win"):
    # On Win32, this will return the ANSI code page
    def getpreferredencoding(do_setlocale = True):
        """Return the charset that the user is likely using."""
        # do_setlocale is accepted but not used by this variant.
        import _bootlocale
        return _bootlocale.getpreferredencoding(False)
else:
    # On Unix, if CODESET is available, use that.
    try:
        # Bare name lookup: raises NameError if the star import from
        # _locale did not provide CODESET.
        CODESET
    except NameError:
        if hasattr(sys, 'getandroidapilevel'):
            # On Android langinfo.h and CODESET are missing, and UTF-8 is
            # always used in mbstowcs() and wcstombs().
            def getpreferredencoding(do_setlocale = True):
                # do_setlocale is accepted but not used by this variant.
                return 'UTF-8'
        else:
            # Fall back to parsing environment variables :-(
            def getpreferredencoding(do_setlocale = True):
                """Return the charset that the user is likely using,
                by looking at environment variables."""
                # do_setlocale is accepted but not used by this variant.
                res = getdefaultlocale()[1]
                if res is None:
                    # LANG not set, default conservatively to ASCII
                    res = 'ascii'
                return res
    else:
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            according to the system configuration."""
            import _bootlocale
            if do_setlocale:
                # Remember the current LC_CTYPE setting, then try to
                # switch to the user's default; a failure to switch is
                # deliberately ignored.
                oldloc = setlocale(LC_CTYPE)
                try:
                    setlocale(LC_CTYPE, "")
                except Error:
                    pass
            result = _bootlocale.getpreferredencoding(False)
            if do_setlocale:
                # Put the previous LC_CTYPE setting back.
                # NOTE(review): not in a finally block, so the setting is
                # not restored if the call above raises.
                setlocale(LC_CTYPE, oldloc)
            return result
651n/a
652n/a
653n/a### Database
654n/a#
655n/a# The following data was extracted from the locale.alias file which
656n/a# comes with X11 and then hand edited removing the explicit encoding
657n/a# definitions and adding some more aliases. The file is usually
658n/a# available as /usr/lib/X11/locale/locale.alias.
659n/a#
660n/a
661n/a#
662n/a# The locale_encoding_alias table maps lowercase encoding alias names
663n/a# to C locale encoding names (case-sensitive). Note that normalize()
664n/a# first looks up the encoding in the encodings.aliases dictionary and
665n/a# then applies this mapping to find the correct C lib name for the
666n/a# encoding.
667n/a#
# Keys are lowercase encoding names; values are the encoding names used in
# the locale codes this module produces.
locale_encoding_alias = {

    # Mappings for non-standard encoding names used in locale names
    '437': 'C',
    'c': 'C',
    'en': 'ISO8859-1',
    'jis': 'JIS7',
    'jis7': 'JIS7',
    'ajec': 'eucJP',
    'koi8c': 'KOI8-C',
    'microsoftcp1251': 'CP1251',
    'microsoftcp1255': 'CP1255',
    'microsoftcp1256': 'CP1256',
    '88591': 'ISO8859-1',
    '88592': 'ISO8859-2',
    '88595': 'ISO8859-5',
    '885915': 'ISO8859-15',

    # Mappings from Python codec names to C lib encoding names
    'ascii': 'ISO8859-1',
    'latin_1': 'ISO8859-1',
    'iso8859_1': 'ISO8859-1',
    'iso8859_10': 'ISO8859-10',
    'iso8859_11': 'ISO8859-11',
    'iso8859_13': 'ISO8859-13',
    'iso8859_14': 'ISO8859-14',
    'iso8859_15': 'ISO8859-15',
    'iso8859_16': 'ISO8859-16',
    'iso8859_2': 'ISO8859-2',
    'iso8859_3': 'ISO8859-3',
    'iso8859_4': 'ISO8859-4',
    'iso8859_5': 'ISO8859-5',
    'iso8859_6': 'ISO8859-6',
    'iso8859_7': 'ISO8859-7',
    'iso8859_8': 'ISO8859-8',
    'iso8859_9': 'ISO8859-9',
    'iso2022_jp': 'JIS7',
    'shift_jis': 'SJIS',
    'tactis': 'TACTIS',
    'euc_jp': 'eucJP',
    'euc_kr': 'eucKR',
    'utf_8': 'UTF-8',
    'koi8_r': 'KOI8-R',
    'koi8_t': 'KOI8-T',
    'koi8_u': 'KOI8-U',
    'kz1048': 'RK1048',
    'cp1251': 'CP1251',
    'cp1255': 'CP1255',
    'cp1256': 'CP1256',

    # XXX This list is still incomplete. If you know more
    # mappings, please file a bug report. Thanks.
}

# Also register every key with its underscores removed.  setdefault() keeps
# an entry that already exists, and iterating over a sorted snapshot makes
# the outcome independent of dict ordering.
for k, v in sorted(locale_encoding_alias.items()):
    k = k.replace('_', '')
    locale_encoding_alias.setdefault(k, v)
725n/a
726n/a#
727n/a# The locale_alias table maps lowercase alias names to C locale names
728n/a# (case-sensitive). Encodings are always separated from the locale
729n/a# name using a dot ('.'); they should only be given in case the
730n/a# language name is needed to interpret the given encoding alias
731n/a# correctly (CJK codes often have this need).
732n/a#
733n/a# Note that the normalize() function, which uses this table,
734n/a# removes '_' and '-' characters from the encoding part of the
735n/a# locale name before doing the lookup. This saves a lot of
736n/a# space in the table.
737n/a#
738n/a# MAL 2004-12-10:
739n/a# Updated alias mapping to most recent locale.alias file
740n/a# from X.org distribution using makelocalealias.py.
741n/a#
742n/a# These are the differences compared to the old mapping (Python 2.4
743n/a# and older):
744n/a#
745n/a# updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
746n/a# updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
747n/a# updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
748n/a# updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
749n/a# updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
750n/a# updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
751n/a# updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
752n/a# updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
753n/a# updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
754n/a# updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
755n/a# updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
756n/a# updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
757n/a# updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
758n/a# updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
759n/a# updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
760n/a# updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
761n/a# updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
762n/a# updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
763n/a# updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
764n/a# updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
765n/a# updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
766n/a# updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
767n/a#
768n/a# MAL 2008-05-30:
769n/a# Updated alias mapping to most recent locale.alias file
770n/a# from X.org distribution using makelocalealias.py.
771n/a#
772n/a# These are the differences compared to the old mapping (Python 2.5
773n/a# and older):
774n/a#
775n/a# updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
776n/a# updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
777n/a# updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
778n/a# updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
779n/a# updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
780n/a# updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
781n/a# updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
782n/a# updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
783n/a# updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
784n/a# updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
785n/a# updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
786n/a# updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
787n/a# updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
788n/a# updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
789n/a# updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
790n/a# updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
791n/a# updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
792n/a# updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
793n/a# updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
794n/a#
795n/a# AP 2010-04-12:
796n/a# Updated alias mapping to most recent locale.alias file
797n/a# from X.org distribution using makelocalealias.py.
798n/a#
799n/a# These are the differences compared to the old mapping (Python 2.6.5
800n/a# and older):
801n/a#
802n/a# updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
803n/a# updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
804n/a# updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
805n/a# updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
806n/a# updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
807n/a# updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
808n/a# updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
809n/a# updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
810n/a# updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
811n/a# updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
812n/a# updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
813n/a# updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
814n/a# updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
815n/a#
816n/a# SS 2013-12-20:
817n/a# Updated alias mapping to most recent locale.alias file
818n/a# from X.org distribution using makelocalealias.py.
819n/a#
820n/a# These are the differences compared to the old mapping (Python 3.3.3
821n/a# and older):
822n/a#
823n/a# updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
824n/a# updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
825n/a# updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
826n/a# updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
827n/a# updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
828n/a# updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
829n/a# updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
830n/a# updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
831n/a# updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
832n/a# updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
833n/a# updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
834n/a#
835n/a# SS 2014-10-01:
836n/a# Updated alias mapping with glibc 2.19 supported locales.
837n/a
# Alias table used by the locale aliasing engine.
#
# Keys are lower-case locale names or aliases, optionally carrying a
# normalized ".encoding" and/or "@modifier" part.  Values are the full
# locale names (language_TERRITORY.ENCODING[@modifier]) that are suitable
# for passing to the C lib's setlocale() function.
locale_alias = {
    'a3':                                   'az_AZ.KOI8-C',
    'a3_az':                                'az_AZ.KOI8-C',
    'a3_az.koic':                           'az_AZ.KOI8-C',
    'aa_dj':                                'aa_DJ.ISO8859-1',
    'aa_er':                                'aa_ER.UTF-8',
    'aa_et':                                'aa_ET.UTF-8',
    'af':                                   'af_ZA.ISO8859-1',
    'af_za':                                'af_ZA.ISO8859-1',
    'am':                                   'am_ET.UTF-8',
    'am_et':                                'am_ET.UTF-8',
    'american':                             'en_US.ISO8859-1',
    'an_es':                                'an_ES.ISO8859-15',
    'ar':                                   'ar_AA.ISO8859-6',
    'ar_aa':                                'ar_AA.ISO8859-6',
    'ar_ae':                                'ar_AE.ISO8859-6',
    'ar_bh':                                'ar_BH.ISO8859-6',
    'ar_dz':                                'ar_DZ.ISO8859-6',
    'ar_eg':                                'ar_EG.ISO8859-6',
    'ar_in':                                'ar_IN.UTF-8',
    'ar_iq':                                'ar_IQ.ISO8859-6',
    'ar_jo':                                'ar_JO.ISO8859-6',
    'ar_kw':                                'ar_KW.ISO8859-6',
    'ar_lb':                                'ar_LB.ISO8859-6',
    'ar_ly':                                'ar_LY.ISO8859-6',
    'ar_ma':                                'ar_MA.ISO8859-6',
    'ar_om':                                'ar_OM.ISO8859-6',
    'ar_qa':                                'ar_QA.ISO8859-6',
    'ar_sa':                                'ar_SA.ISO8859-6',
    'ar_sd':                                'ar_SD.ISO8859-6',
    'ar_sy':                                'ar_SY.ISO8859-6',
    'ar_tn':                                'ar_TN.ISO8859-6',
    'ar_ye':                                'ar_YE.ISO8859-6',
    'arabic':                               'ar_AA.ISO8859-6',
    'as':                                   'as_IN.UTF-8',
    'as_in':                                'as_IN.UTF-8',
    'ast_es':                               'ast_ES.ISO8859-15',
    'ayc_pe':                               'ayc_PE.UTF-8',
    'az':                                   'az_AZ.ISO8859-9E',
    'az_az':                                'az_AZ.ISO8859-9E',
    'az_az.iso88599e':                      'az_AZ.ISO8859-9E',
    'be':                                   'be_BY.CP1251',
    'be@latin':                             'be_BY.UTF-8@latin',
    'be_bg.utf8':                           'bg_BG.UTF-8',
    'be_by':                                'be_BY.CP1251',
    'be_by@latin':                          'be_BY.UTF-8@latin',
    'bem_zm':                               'bem_ZM.UTF-8',
    'ber_dz':                               'ber_DZ.UTF-8',
    'ber_ma':                               'ber_MA.UTF-8',
    'bg':                                   'bg_BG.CP1251',
    'bg_bg':                                'bg_BG.CP1251',
    'bho_in':                               'bho_IN.UTF-8',
    'bn_bd':                                'bn_BD.UTF-8',
    'bn_in':                                'bn_IN.UTF-8',
    'bo_cn':                                'bo_CN.UTF-8',
    'bo_in':                                'bo_IN.UTF-8',
    'bokmal':                               'nb_NO.ISO8859-1',
    'bokm\xe5l':                            'nb_NO.ISO8859-1',
    'br':                                   'br_FR.ISO8859-1',
    'br_fr':                                'br_FR.ISO8859-1',
    'brx_in':                               'brx_IN.UTF-8',
    'bs':                                   'bs_BA.ISO8859-2',
    'bs_ba':                                'bs_BA.ISO8859-2',
    'bulgarian':                            'bg_BG.CP1251',
    'byn_er':                               'byn_ER.UTF-8',
    'c':                                    'C',
    'c-french':                             'fr_CA.ISO8859-1',
    'c.ascii':                              'C',
    'c.en':                                 'C',
    'c.iso88591':                           'en_US.ISO8859-1',
    'c.utf8':                               'en_US.UTF-8',
    'c_c':                                  'C',
    'c_c.c':                                'C',
    'ca':                                   'ca_ES.ISO8859-1',
    'ca_ad':                                'ca_AD.ISO8859-1',
    'ca_es':                                'ca_ES.ISO8859-1',
    'ca_es@valencia':                       'ca_ES.ISO8859-15@valencia',
    'ca_fr':                                'ca_FR.ISO8859-1',
    'ca_it':                                'ca_IT.ISO8859-1',
    'catalan':                              'ca_ES.ISO8859-1',
    'cextend':                              'en_US.ISO8859-1',
    'chinese-s':                            'zh_CN.eucCN',
    'chinese-t':                            'zh_TW.eucTW',
    'crh_ua':                               'crh_UA.UTF-8',
    'croatian':                             'hr_HR.ISO8859-2',
    'cs':                                   'cs_CZ.ISO8859-2',
    'cs_cs':                                'cs_CZ.ISO8859-2',
    'cs_cz':                                'cs_CZ.ISO8859-2',
    'csb_pl':                               'csb_PL.UTF-8',
    'cv_ru':                                'cv_RU.UTF-8',
    'cy':                                   'cy_GB.ISO8859-1',
    'cy_gb':                                'cy_GB.ISO8859-1',
    'cz':                                   'cs_CZ.ISO8859-2',
    'cz_cz':                                'cs_CZ.ISO8859-2',
    'czech':                                'cs_CZ.ISO8859-2',
    'da':                                   'da_DK.ISO8859-1',
    'da_dk':                                'da_DK.ISO8859-1',
    'danish':                               'da_DK.ISO8859-1',
    'dansk':                                'da_DK.ISO8859-1',
    'de':                                   'de_DE.ISO8859-1',
    'de_at':                                'de_AT.ISO8859-1',
    'de_be':                                'de_BE.ISO8859-1',
    'de_ch':                                'de_CH.ISO8859-1',
    'de_de':                                'de_DE.ISO8859-1',
    'de_li.utf8':                           'de_LI.UTF-8',
    'de_lu':                                'de_LU.ISO8859-1',
    'deutsch':                              'de_DE.ISO8859-1',
    'doi_in':                               'doi_IN.UTF-8',
    'dutch':                                'nl_NL.ISO8859-1',
    'dutch.iso88591':                       'nl_BE.ISO8859-1',
    'dv_mv':                                'dv_MV.UTF-8',
    'dz_bt':                                'dz_BT.UTF-8',
    'ee':                                   'ee_EE.ISO8859-4',
    'ee_ee':                                'ee_EE.ISO8859-4',
    'eesti':                                'et_EE.ISO8859-1',
    'el':                                   'el_GR.ISO8859-7',
    'el_cy':                                'el_CY.ISO8859-7',
    'el_gr':                                'el_GR.ISO8859-7',
    'el_gr@euro':                           'el_GR.ISO8859-15',
    'en':                                   'en_US.ISO8859-1',
    'en_ag':                                'en_AG.UTF-8',
    'en_au':                                'en_AU.ISO8859-1',
    'en_be':                                'en_BE.ISO8859-1',
    'en_bw':                                'en_BW.ISO8859-1',
    'en_ca':                                'en_CA.ISO8859-1',
    'en_dk':                                'en_DK.ISO8859-1',
    'en_dl.utf8':                           'en_DL.UTF-8',
    'en_gb':                                'en_GB.ISO8859-1',
    'en_hk':                                'en_HK.ISO8859-1',
    'en_ie':                                'en_IE.ISO8859-1',
    'en_in':                                'en_IN.ISO8859-1',
    'en_ng':                                'en_NG.UTF-8',
    'en_nz':                                'en_NZ.ISO8859-1',
    'en_ph':                                'en_PH.ISO8859-1',
    'en_sg':                                'en_SG.ISO8859-1',
    'en_uk':                                'en_GB.ISO8859-1',
    'en_us':                                'en_US.ISO8859-1',
    'en_us@euro@euro':                      'en_US.ISO8859-15',
    'en_za':                                'en_ZA.ISO8859-1',
    'en_zm':                                'en_ZM.UTF-8',
    'en_zw':                                'en_ZW.ISO8859-1',
    # Territory must stay ZW (Zimbabwe), matching the 'en_zw' entry above;
    # the former value 'en_ZS.UTF-8' named a locale that does not exist.
    'en_zw.utf8':                           'en_ZW.UTF-8',
    'eng_gb':                               'en_GB.ISO8859-1',
    'english':                              'en_EN.ISO8859-1',
    'english_uk':                           'en_GB.ISO8859-1',
    'english_united-states':                'en_US.ISO8859-1',
    'english_united-states.437':            'C',
    'english_us':                           'en_US.ISO8859-1',
    'eo':                                   'eo_XX.ISO8859-3',
    'eo.utf8':                              'eo.UTF-8',
    'eo_eo':                                'eo_EO.ISO8859-3',
    'eo_us.utf8':                           'eo_US.UTF-8',
    'eo_xx':                                'eo_XX.ISO8859-3',
    'es':                                   'es_ES.ISO8859-1',
    'es_ar':                                'es_AR.ISO8859-1',
    'es_bo':                                'es_BO.ISO8859-1',
    'es_cl':                                'es_CL.ISO8859-1',
    'es_co':                                'es_CO.ISO8859-1',
    'es_cr':                                'es_CR.ISO8859-1',
    'es_cu':                                'es_CU.UTF-8',
    'es_do':                                'es_DO.ISO8859-1',
    'es_ec':                                'es_EC.ISO8859-1',
    'es_es':                                'es_ES.ISO8859-1',
    'es_gt':                                'es_GT.ISO8859-1',
    'es_hn':                                'es_HN.ISO8859-1',
    'es_mx':                                'es_MX.ISO8859-1',
    'es_ni':                                'es_NI.ISO8859-1',
    'es_pa':                                'es_PA.ISO8859-1',
    'es_pe':                                'es_PE.ISO8859-1',
    'es_pr':                                'es_PR.ISO8859-1',
    'es_py':                                'es_PY.ISO8859-1',
    'es_sv':                                'es_SV.ISO8859-1',
    'es_us':                                'es_US.ISO8859-1',
    'es_uy':                                'es_UY.ISO8859-1',
    'es_ve':                                'es_VE.ISO8859-1',
    'estonian':                             'et_EE.ISO8859-1',
    'et':                                   'et_EE.ISO8859-15',
    'et_ee':                                'et_EE.ISO8859-15',
    'eu':                                   'eu_ES.ISO8859-1',
    'eu_es':                                'eu_ES.ISO8859-1',
    'eu_fr':                                'eu_FR.ISO8859-1',
    'fa':                                   'fa_IR.UTF-8',
    'fa_ir':                                'fa_IR.UTF-8',
    'fa_ir.isiri3342':                      'fa_IR.ISIRI-3342',
    'ff_sn':                                'ff_SN.UTF-8',
    'fi':                                   'fi_FI.ISO8859-15',
    'fi_fi':                                'fi_FI.ISO8859-15',
    'fil_ph':                               'fil_PH.UTF-8',
    'finnish':                              'fi_FI.ISO8859-1',
    'fo':                                   'fo_FO.ISO8859-1',
    'fo_fo':                                'fo_FO.ISO8859-1',
    'fr':                                   'fr_FR.ISO8859-1',
    'fr_be':                                'fr_BE.ISO8859-1',
    'fr_ca':                                'fr_CA.ISO8859-1',
    'fr_ch':                                'fr_CH.ISO8859-1',
    'fr_fr':                                'fr_FR.ISO8859-1',
    'fr_lu':                                'fr_LU.ISO8859-1',
    'fran\xe7ais':                          'fr_FR.ISO8859-1',
    'fre_fr':                               'fr_FR.ISO8859-1',
    'french':                               'fr_FR.ISO8859-1',
    'french.iso88591':                      'fr_CH.ISO8859-1',
    'french_france':                        'fr_FR.ISO8859-1',
    'fur_it':                               'fur_IT.UTF-8',
    'fy_de':                                'fy_DE.UTF-8',
    'fy_nl':                                'fy_NL.UTF-8',
    'ga':                                   'ga_IE.ISO8859-1',
    'ga_ie':                                'ga_IE.ISO8859-1',
    'galego':                               'gl_ES.ISO8859-1',
    'galician':                             'gl_ES.ISO8859-1',
    'gd':                                   'gd_GB.ISO8859-1',
    'gd_gb':                                'gd_GB.ISO8859-1',
    'ger_de':                               'de_DE.ISO8859-1',
    'german':                               'de_DE.ISO8859-1',
    'german.iso88591':                      'de_CH.ISO8859-1',
    'german_germany':                       'de_DE.ISO8859-1',
    'gez_er':                               'gez_ER.UTF-8',
    'gez_et':                               'gez_ET.UTF-8',
    'gl':                                   'gl_ES.ISO8859-1',
    'gl_es':                                'gl_ES.ISO8859-1',
    'greek':                                'el_GR.ISO8859-7',
    'gu_in':                                'gu_IN.UTF-8',
    'gv':                                   'gv_GB.ISO8859-1',
    'gv_gb':                                'gv_GB.ISO8859-1',
    'ha_ng':                                'ha_NG.UTF-8',
    'he':                                   'he_IL.ISO8859-8',
    'he_il':                                'he_IL.ISO8859-8',
    'hebrew':                               'he_IL.ISO8859-8',
    'hi':                                   'hi_IN.ISCII-DEV',
    'hi_in':                                'hi_IN.ISCII-DEV',
    'hi_in.isciidev':                       'hi_IN.ISCII-DEV',
    'hne':                                  'hne_IN.UTF-8',
    'hne_in':                               'hne_IN.UTF-8',
    'hr':                                   'hr_HR.ISO8859-2',
    'hr_hr':                                'hr_HR.ISO8859-2',
    'hrvatski':                             'hr_HR.ISO8859-2',
    'hsb_de':                               'hsb_DE.ISO8859-2',
    'ht_ht':                                'ht_HT.UTF-8',
    'hu':                                   'hu_HU.ISO8859-2',
    'hu_hu':                                'hu_HU.ISO8859-2',
    'hungarian':                            'hu_HU.ISO8859-2',
    'hy_am':                                'hy_AM.UTF-8',
    'hy_am.armscii8':                       'hy_AM.ARMSCII_8',
    'ia':                                   'ia.UTF-8',
    'ia_fr':                                'ia_FR.UTF-8',
    'icelandic':                            'is_IS.ISO8859-1',
    'id':                                   'id_ID.ISO8859-1',
    'id_id':                                'id_ID.ISO8859-1',
    'ig_ng':                                'ig_NG.UTF-8',
    'ik_ca':                                'ik_CA.UTF-8',
    'in':                                   'id_ID.ISO8859-1',
    'in_id':                                'id_ID.ISO8859-1',
    'is':                                   'is_IS.ISO8859-1',
    'is_is':                                'is_IS.ISO8859-1',
    'iso-8859-1':                           'en_US.ISO8859-1',
    'iso-8859-15':                          'en_US.ISO8859-15',
    'iso8859-1':                            'en_US.ISO8859-1',
    'iso8859-15':                           'en_US.ISO8859-15',
    'iso_8859_1':                           'en_US.ISO8859-1',
    'iso_8859_15':                          'en_US.ISO8859-15',
    'it':                                   'it_IT.ISO8859-1',
    'it_ch':                                'it_CH.ISO8859-1',
    'it_it':                                'it_IT.ISO8859-1',
    'italian':                              'it_IT.ISO8859-1',
    'iu':                                   'iu_CA.NUNACOM-8',
    'iu_ca':                                'iu_CA.NUNACOM-8',
    'iu_ca.nunacom8':                       'iu_CA.NUNACOM-8',
    'iw':                                   'he_IL.ISO8859-8',
    'iw_il':                                'he_IL.ISO8859-8',
    'iw_il.utf8':                           'iw_IL.UTF-8',
    'ja':                                   'ja_JP.eucJP',
    'ja_jp':                                'ja_JP.eucJP',
    'ja_jp.euc':                            'ja_JP.eucJP',
    'ja_jp.mscode':                         'ja_JP.SJIS',
    'ja_jp.pck':                            'ja_JP.SJIS',
    'japan':                                'ja_JP.eucJP',
    'japanese':                             'ja_JP.eucJP',
    'japanese-euc':                         'ja_JP.eucJP',
    'japanese.euc':                         'ja_JP.eucJP',
    'jp_jp':                                'ja_JP.eucJP',
    'ka':                                   'ka_GE.GEORGIAN-ACADEMY',
    'ka_ge':                                'ka_GE.GEORGIAN-ACADEMY',
    'ka_ge.georgianacademy':                'ka_GE.GEORGIAN-ACADEMY',
    'ka_ge.georgianps':                     'ka_GE.GEORGIAN-PS',
    'ka_ge.georgianrs':                     'ka_GE.GEORGIAN-ACADEMY',
    'kk_kz':                                'kk_KZ.RK1048',
    'kl':                                   'kl_GL.ISO8859-1',
    'kl_gl':                                'kl_GL.ISO8859-1',
    'km_kh':                                'km_KH.UTF-8',
    'kn':                                   'kn_IN.UTF-8',
    'kn_in':                                'kn_IN.UTF-8',
    'ko':                                   'ko_KR.eucKR',
    'ko_kr':                                'ko_KR.eucKR',
    'ko_kr.euc':                            'ko_KR.eucKR',
    'kok_in':                               'kok_IN.UTF-8',
    'korean':                               'ko_KR.eucKR',
    'korean.euc':                           'ko_KR.eucKR',
    'ks':                                   'ks_IN.UTF-8',
    'ks_in':                                'ks_IN.UTF-8',
    'ks_in@devanagari.utf8':                'ks_IN.UTF-8@devanagari',
    'ku_tr':                                'ku_TR.ISO8859-9',
    'kw':                                   'kw_GB.ISO8859-1',
    'kw_gb':                                'kw_GB.ISO8859-1',
    'ky':                                   'ky_KG.UTF-8',
    'ky_kg':                                'ky_KG.UTF-8',
    'lb_lu':                                'lb_LU.UTF-8',
    'lg_ug':                                'lg_UG.ISO8859-10',
    'li_be':                                'li_BE.UTF-8',
    'li_nl':                                'li_NL.UTF-8',
    'lij_it':                               'lij_IT.UTF-8',
    'lithuanian':                           'lt_LT.ISO8859-13',
    'lo':                                   'lo_LA.MULELAO-1',
    'lo_la':                                'lo_LA.MULELAO-1',
    'lo_la.cp1133':                         'lo_LA.IBM-CP1133',
    'lo_la.ibmcp1133':                      'lo_LA.IBM-CP1133',
    'lo_la.mulelao1':                       'lo_LA.MULELAO-1',
    'lt':                                   'lt_LT.ISO8859-13',
    'lt_lt':                                'lt_LT.ISO8859-13',
    'lv':                                   'lv_LV.ISO8859-13',
    'lv_lv':                                'lv_LV.ISO8859-13',
    'mag_in':                               'mag_IN.UTF-8',
    'mai':                                  'mai_IN.UTF-8',
    'mai_in':                               'mai_IN.UTF-8',
    'mg_mg':                                'mg_MG.ISO8859-15',
    'mhr_ru':                               'mhr_RU.UTF-8',
    'mi':                                   'mi_NZ.ISO8859-1',
    'mi_nz':                                'mi_NZ.ISO8859-1',
    'mk':                                   'mk_MK.ISO8859-5',
    'mk_mk':                                'mk_MK.ISO8859-5',
    'ml':                                   'ml_IN.UTF-8',
    'ml_in':                                'ml_IN.UTF-8',
    'mn_mn':                                'mn_MN.UTF-8',
    'mni_in':                               'mni_IN.UTF-8',
    'mr':                                   'mr_IN.UTF-8',
    'mr_in':                                'mr_IN.UTF-8',
    'ms':                                   'ms_MY.ISO8859-1',
    'ms_my':                                'ms_MY.ISO8859-1',
    'mt':                                   'mt_MT.ISO8859-3',
    'mt_mt':                                'mt_MT.ISO8859-3',
    'my_mm':                                'my_MM.UTF-8',
    'nan_tw@latin':                         'nan_TW.UTF-8@latin',
    'nb':                                   'nb_NO.ISO8859-1',
    'nb_no':                                'nb_NO.ISO8859-1',
    'nds_de':                               'nds_DE.UTF-8',
    'nds_nl':                               'nds_NL.UTF-8',
    'ne_np':                                'ne_NP.UTF-8',
    'nhn_mx':                               'nhn_MX.UTF-8',
    'niu_nu':                               'niu_NU.UTF-8',
    'niu_nz':                               'niu_NZ.UTF-8',
    'nl':                                   'nl_NL.ISO8859-1',
    'nl_aw':                                'nl_AW.UTF-8',
    'nl_be':                                'nl_BE.ISO8859-1',
    'nl_nl':                                'nl_NL.ISO8859-1',
    'nn':                                   'nn_NO.ISO8859-1',
    'nn_no':                                'nn_NO.ISO8859-1',
    'no':                                   'no_NO.ISO8859-1',
    'no@nynorsk':                           'ny_NO.ISO8859-1',
    'no_no':                                'no_NO.ISO8859-1',
    'no_no.iso88591@bokmal':                'no_NO.ISO8859-1',
    'no_no.iso88591@nynorsk':               'no_NO.ISO8859-1',
    'norwegian':                            'no_NO.ISO8859-1',
    'nr':                                   'nr_ZA.ISO8859-1',
    'nr_za':                                'nr_ZA.ISO8859-1',
    'nso':                                  'nso_ZA.ISO8859-15',
    'nso_za':                               'nso_ZA.ISO8859-15',
    'ny':                                   'ny_NO.ISO8859-1',
    'ny_no':                                'ny_NO.ISO8859-1',
    'nynorsk':                              'nn_NO.ISO8859-1',
    'oc':                                   'oc_FR.ISO8859-1',
    'oc_fr':                                'oc_FR.ISO8859-1',
    'om_et':                                'om_ET.UTF-8',
    'om_ke':                                'om_KE.ISO8859-1',
    'or':                                   'or_IN.UTF-8',
    'or_in':                                'or_IN.UTF-8',
    'os_ru':                                'os_RU.UTF-8',
    'pa':                                   'pa_IN.UTF-8',
    'pa_in':                                'pa_IN.UTF-8',
    'pa_pk':                                'pa_PK.UTF-8',
    'pap_an':                               'pap_AN.UTF-8',
    'pd':                                   'pd_US.ISO8859-1',
    'pd_de':                                'pd_DE.ISO8859-1',
    'pd_us':                                'pd_US.ISO8859-1',
    'ph':                                   'ph_PH.ISO8859-1',
    'ph_ph':                                'ph_PH.ISO8859-1',
    'pl':                                   'pl_PL.ISO8859-2',
    'pl_pl':                                'pl_PL.ISO8859-2',
    'polish':                               'pl_PL.ISO8859-2',
    'portuguese':                           'pt_PT.ISO8859-1',
    'portuguese_brazil':                    'pt_BR.ISO8859-1',
    'posix':                                'C',
    'posix-utf2':                           'C',
    'pp':                                   'pp_AN.ISO8859-1',
    'pp_an':                                'pp_AN.ISO8859-1',
    'ps_af':                                'ps_AF.UTF-8',
    'pt':                                   'pt_PT.ISO8859-1',
    'pt_br':                                'pt_BR.ISO8859-1',
    'pt_pt':                                'pt_PT.ISO8859-1',
    'ro':                                   'ro_RO.ISO8859-2',
    'ro_ro':                                'ro_RO.ISO8859-2',
    'romanian':                             'ro_RO.ISO8859-2',
    'ru':                                   'ru_RU.UTF-8',
    'ru_ru':                                'ru_RU.UTF-8',
    'ru_ua':                                'ru_UA.KOI8-U',
    'rumanian':                             'ro_RO.ISO8859-2',
    'russian':                              'ru_RU.ISO8859-5',
    'rw':                                   'rw_RW.ISO8859-1',
    'rw_rw':                                'rw_RW.ISO8859-1',
    'sa_in':                                'sa_IN.UTF-8',
    'sat_in':                               'sat_IN.UTF-8',
    'sc_it':                                'sc_IT.UTF-8',
    'sd':                                   'sd_IN.UTF-8',
    'sd_in':                                'sd_IN.UTF-8',
    'sd_in@devanagari.utf8':                'sd_IN.UTF-8@devanagari',
    'sd_pk':                                'sd_PK.UTF-8',
    'se_no':                                'se_NO.UTF-8',
    'serbocroatian':                        'sr_RS.UTF-8@latin',
    'sh':                                   'sr_RS.UTF-8@latin',
    'sh_ba.iso88592@bosnia':                'sr_CS.ISO8859-2',
    'sh_hr':                                'sh_HR.ISO8859-2',
    'sh_hr.iso88592':                       'hr_HR.ISO8859-2',
    'sh_sp':                                'sr_CS.ISO8859-2',
    'sh_yu':                                'sr_RS.UTF-8@latin',
    'shs_ca':                               'shs_CA.UTF-8',
    'si':                                   'si_LK.UTF-8',
    'si_lk':                                'si_LK.UTF-8',
    'sid_et':                               'sid_ET.UTF-8',
    'sinhala':                              'si_LK.UTF-8',
    'sk':                                   'sk_SK.ISO8859-2',
    'sk_sk':                                'sk_SK.ISO8859-2',
    'sl':                                   'sl_SI.ISO8859-2',
    'sl_cs':                                'sl_CS.ISO8859-2',
    'sl_si':                                'sl_SI.ISO8859-2',
    'slovak':                               'sk_SK.ISO8859-2',
    'slovene':                              'sl_SI.ISO8859-2',
    'slovenian':                            'sl_SI.ISO8859-2',
    'so_dj':                                'so_DJ.ISO8859-1',
    'so_et':                                'so_ET.UTF-8',
    'so_ke':                                'so_KE.ISO8859-1',
    'so_so':                                'so_SO.ISO8859-1',
    'sp':                                   'sr_CS.ISO8859-5',
    'sp_yu':                                'sr_CS.ISO8859-5',
    'spanish':                              'es_ES.ISO8859-1',
    'spanish_spain':                        'es_ES.ISO8859-1',
    'sq':                                   'sq_AL.ISO8859-2',
    'sq_al':                                'sq_AL.ISO8859-2',
    'sq_mk':                                'sq_MK.UTF-8',
    'sr':                                   'sr_RS.UTF-8',
    'sr@cyrillic':                          'sr_RS.UTF-8',
    'sr@latn':                              'sr_CS.UTF-8@latin',
    'sr_cs':                                'sr_CS.UTF-8',
    'sr_cs.iso88592@latn':                  'sr_CS.ISO8859-2',
    'sr_cs@latn':                           'sr_CS.UTF-8@latin',
    'sr_me':                                'sr_ME.UTF-8',
    'sr_rs':                                'sr_RS.UTF-8',
    'sr_rs@latn':                           'sr_RS.UTF-8@latin',
    'sr_sp':                                'sr_CS.ISO8859-2',
    'sr_yu':                                'sr_RS.UTF-8@latin',
    'sr_yu.cp1251@cyrillic':                'sr_CS.CP1251',
    'sr_yu.iso88592':                       'sr_CS.ISO8859-2',
    'sr_yu.iso88595':                       'sr_CS.ISO8859-5',
    'sr_yu.iso88595@cyrillic':              'sr_CS.ISO8859-5',
    'sr_yu.microsoftcp1251@cyrillic':       'sr_CS.CP1251',
    'sr_yu.utf8':                           'sr_RS.UTF-8',
    'sr_yu.utf8@cyrillic':                  'sr_RS.UTF-8',
    'sr_yu@cyrillic':                       'sr_RS.UTF-8',
    'ss':                                   'ss_ZA.ISO8859-1',
    'ss_za':                                'ss_ZA.ISO8859-1',
    'st':                                   'st_ZA.ISO8859-1',
    'st_za':                                'st_ZA.ISO8859-1',
    'sv':                                   'sv_SE.ISO8859-1',
    'sv_fi':                                'sv_FI.ISO8859-1',
    'sv_se':                                'sv_SE.ISO8859-1',
    'sw_ke':                                'sw_KE.UTF-8',
    'sw_tz':                                'sw_TZ.UTF-8',
    'swedish':                              'sv_SE.ISO8859-1',
    'szl_pl':                               'szl_PL.UTF-8',
    'ta':                                   'ta_IN.TSCII-0',
    'ta_in':                                'ta_IN.TSCII-0',
    'ta_in.tscii':                          'ta_IN.TSCII-0',
    'ta_in.tscii0':                         'ta_IN.TSCII-0',
    'ta_lk':                                'ta_LK.UTF-8',
    'te':                                   'te_IN.UTF-8',
    'te_in':                                'te_IN.UTF-8',
    'tg':                                   'tg_TJ.KOI8-C',
    'tg_tj':                                'tg_TJ.KOI8-C',
    'th':                                   'th_TH.ISO8859-11',
    'th_th':                                'th_TH.ISO8859-11',
    'th_th.tactis':                         'th_TH.TIS620',
    'th_th.tis620':                         'th_TH.TIS620',
    'thai':                                 'th_TH.ISO8859-11',
    'ti_er':                                'ti_ER.UTF-8',
    'ti_et':                                'ti_ET.UTF-8',
    'tig_er':                               'tig_ER.UTF-8',
    'tk_tm':                                'tk_TM.UTF-8',
    'tl':                                   'tl_PH.ISO8859-1',
    'tl_ph':                                'tl_PH.ISO8859-1',
    'tn':                                   'tn_ZA.ISO8859-15',
    'tn_za':                                'tn_ZA.ISO8859-15',
    'tr':                                   'tr_TR.ISO8859-9',
    'tr_cy':                                'tr_CY.ISO8859-9',
    'tr_tr':                                'tr_TR.ISO8859-9',
    'ts':                                   'ts_ZA.ISO8859-1',
    'ts_za':                                'ts_ZA.ISO8859-1',
    'tt':                                   'tt_RU.TATAR-CYR',
    'tt_ru':                                'tt_RU.TATAR-CYR',
    'tt_ru.tatarcyr':                       'tt_RU.TATAR-CYR',
    'tt_ru@iqtelif':                        'tt_RU.UTF-8@iqtelif',
    'turkish':                              'tr_TR.ISO8859-9',
    'ug_cn':                                'ug_CN.UTF-8',
    'uk':                                   'uk_UA.KOI8-U',
    'uk_ua':                                'uk_UA.KOI8-U',
    'univ':                                 'en_US.utf',
    'universal':                            'en_US.utf',
    'universal.utf8@ucs4':                  'en_US.UTF-8',
    'unm_us':                               'unm_US.UTF-8',
    'ur':                                   'ur_PK.CP1256',
    'ur_in':                                'ur_IN.UTF-8',
    'ur_pk':                                'ur_PK.CP1256',
    'uz':                                   'uz_UZ.UTF-8',
    'uz_uz':                                'uz_UZ.UTF-8',
    'uz_uz@cyrillic':                       'uz_UZ.UTF-8',
    've':                                   've_ZA.UTF-8',
    've_za':                                've_ZA.UTF-8',
    'vi':                                   'vi_VN.TCVN',
    'vi_vn':                                'vi_VN.TCVN',
    'vi_vn.tcvn':                           'vi_VN.TCVN',
    'vi_vn.tcvn5712':                       'vi_VN.TCVN',
    'vi_vn.viscii':                         'vi_VN.VISCII',
    'vi_vn.viscii111':                      'vi_VN.VISCII',
    'wa':                                   'wa_BE.ISO8859-1',
    'wa_be':                                'wa_BE.ISO8859-1',
    'wae_ch':                               'wae_CH.UTF-8',
    'wal_et':                               'wal_ET.UTF-8',
    'wo_sn':                                'wo_SN.UTF-8',
    'xh':                                   'xh_ZA.ISO8859-1',
    'xh_za':                                'xh_ZA.ISO8859-1',
    'yi':                                   'yi_US.CP1255',
    'yi_us':                                'yi_US.CP1255',
    'yo_ng':                                'yo_NG.UTF-8',
    'yue_hk':                               'yue_HK.UTF-8',
    'zh':                                   'zh_CN.eucCN',
    'zh_cn':                                'zh_CN.gb2312',
    'zh_cn.big5':                           'zh_TW.big5',
    'zh_cn.euc':                            'zh_CN.eucCN',
    'zh_hk':                                'zh_HK.big5hkscs',
    'zh_hk.big5hk':                         'zh_HK.big5hkscs',
    'zh_sg':                                'zh_SG.GB2312',
    'zh_sg.gbk':                            'zh_SG.GBK',
    'zh_tw':                                'zh_TW.big5',
    'zh_tw.euc':                            'zh_TW.eucTW',
    'zh_tw.euctw':                          'zh_TW.eucTW',
    'zu':                                   'zu_ZA.ISO8859-1',
    'zu_za':                                'zu_ZA.ISO8859-1',
}
1391n/a
1392n/a#
1393n/a# This maps Windows language identifiers to locale strings.
1394n/a#
1395n/a# This list has been updated from
1396n/a# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
1397n/a# to include every locale up to Windows Vista.
1398n/a#
1399n/a# NOTE: this mapping is incomplete. If your language is missing, please
1400n/a# submit a bug report to the Python bug tracker at http://bugs.python.org/
1401n/a# Make sure you include the missing language identifier and the suggested
1402n/a# locale code.
1403n/a#
1404n/a
# Maps numeric Windows language identifiers (LCIDs) to "language_TERRITORY"
# locale codes.  Several entries intentionally keep legacy Windows-style
# names (e.g. "div_MV", "wen_DE", "sr_SP", "zh_CHS"); only codes that never
# identified the commented language/territory have been corrected.
windows_locale = {
    0x0436: "af_ZA", # Afrikaans
    0x041c: "sq_AL", # Albanian
    0x0484: "gsw_FR",# Alsatian - France
    0x045e: "am_ET", # Amharic - Ethiopia
    0x0401: "ar_SA", # Arabic - Saudi Arabia
    0x0801: "ar_IQ", # Arabic - Iraq
    0x0c01: "ar_EG", # Arabic - Egypt
    0x1001: "ar_LY", # Arabic - Libya
    0x1401: "ar_DZ", # Arabic - Algeria
    0x1801: "ar_MA", # Arabic - Morocco
    0x1c01: "ar_TN", # Arabic - Tunisia
    0x2001: "ar_OM", # Arabic - Oman
    0x2401: "ar_YE", # Arabic - Yemen
    0x2801: "ar_SY", # Arabic - Syria
    0x2c01: "ar_JO", # Arabic - Jordan
    0x3001: "ar_LB", # Arabic - Lebanon
    0x3401: "ar_KW", # Arabic - Kuwait
    0x3801: "ar_AE", # Arabic - United Arab Emirates
    0x3c01: "ar_BH", # Arabic - Bahrain
    0x4001: "ar_QA", # Arabic - Qatar
    0x042b: "hy_AM", # Armenian
    0x044d: "as_IN", # Assamese - India
    0x042c: "az_AZ", # Azeri - Latin
    0x082c: "az_AZ", # Azeri - Cyrillic
    0x046d: "ba_RU", # Bashkir
    0x042d: "eu_ES", # Basque
    0x0423: "be_BY", # Belarusian
    0x0445: "bn_IN", # Bengali
    0x201a: "bs_BA", # Bosnian - Cyrillic
    0x141a: "bs_BA", # Bosnian - Latin
    0x047e: "br_FR", # Breton - France
    0x0402: "bg_BG", # Bulgarian
#    0x0455: "my_MM", # Burmese - Not supported
    0x0403: "ca_ES", # Catalan
    0x0004: "zh_CHS",# Chinese - Simplified
    0x0404: "zh_TW", # Chinese - Taiwan
    0x0804: "zh_CN", # Chinese - PRC
    0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
    0x1004: "zh_SG", # Chinese - Singapore
    0x1404: "zh_MO", # Chinese - Macao S.A.R.
    0x7c04: "zh_CHT",# Chinese - Traditional
    0x0483: "co_FR", # Corsican - France
    0x041a: "hr_HR", # Croatian
    0x101a: "hr_BA", # Croatian - Bosnia
    0x0405: "cs_CZ", # Czech
    0x0406: "da_DK", # Danish
    0x048c: "gbz_AF",# Dari - Afghanistan
    0x0465: "div_MV",# Divehi - Maldives
    0x0413: "nl_NL", # Dutch - The Netherlands
    0x0813: "nl_BE", # Dutch - Belgium
    0x0409: "en_US", # English - United States
    0x0809: "en_GB", # English - United Kingdom
    0x0c09: "en_AU", # English - Australia
    0x1009: "en_CA", # English - Canada
    0x1409: "en_NZ", # English - New Zealand
    0x1809: "en_IE", # English - Ireland
    0x1c09: "en_ZA", # English - South Africa
    0x2009: "en_JM", # English - Jamaica
    0x2409: "en_CB", # English - Caribbean
    0x2809: "en_BZ", # English - Belize
    0x2c09: "en_TT", # English - Trinidad
    0x3009: "en_ZW", # English - Zimbabwe
    0x3409: "en_PH", # English - Philippines
    0x4009: "en_IN", # English - India
    0x4409: "en_MY", # English - Malaysia
    0x4809: "en_SG", # English - Singapore
    0x0425: "et_EE", # Estonian
    0x0438: "fo_FO", # Faroese
    0x0464: "fil_PH",# Filipino
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French - France
    0x080c: "fr_BE", # French - Belgium
    0x0c0c: "fr_CA", # French - Canada
    0x100c: "fr_CH", # French - Switzerland
    0x140c: "fr_LU", # French - Luxembourg
    0x180c: "fr_MC", # French - Monaco
    0x0462: "fy_NL", # Frisian - Netherlands
    0x0456: "gl_ES", # Galician
    0x0437: "ka_GE", # Georgian
    0x0407: "de_DE", # German - Germany
    0x0807: "de_CH", # German - Switzerland
    0x0c07: "de_AT", # German - Austria
    0x1007: "de_LU", # German - Luxembourg
    0x1407: "de_LI", # German - Liechtenstein
    0x0408: "el_GR", # Greek
    0x046f: "kl_GL", # Greenlandic - Greenland
    0x0447: "gu_IN", # Gujarati
    0x0468: "ha_NG", # Hausa - Latin
    0x040d: "he_IL", # Hebrew
    0x0439: "hi_IN", # Hindi
    0x040e: "hu_HU", # Hungarian
    0x040f: "is_IS", # Icelandic
    0x0421: "id_ID", # Indonesian
    0x045d: "iu_CA", # Inuktitut - Syllabics
    0x085d: "iu_CA", # Inuktitut - Latin
    0x083c: "ga_IE", # Irish - Ireland
    0x0410: "it_IT", # Italian - Italy
    0x0810: "it_CH", # Italian - Switzerland
    0x0411: "ja_JP", # Japanese
    0x044b: "kn_IN", # Kannada - India
    0x043f: "kk_KZ", # Kazakh
    0x0453: "km_KH", # Khmer - Cambodia
    0x0486: "qut_GT",# K'iche - Guatemala
    0x0487: "rw_RW", # Kinyarwanda - Rwanda
    0x0457: "kok_IN",# Konkani
    0x0412: "ko_KR", # Korean
    0x0440: "ky_KG", # Kyrgyz
    0x0454: "lo_LA", # Lao - Lao PDR
    0x0426: "lv_LV", # Latvian
    0x0427: "lt_LT", # Lithuanian
    0x082e: "dsb_DE",# Lower Sorbian - Germany
    0x046e: "lb_LU", # Luxembourgish
    0x042f: "mk_MK", # FYROM Macedonian
    0x043e: "ms_MY", # Malay - Malaysia
    0x083e: "ms_BN", # Malay - Brunei Darussalam
    0x044c: "ml_IN", # Malayalam - India
    0x043a: "mt_MT", # Maltese
    0x0481: "mi_NZ", # Maori
    0x047a: "arn_CL",# Mapudungun
    0x044e: "mr_IN", # Marathi
    0x047c: "moh_CA",# Mohawk - Canada
    0x0450: "mn_MN", # Mongolian - Cyrillic
    0x0850: "mn_CN", # Mongolian - PRC
    0x0461: "ne_NP", # Nepali
    0x0414: "nb_NO", # Norwegian - Bokmal
    0x0814: "nn_NO", # Norwegian - Nynorsk
    0x0482: "oc_FR", # Occitan - France
    0x0448: "or_IN", # Oriya - India
    0x0463: "ps_AF", # Pashto - Afghanistan
    0x0429: "fa_IR", # Persian
    0x0415: "pl_PL", # Polish
    0x0416: "pt_BR", # Portuguese - Brazil
    0x0816: "pt_PT", # Portuguese - Portugal
    0x0446: "pa_IN", # Punjabi
    0x046b: "quz_BO",# Quechua (Bolivia)
    0x086b: "quz_EC",# Quechua (Ecuador)
    0x0c6b: "quz_PE",# Quechua (Peru)
    0x0418: "ro_RO", # Romanian - Romania
    0x0417: "rm_CH", # Romansh
    0x0419: "ru_RU", # Russian
    0x243b: "smn_FI",# Sami Finland
    0x103b: "smj_NO",# Sami Norway
    0x143b: "smj_SE",# Sami Sweden
    0x043b: "se_NO", # Sami Northern Norway
    0x083b: "se_SE", # Sami Northern Sweden
    0x0c3b: "se_FI", # Sami Northern Finland
    0x203b: "sms_FI",# Sami Skolt
    0x183b: "sma_NO",# Sami Southern Norway
    0x1c3b: "sma_SE",# Sami Southern Sweden
    0x044f: "sa_IN", # Sanskrit
    0x0c1a: "sr_SP", # Serbian - Cyrillic
    0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
    0x081a: "sr_SP", # Serbian - Latin
    0x181a: "sr_BA", # Serbian - Bosnia Latin
    0x045b: "si_LK", # Sinhala - Sri Lanka
    0x046c: "ns_ZA", # Northern Sotho
    0x0432: "tn_ZA", # Setswana - Southern Africa
    0x041b: "sk_SK", # Slovak
    0x0424: "sl_SI", # Slovenian
    0x040a: "es_ES", # Spanish - Spain
    0x080a: "es_MX", # Spanish - Mexico
    0x0c0a: "es_ES", # Spanish - Spain (Modern)
    0x100a: "es_GT", # Spanish - Guatemala
    0x140a: "es_CR", # Spanish - Costa Rica
    0x180a: "es_PA", # Spanish - Panama
    0x1c0a: "es_DO", # Spanish - Dominican Republic
    0x200a: "es_VE", # Spanish - Venezuela
    0x240a: "es_CO", # Spanish - Colombia
    0x280a: "es_PE", # Spanish - Peru
    0x2c0a: "es_AR", # Spanish - Argentina
    0x300a: "es_EC", # Spanish - Ecuador
    0x340a: "es_CL", # Spanish - Chile
    0x380a: "es_UY", # Spanish - Uruguay
    0x3c0a: "es_PY", # Spanish - Paraguay
    0x400a: "es_BO", # Spanish - Bolivia
    0x440a: "es_SV", # Spanish - El Salvador
    0x480a: "es_HN", # Spanish - Honduras
    0x4c0a: "es_NI", # Spanish - Nicaragua
    0x500a: "es_PR", # Spanish - Puerto Rico
    0x540a: "es_US", # Spanish - United States
#    0x0430: "", # Sutu - Not supported
    0x0441: "sw_KE", # Swahili
    0x041d: "sv_SE", # Swedish - Sweden
    0x081d: "sv_FI", # Swedish - Finland
    0x045a: "syr_SY",# Syriac
    0x0428: "tg_TJ", # Tajik - Cyrillic
    0x085f: "tmz_DZ",# Tamazight - Latin
    0x0449: "ta_IN", # Tamil
    0x0444: "tt_RU", # Tatar
    0x044a: "te_IN", # Telugu
    0x041e: "th_TH", # Thai
    0x0851: "bo_BT", # Tibetan - Bhutan
    0x0451: "bo_CN", # Tibetan - PRC
    0x041f: "tr_TR", # Turkish
    0x0442: "tk_TM", # Turkmen - Cyrillic
    0x0480: "ug_CN", # Uighur - Arabic
    0x0422: "uk_UA", # Ukrainian
    0x042e: "wen_DE",# Upper Sorbian - Germany
    0x0420: "ur_PK", # Urdu
    0x0820: "ur_IN", # Urdu - India
    0x0443: "uz_UZ", # Uzbek - Latin
    0x0843: "uz_UZ", # Uzbek - Cyrillic
    0x042a: "vi_VN", # Vietnamese
    0x0452: "cy_GB", # Welsh
    0x0488: "wo_SN", # Wolof - Senegal
    0x0434: "xh_ZA", # Xhosa - South Africa
    0x0485: "sah_RU",# Yakut - Cyrillic
    0x0478: "ii_CN", # Yi - PRC
    0x046a: "yo_NG", # Yoruba - Nigeria
    0x0435: "zu_ZA", # Zulu
}
1617n/a
def _print_locale():

    """ Test function.

        Prints the values returned by getdefaultlocale(), then the
        language and encoding returned by getlocale() for every LC_*
        category except LC_ALL: once as found on entry, once after
        resetlocale(), and once after setlocale(LC_ALL, "") when that
        call succeeds.
    """
    # Every module-level name starting with 'LC_' is treated as a locale
    # category.  LC_ALL is left out of the per-category report.
    categories = {name: value for name, value in globals().items()
                  if name.startswith('LC_') and name != 'LC_ALL'}

    def _print_category_settings():
        # Report the language/encoding pair getlocale() returns per category.
        for name, category in categories.items():
            print(name, '...')
            lang, enc = getlocale(category)
            print(' Language: ', lang or '(undefined)')
            print(' Encoding: ', enc or '(undefined)')
            print()

    print('Locale defaults as determined by getdefaultlocale():')
    print('-'*72)
    lang, enc = getdefaultlocale()
    print('Language: ', lang or '(undefined)')
    print('Encoding: ', enc or '(undefined)')
    print()

    print('Locale settings on startup:')
    print('-'*72)
    _print_category_settings()

    print()
    print('Locale settings after calling resetlocale():')
    print('-'*72)
    resetlocale()
    _print_category_settings()

    try:
        setlocale(LC_ALL, "")
    except Error:
        # Only a locale failure is reported here; a bare "except:" would
        # also swallow KeyboardInterrupt and SystemExit.
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print()
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-'*72)
        _print_category_settings()
1673n/a
1674n/a###
1675n/a
# LC_MESSAGES is non-standard, so it may not have been defined above.
# Export it through __all__ only when the name actually exists.
if 'LC_MESSAGES' in globals():
    __all__.append("LC_MESSAGES")
1682n/a
if __name__ == '__main__':
    # Script mode: print the locale report, then run the module's _test().
    # Each heading is followed by one blank line, emitted via end='\n\n'.
    print('Locale aliasing:', end='\n\n')
    _print_locale()
    print()
    print('Number formatting:', end='\n\n')
    _test()