»Core Development>Code coverage>Lib/encodings/punycode.py

Python code coverage for Lib/encodings/punycode.py

#countcontent
1n/a""" Codec for the Punycode encoding, as specified in RFC 3492
2n/a
3n/aWritten by Martin v. Löwis.
4n/a"""
5n/a
6n/aimport codecs
7n/a
8n/a##################### Encoding #####################################
9n/a
def segregate(str):
    """3.1 Basic code point segregation

    Split *str* into its basic (ASCII) and extended parts.

    Returns a pair ``(base, extended)``: ``base`` is a bytes object holding
    the ordinals below 128 in their original order, and ``extended`` is a
    sorted list of the distinct characters with ordinal 128 or above.
    """
    ascii_part = bytearray()
    non_ascii = set()
    for ch in str:
        code = ord(ch)
        if code >= 128:
            non_ascii.add(ch)
        else:
            ascii_part.append(code)
    return bytes(ascii_part), sorted(non_ascii)
21n/a
def selective_len(str, max):
    """Count the characters of *str* whose ordinal is strictly below *max*."""
    return sum(1 for ch in str if ord(ch) < max)
29n/a
def selective_find(str, char, index, pos):
    """Locate the next occurrence of *char* in *str* after position *pos*.

    The scan starts at ``pos + 1``.  *index* is the running count that
    ignores characters greater than *char*; it is advanced for every
    smaller character that is skipped.

    Returns ``(index, pos)`` for the match, where ``pos`` is the position
    in the full string, or ``(-1, -1)`` when the end of *str* is reached
    without finding *char*.
    """
    end = len(str)
    while True:
        pos += 1
        if pos == end:
            return (-1, -1)
        current = str[pos]
        if current < char:
            # Smaller characters occupy a slot in the selective index.
            index += 1
        elif current == char:
            return index + 1, pos
47n/a
def insertion_unsort(str, extended):
    """3.2 Insertion unsort coding

    Compute the list of deltas for *str*, given *extended*, the sorted
    sequence of its distinct non-basic characters (as produced by
    segregate()).  One delta is emitted per occurrence of each extended
    character.
    """
    deltas = []
    prev_code = 0x80
    prev_index = -1
    for ch in extended:
        code = ord(ch)
        # Account for every position skipped while moving from the previous
        # code point up to this one.
        delta = (selective_len(str, code) + 1) * (code - prev_code)
        index = pos = -1
        while True:
            index, pos = selective_find(str, ch, index, pos)
            if index == -1:
                break
            delta += index - prev_index
            deltas.append(delta - 1)
            prev_index = index
            delta = 0
        prev_code = code

    return deltas
69n/a
def T(j, bias):
    """Return the threshold for digit position *j*, clamped to [1, 26]."""
    # Punycode parameters: tmin = 1, tmax = 26, base = 36
    threshold = 36 * (j + 1) - bias
    return min(26, max(1, threshold))
76n/a
# Digit alphabet: values 0-25 map to a-z, values 26-35 map to 0-9.
digits = b"abcdefghijklmnopqrstuvwxyz0123456789"


def generate_generalized_integer(N, bias):
    """3.3 Generalized variable-length integers

    Encode the integer *N* as a bytes object of base-36 digits, using the
    per-position thresholds given by T() for the current *bias*.
    """
    encoded = bytearray()
    position = 0
    while True:
        t = T(position, bias)
        if N < t:
            # Final digit: a value below the threshold terminates the number.
            encoded.append(digits[N])
            return bytes(encoded)
        N, remainder = divmod(N - t, 36 - t)
        encoded.append(digits[t + remainder])
        position += 1
90n/a
def adapt(delta, first, numchars):
    """Return the bias to use for the next delta.

    *delta* is the delta just processed, *first* is true for the very
    first delta (which is damped by 700 instead of halved), and *numchars*
    is the number of code points handled so far.
    """
    damp = 700 if first else 2
    delta //= damp
    delta += delta // numchars
    # ((base - tmin) * tmax) // 2 == 455
    divisions = 0
    while delta > 455:
        delta //= 35  # base - tmin
        divisions += 36
    return divisions + (36 * delta // (delta + 38))
104n/a
105n/a
def generate_integers(baselen, deltas):
    """3.4 Bias adaptation

    Encode each delta in *deltas* as a generalized variable-length integer,
    re-adapting the bias after every one, and return the concatenated bytes.
    *baselen* is the number of basic code points in the input.
    """
    # Punycode parameters: initial bias = 72, damp = 700, skew = 38
    bias = 72
    pieces = []
    for count, delta in enumerate(deltas):
        pieces.append(generate_generalized_integer(delta, bias))
        bias = adapt(delta, count == 0, baselen + count + 1)
    return b"".join(pieces)
116n/a
def punycode_encode(text):
    """Encode the string *text* as punycode and return the result as bytes.

    When *text* contains basic (ASCII) characters, they come first and are
    followed by a ``-`` delimiter and the encoded deltas; otherwise only the
    encoded deltas are returned.
    """
    base, extended = segregate(text)
    deltas = insertion_unsort(text, extended)
    encoded = generate_integers(len(base), deltas)
    if not base:
        return encoded
    return base + b"-" + encoded
124n/a
125n/a##################### Decoding #####################################
126n/a
def decode_generalized_number(extended, extpos, bias, errors):
    """3.3 Generalized variable-length integers

    Decode one generalized variable-length integer from *extended*
    (a string of upper-case letters and digits), starting at index *extpos*.

    Returns a pair ``(newpos, value)``, where ``newpos`` is the index just
    past the consumed digits.  When the input is truncated or contains an
    invalid digit and *errors* is not ``"strict"``, ``value`` is ``None``.

    Raises UnicodeError in ``"strict"`` mode for truncated input or for a
    character that is not a valid digit.
    """
    result = 0
    w = 1
    j = 0
    while 1:
        try:
            char = ord(extended[extpos])
        except IndexError:
            if errors == "strict":
                raise UnicodeError("incomplete punycode string")
            return extpos + 1, None
        extpos += 1
        if 0x41 <= char <= 0x5A: # A-Z
            digit = char - 0x41
        elif 0x30 <= char <= 0x39:
            digit = char - 22 # 0x30-26
        elif errors == "strict":
            # extpos has already been advanced past the offending character,
            # so it lives at extpos - 1.  Indexing at extpos would report the
            # wrong character, or raise IndexError at the end of the input.
            raise UnicodeError("Invalid extended code point '%s'"
                               % extended[extpos - 1])
        else:
            return extpos, None
        t = T(j, bias)
        result += digit * w
        if digit < t:
            return extpos, result
        w = w * (36 - t)
        j += 1
155n/a
156n/a
def insertion_sort(base, extended, errors):
    """3.2 Insertion unsort coding

    Decode the deltas in *extended* one at a time and insert the resulting
    characters into the string *base*, returning the completed string.

    If a delta cannot be decoded (non-strict *errors*), the string built so
    far is returned.  A code point above U+10FFFF raises UnicodeError in
    ``"strict"`` mode and is replaced by ``'?'`` otherwise.
    """
    code_point = 0x80
    position = -1
    bias = 72
    cursor = 0
    while cursor < len(extended):
        next_cursor, delta = decode_generalized_number(
            extended, cursor, bias, errors)
        if delta is None:
            # There was an error in decoding. We can't continue because
            # synchronization is lost.
            return base
        position += delta + 1
        # The quotient advances the code point; the remainder is the
        # insertion position within the current string.
        step, position = divmod(position, len(base) + 1)
        code_point += step
        if code_point > 0x10FFFF:
            if errors == "strict":
                raise UnicodeError("Invalid character U+%x" % code_point)
            code_point = ord('?')
        base = "".join((base[:position], chr(code_point), base[position:]))
        bias = adapt(delta, cursor == 0, len(base))
        cursor = next_cursor
    return base
181n/a
def punycode_decode(text, errors):
    """Decode the punycode data *text* and return the resulting string.

    *text* may be a str, a memoryview or a bytes-like object.  Everything
    before the last ``-`` is taken as the literal basic part (decoded as
    ASCII using *errors*); everything after it is the encoded part, which
    is upper-cased before being decoded.
    """
    if isinstance(text, str):
        text = text.encode("ascii")
    if isinstance(text, memoryview):
        text = bytes(text)
    delimiter = text.rfind(b"-")
    if delimiter == -1:
        # No delimiter: the whole input is the encoded part.
        literal = ""
        encoded = text
    else:
        literal = str(text[:delimiter], "ascii", errors)
        encoded = text[delimiter + 1:]
    return insertion_sort(literal, str(encoded, "ascii").upper(), errors)
195n/a
196n/a### Codec APIs
197n/a
class Codec(codecs.Codec):
    """Stateless punycode encoder/decoder."""

    def encode(self, input, errors='strict'):
        """Return ``(encoded_bytes, length_of_input)``; *errors* is unused."""
        return punycode_encode(input), len(input)

    def decode(self, input, errors='strict'):
        """Return ``(decoded_str, length_of_input)``.

        Only the 'strict', 'replace' and 'ignore' error handlers are
        accepted; anything else raises UnicodeError.
        """
        if errors in ('strict', 'replace', 'ignore'):
            return punycode_decode(input, errors), len(input)
        raise UnicodeError("Unsupported error handling "+errors)
209n/a
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder that punycode-encodes each chunk on its own."""

    def encode(self, input, final=False):
        """Return the punycode encoding of *input*; *final* is not used."""
        encoded = punycode_encode(input)
        return encoded
213n/a
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder that punycode-decodes each chunk on its own."""

    def decode(self, input, final=False):
        """Return the decoded string for *input*; *final* is not used.

        Raises UnicodeError when the decoder's error handler is not one of
        'strict', 'replace' or 'ignore'.
        """
        handler = self.errors
        if handler in ('strict', 'replace', 'ignore'):
            return punycode_decode(input, handler)
        raise UnicodeError("Unsupported error handling "+handler)
219n/a
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining Codec with codecs.StreamWriter."""
222n/a
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining Codec with codecs.StreamReader."""
225n/a
226n/a### encodings module API
227n/a
def getregentry():
    """Return the codecs.CodecInfo entry describing the punycode codec."""
    entry = dict(
        name='punycode',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
    return codecs.CodecInfo(**entry)