» Core Development > Code coverage > Lib/encodings/idna.py

Python code coverage for Lib/encodings/idna.py

# | count | content
1n/a# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
2n/a
3n/aimport stringprep, re, codecs
4n/afrom unicodedata import ucd_3_2_0 as unicodedata
5n/a
# IDNA section 3.1: the four code points recognised as label separators
# (U+002E full stop, U+3002 ideographic full stop, U+FF0E fullwidth full
# stop, U+FF61 halfwidth ideographic full stop).
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")

# IDNA section 5: the ACE prefix.  ``ace_prefix`` is the bytes form used
# with encoded labels; ``sace_prefix`` is the str form used when checking
# text labels.
ace_prefix = b"xn--"
sace_prefix = "xn--"
12n/a
# This assumes query strings, so AllowUnassigned is true
def nameprep(label):
    """Apply the Nameprep profile (RFC 3491) to a single label.

    The label is mapped (tables B.1 and B.2), normalized with NFKC,
    checked against the prohibited-character tables and finally checked
    against the bidirectional-text requirements.

    Parameters:
        label: the label to prepare, as a str.

    Returns:
        The prepared label as a str.

    Raises:
        UnicodeError: if the label contains a prohibited character or
            violates one of the bidi requirements.
    """
    # Map: drop the characters listed in table B.1 ("map to nothing") and
    # send every remaining character through the table B.2 mapping.
    label = "".join(
        stringprep.map_table_b2(c)
        for c in label
        if not stringprep.in_table_b1(c)
    )

    # Normalize
    label = unicodedata.normalize("NFKC", label)

    # Prohibit
    for c in label:
        if (stringprep.in_table_c12(c) or
                stringprep.in_table_c22(c) or
                stringprep.in_table_c3(c) or
                stringprep.in_table_c4(c) or
                stringprep.in_table_c5(c) or
                stringprep.in_table_c6(c) or
                stringprep.in_table_c7(c) or
                stringprep.in_table_c8(c) or
                stringprep.in_table_c9(c)):
            raise UnicodeError("Invalid character %r" % c)

    # Check bidi
    RandAL = [stringprep.in_table_d1(x) for x in label]
    # The tests below look at the whole label, so they only need to run
    # once when at least one RandALCat character is present (not once per
    # such character).
    if any(RandAL):
        # There is a RandAL char in the string. Must perform further
        # tests:
        # 1) The characters in section 5.8 MUST be prohibited.
        # This is table C.8, which was already checked
        # 2) If a string contains any RandALCat character, the string
        # MUST NOT contain any LCat character.
        if any(stringprep.in_table_d2(x) for x in label):
            raise UnicodeError("Violation of BIDI requirement 2")

        # 3) If a string contains any RandALCat character, a
        # RandALCat character MUST be the first character of the
        # string, and a RandALCat character MUST be the last
        # character of the string.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError("Violation of BIDI requirement 3")

    return label
61n/a
def ToASCII(label):
    """Convert one label to its ASCII form (the IDNA ToASCII operation).

    Parameters:
        label: a single label (no separators) as a str.

    Returns:
        The label as bytes: either the plain ASCII encoding, or the ACE
        prefix followed by the Punycode encoding of the nameprepped label.

    Raises:
        UnicodeError: if the resulting label is empty or 64 or more bytes
            long, if a non-ASCII label already starts with the ACE prefix,
            or if nameprep rejects the label.
    """
    try:
        # Step 1: try ASCII
        label = label.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 3: UseSTD3ASCIIRules is false, so
        # Skip to step 8.
        if 0 < len(label) < 64:
            return label
        raise UnicodeError("label empty or too long")

    # Step 2: nameprep
    label = nameprep(label)

    # Step 3: UseSTD3ASCIIRules is false
    # Step 4: try ASCII
    try:
        label = label.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 8.
        if 0 < len(label) < 64:
            return label
        raise UnicodeError("label empty or too long")

    # Step 5: Check ACE prefix
    if label.startswith(sace_prefix):
        raise UnicodeError("Label starts with ACE prefix")

    # Step 6: Encode with PUNYCODE
    label = label.encode("punycode")

    # Step 7: Prepend ACE prefix
    label = ace_prefix + label

    # Step 8: Check size
    if 0 < len(label) < 64:
        return label
    raise UnicodeError("label empty or too long")
104n/a
def ToUnicode(label):
    """Convert one label to its Unicode form (the IDNA ToUnicode operation).

    Parameters:
        label: a single label, either bytes (taken to be ASCII) or str.

    Returns:
        The label as a str.  Labels without the ACE prefix are returned as
        their ASCII text; labels with it are Punycode-decoded.

    Raises:
        UnicodeError: if a non-ASCII label is still non-ASCII after
            nameprep, if Punycode decoding fails, or if re-encoding the
            decoded label with ToASCII does not reproduce the input.
    """
    # Step 1: Check for ASCII
    if isinstance(label, bytes):
        pure_ascii = True
    else:
        try:
            label = label.encode("ascii")
            pure_ascii = True
        except UnicodeError:
            pure_ascii = False
    if not pure_ascii:
        # Step 2: Perform nameprep
        label = nameprep(label)
        # It doesn't say this, but apparently, it should be ASCII now
        try:
            label = label.encode("ascii")
        except UnicodeError:
            raise UnicodeError("Invalid character in IDN label")
    # Step 3: Check for ACE prefix
    # From here on ``label`` is always bytes.
    if not label.startswith(ace_prefix):
        return str(label, "ascii")

    # Step 4: Remove ACE prefix
    label1 = label[len(ace_prefix):]

    # Step 5: Decode using PUNYCODE
    result = label1.decode("punycode")

    # Step 6: Apply ToASCII
    label2 = ToASCII(result)

    # Step 7: Compare the result of step 6 with the one of step 3
    # label2 will already be in lower case.
    if str(label, "ascii").lower() != str(label2, "ascii"):
        raise UnicodeError("IDNA does not round-trip", label, label2)

    # Step 8: return the result of step 5
    return result
143n/a
144n/a### Codec APIs
145n/a
class Codec(codecs.Codec):
    """Stateless IDNA codec working on complete domain names."""

    def encode(self, input, errors='strict'):
        """Encode a domain name (str) to its ASCII bytes form.

        Returns a ``(bytes, length_consumed)`` tuple where the length is
        ``len(input)``.  Raises UnicodeError for any error handler other
        than ``'strict'`` and for labels that are empty or too long.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return b'', 0

        try:
            result = input.encode('ascii')
        except UnicodeEncodeError:
            pass
        else:
            # ASCII name: fast path
            labels = result.split(b'.')
            for label in labels[:-1]:
                if not (0 < len(label) < 64):
                    raise UnicodeError("label empty or too long")
            # The last label may be empty: that is a trailing dot.
            if len(labels[-1]) >= 64:
                raise UnicodeError("label too long")
            return result, len(input)

        result = bytearray()
        labels = dots.split(input)
        if labels and not labels[-1]:
            trailing_dot = b'.'
            del labels[-1]
        else:
            trailing_dot = b''
        for label in labels:
            if result:
                # Join with U+002E
                result.extend(b'.')
            result.extend(ToASCII(label))
        return bytes(result+trailing_dot), len(input)

    def decode(self, input, errors='strict'):
        """Decode an ASCII domain name (bytes-like) to str.

        Returns a ``(str, length_consumed)`` tuple where the length is
        ``len(input)``.  Raises UnicodeError for any error handler other
        than ``'strict'``.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return "", 0

        # IDNA allows decoding to operate on Unicode strings, too.
        if not isinstance(input, bytes):
            # XXX obviously wrong, see #3232
            # NOTE: bytes(input) raises TypeError when input is a str, so
            # in practice only bytes-like objects are accepted here.
            input = bytes(input)

        if ace_prefix not in input:
            # Fast path
            try:
                return input.decode('ascii'), len(input)
            except UnicodeDecodeError:
                pass

        labels = input.split(b".")

        if labels and len(labels[-1]) == 0:
            trailing_dot = '.'
            del labels[-1]
        else:
            trailing_dot = ''

        result = []
        for label in labels:
            result.append(ToUnicode(label))

        return ".".join(result)+trailing_dot, len(input)
217n/a
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental IDNA encoder that emits only complete labels."""

    def _buffer_encode(self, input, errors, final):
        """Encode as many complete labels of *input* as possible.

        Returns ``(encoded_bytes, size)`` where *size* is the number of
        input characters that were encoded.  Unless *final* is true, the
        last label is held back because more of it may still arrive.
        Raises UnicodeError for any error handler other than ``'strict'``.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return (b'', 0)

        labels = dots.split(input)
        trailing_dot = b''
        if labels:
            if not labels[-1]:
                trailing_dot = b'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = b'.'

        result = bytearray()
        size = 0
        for label in labels:
            if size:
                # Join with U+002E
                result.extend(b'.')
                size += 1
            result.extend(ToASCII(label))
            # Count input characters, not output bytes.
            size += len(label)

        result += trailing_dot
        size += len(trailing_dot)
        return (bytes(result), size)
252n/a
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental IDNA decoder that emits only complete labels."""

    def _buffer_decode(self, input, errors, final):
        """Decode as many complete labels of *input* as possible.

        *input* may be a str or an ASCII bytes-like object.  Returns
        ``(decoded_str, size)`` where *size* is the number of input items
        that were decoded.  Unless *final* is true, the last label is held
        back because more of it may still arrive.  Raises UnicodeError for
        any error handler other than ``'strict'``.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return ("", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, str):
            labels = dots.split(input)
        else:
            # Must be ASCII string
            input = str(input, "ascii")
            labels = input.split(".")

        trailing_dot = ''
        if labels:
            if not labels[-1]:
                trailing_dot = '.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = '.'

        result = []
        size = 0
        for label in labels:
            result.append(ToUnicode(label))
            if size:
                # Account for the separator before this label.
                size += 1
            size += len(label)

        result = ".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)
291n/a
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer that takes its encode/decode methods from Codec."""
    pass
294n/a
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader that takes its encode/decode methods from Codec."""
    pass
297n/a
298n/a### encodings module API
299n/a
def getregentry():
    """Return the codecs.CodecInfo describing the 'idna' codec."""
    return codecs.CodecInfo(
        name='idna',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )