1 | n/a | from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \ |
---|
2 | n/a | cpython_only, captured_stdout |
---|
3 | n/a | import locale |
---|
4 | n/a | import re |
---|
5 | n/a | import sre_compile |
---|
6 | n/a | import string |
---|
7 | n/a | import unittest |
---|
8 | n/a | import warnings |
---|
9 | n/a | from re import Scanner |
---|
10 | n/a | from weakref import proxy |
---|
11 | n/a | |
---|
12 | n/a | # Misc tests from Tim Peters' re.doc |
---|
13 | n/a | |
---|
14 | n/a | # WARNING: Don't change details in these tests if you don't know |
---|
15 | n/a | # what you're doing. Some of these tests were carefully modeled to |
---|
16 | n/a | # cover most of the code. |
---|
17 | n/a | |
---|
class S(str):
    """str subclass whose indexing and slicing return S instead of str."""

    def __getitem__(self, index):
        piece = str.__getitem__(self, index)
        return S(piece)
---|
21 | n/a | |
---|
class B(bytes):
    """bytes subclass whose __getitem__ wraps every result in B."""

    def __getitem__(self, index):
        # The raw result is passed straight to the B constructor, so an
        # integer index (which yields an int) produces that many zero bytes.
        piece = bytes.__getitem__(self, index)
        return B(piece)
---|
25 | n/a | |
---|
26 | n/a | class ReTests(unittest.TestCase): |
---|
27 | n/a | |
---|
def assertTypedEqual(self, actual, expect, msg=None):
    """Assert that *actual* equals *expect* and has matching exact types.

    After the ordinary equality check, tuples and lists in *expect* are
    walked element by element (paired with *actual* via zip) and every
    non-container leaf must have exactly the same type on both sides.
    """
    self.assertEqual(actual, expect, msg)
    # Explicit stack instead of recursion; children are pushed reversed so
    # leaves are still visited depth-first, left to right.
    pending = [(actual, expect)]
    while pending:
        got, want = pending.pop()
        if not isinstance(want, (tuple, list)):
            self.assertIs(type(got), type(want), msg)
            continue
        pending.extend(reversed(list(zip(got, want))))
---|
37 | n/a | |
---|
def checkPatternError(self, pattern, errmsg, pos=None):
    """Assert that compiling *pattern* raises re.error with *errmsg*.

    If *pos* is given, the error's ``pos`` attribute must equal it as well.
    """
    with self.assertRaises(re.error) as caught:
        re.compile(pattern)
    failure = caught.exception
    with self.subTest(pattern=pattern):
        self.assertEqual(failure.msg, errmsg)
        if pos is None:
            return
        self.assertEqual(failure.pos, pos)
---|
46 | n/a | |
---|
def checkTemplateError(self, pattern, repl, string, errmsg, pos=None):
    """Assert that re.sub(pattern, repl, string) raises re.error.

    The error message must equal *errmsg*; if *pos* is given, the error's
    ``pos`` attribute must equal it as well.
    """
    with self.assertRaises(re.error) as caught:
        re.sub(pattern, repl, string)
    failure = caught.exception
    with self.subTest(pattern=pattern, repl=repl):
        self.assertEqual(failure.msg, errmsg)
        if pos is None:
            return
        self.assertEqual(failure.pos, pos)
---|
55 | n/a | |
---|
56 | n/a | def test_keep_buffer(self): |
---|
57 | n/a | # See bug 14212 |
---|
58 | n/a | b = bytearray(b'x') |
---|
59 | n/a | it = re.finditer(b'a', b) |
---|
60 | n/a | with self.assertRaises(BufferError): |
---|
61 | n/a | b.extend(b'x'*400) |
---|
62 | n/a | list(it) |
---|
63 | n/a | del it |
---|
64 | n/a | gc_collect() |
---|
65 | n/a | b.extend(b'x'*400) |
---|
66 | n/a | |
---|
67 | n/a | def test_weakref(self): |
---|
68 | n/a | s = 'QabbbcR' |
---|
69 | n/a | x = re.compile('ab+c') |
---|
70 | n/a | y = proxy(x) |
---|
71 | n/a | self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR')) |
---|
72 | n/a | |
---|
73 | n/a | def test_search_star_plus(self): |
---|
74 | n/a | self.assertEqual(re.search('x*', 'axx').span(0), (0, 0)) |
---|
75 | n/a | self.assertEqual(re.search('x*', 'axx').span(), (0, 0)) |
---|
76 | n/a | self.assertEqual(re.search('x+', 'axx').span(0), (1, 3)) |
---|
77 | n/a | self.assertEqual(re.search('x+', 'axx').span(), (1, 3)) |
---|
78 | n/a | self.assertIsNone(re.search('x', 'aaa')) |
---|
79 | n/a | self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0)) |
---|
80 | n/a | self.assertEqual(re.match('a*', 'xxx').span(), (0, 0)) |
---|
81 | n/a | self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3)) |
---|
82 | n/a | self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3)) |
---|
83 | n/a | self.assertIsNone(re.match('a+', 'xxx')) |
---|
84 | n/a | |
---|
def bump_num(self, matchobj):
    """Return the whole match, read as an int and incremented, as a str."""
    return str(int(matchobj.group(0)) + 1)
---|
88 | n/a | |
---|
def test_basic_re_sub(self):
    # The result is an exact str/bytes object whatever the input type is.
    for wrap in (str, S):
        self.assertTypedEqual(re.sub('y', wrap('a'), wrap('xyz')), 'xaz')
    for wrap in (bytes, B, bytearray, memoryview):
        self.assertTypedEqual(re.sub(b'y', wrap(b'a'), wrap(b'xyz')), b'xaz')
    for ch in ("\xe0", "\u0430", "\U0001d49c"):
        self.assertEqual(re.sub(ch, 'a', 'x%sz' % ch), 'xaz')

    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    # Callable replacement, with and without a count limit.
    numbers = '08.2 -2 23x99y'
    self.assertEqual(re.sub(r'\d+', self.bump_num, numbers),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, numbers, 3),
                     '9.3 -3 23x99y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, numbers, count=3),
                     '9.3 -3 23x99y')

    # A callable's return value is used verbatim; a template is expanded.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    for pattern, template in [
        ('(?P<a>x)', r'\g<a>\g<a>'),
        ('(?P<a>x)', r'\g<a>\g<1>'),
        ('(?P<unk>x)', r'\g<unk>\g<unk>'),
        ('(?P<unk>x)', r'\g<1>\g<1>'),
    ]:
        self.assertEqual(re.sub(pattern, template, 'xx'), 'xxxx')

    controls = '\t\n\v\r\f\a\b'
    self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), controls)
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), controls)
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
    # Every other ASCII-letter escape in a template must raise re.error.
    for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
        escape = '\\' + c
        with self.subTest(c), self.assertRaises(re.error):
            self.assertEqual(re.sub('a', escape, 'a'), escape)

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
---|
130 | n/a | |
---|
131 | n/a | def test_bug_449964(self): |
---|
132 | n/a | # fails for group followed by other escape |
---|
133 | n/a | self.assertEqual(re.sub(r'(?P<unk>x)', r'\g<1>\g<1>\b', 'xx'), |
---|
134 | n/a | 'xx\bxx\b') |
---|
135 | n/a | |
---|
136 | n/a | def test_bug_449000(self): |
---|
137 | n/a | # Test for sub() on escaped characters |
---|
138 | n/a | self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'), |
---|
139 | n/a | 'abc\ndef\n') |
---|
140 | n/a | self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'), |
---|
141 | n/a | 'abc\ndef\n') |
---|
142 | n/a | self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'), |
---|
143 | n/a | 'abc\ndef\n') |
---|
144 | n/a | self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), |
---|
145 | n/a | 'abc\ndef\n') |
---|
146 | n/a | |
---|
147 | n/a | def test_bug_1661(self): |
---|
148 | n/a | # Verify that flags do not get silently ignored with compiled patterns |
---|
149 | n/a | pattern = re.compile('.') |
---|
150 | n/a | self.assertRaises(ValueError, re.match, pattern, 'A', re.I) |
---|
151 | n/a | self.assertRaises(ValueError, re.search, pattern, 'A', re.I) |
---|
152 | n/a | self.assertRaises(ValueError, re.findall, pattern, 'A', re.I) |
---|
153 | n/a | self.assertRaises(ValueError, re.compile, pattern, re.I) |
---|
154 | n/a | |
---|
155 | n/a | def test_bug_3629(self): |
---|
156 | n/a | # A regex that triggered a bug in the sre-code validator |
---|
157 | n/a | re.compile("(?P<quote>)(?(quote))") |
---|
158 | n/a | |
---|
159 | n/a | def test_sub_template_numeric_escape(self): |
---|
160 | n/a | # bug 776311 and friends |
---|
161 | n/a | self.assertEqual(re.sub('x', r'\0', 'x'), '\0') |
---|
162 | n/a | self.assertEqual(re.sub('x', r'\000', 'x'), '\000') |
---|
163 | n/a | self.assertEqual(re.sub('x', r'\001', 'x'), '\001') |
---|
164 | n/a | self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8') |
---|
165 | n/a | self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9') |
---|
166 | n/a | self.assertEqual(re.sub('x', r'\111', 'x'), '\111') |
---|
167 | n/a | self.assertEqual(re.sub('x', r'\117', 'x'), '\117') |
---|
168 | n/a | self.assertEqual(re.sub('x', r'\377', 'x'), '\377') |
---|
169 | n/a | |
---|
170 | n/a | self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111') |
---|
171 | n/a | self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1') |
---|
172 | n/a | |
---|
173 | n/a | self.assertEqual(re.sub('x', r'\00', 'x'), '\x00') |
---|
174 | n/a | self.assertEqual(re.sub('x', r'\07', 'x'), '\x07') |
---|
175 | n/a | self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8') |
---|
176 | n/a | self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9') |
---|
177 | n/a | self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a') |
---|
178 | n/a | |
---|
179 | n/a | self.checkTemplateError('x', r'\400', 'x', |
---|
180 | n/a | r'octal escape value \400 outside of ' |
---|
181 | n/a | r'range 0-0o377', 0) |
---|
182 | n/a | self.checkTemplateError('x', r'\777', 'x', |
---|
183 | n/a | r'octal escape value \777 outside of ' |
---|
184 | n/a | r'range 0-0o377', 0) |
---|
185 | n/a | |
---|
186 | n/a | self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1) |
---|
187 | n/a | self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1) |
---|
188 | n/a | self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1) |
---|
189 | n/a | self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1) |
---|
190 | n/a | self.checkTemplateError('x', r'\18', 'x', 'invalid group reference 18', 1) |
---|
191 | n/a | self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1) |
---|
192 | n/a | self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1) |
---|
193 | n/a | self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1) |
---|
194 | n/a | self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1) |
---|
195 | n/a | self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1) |
---|
196 | n/a | self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1) |
---|
197 | n/a | self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1) |
---|
198 | n/a | self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1) |
---|
199 | n/a | |
---|
200 | n/a | # in python2.3 (etc), these loop endlessly in sre_parser.py |
---|
201 | n/a | self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') |
---|
202 | n/a | self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'), |
---|
203 | n/a | 'xz8') |
---|
204 | n/a | self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'), |
---|
205 | n/a | 'xza') |
---|
206 | n/a | |
---|
207 | n/a | def test_qualified_re_sub(self): |
---|
208 | n/a | self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb') |
---|
209 | n/a | self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa') |
---|
210 | n/a | self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa') |
---|
211 | n/a | |
---|
212 | n/a | def test_bug_114660(self): |
---|
213 | n/a | self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), |
---|
214 | n/a | 'hello there') |
---|
215 | n/a | |
---|
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    subject = 'abxd'
    for pattern, expected in [('x*', '-a-b-d-'), ('x+', 'ab-d')]:
        self.assertEqual(re.sub(pattern, '-', subject), expected)
---|
220 | n/a | |
---|
221 | n/a | def test_symbolic_groups(self): |
---|
222 | n/a | re.compile(r'(?P<a>x)(?P=a)(?(a)y)') |
---|
223 | n/a | re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)') |
---|
224 | n/a | re.compile(r'(?P<a1>x)\1(?(1)y)') |
---|
225 | n/a | self.checkPatternError(r'(?P<a>)(?P<a>)', |
---|
226 | n/a | "redefinition of group name 'a' as group 2; " |
---|
227 | n/a | "was group 1") |
---|
228 | n/a | self.checkPatternError(r'(?P<a>(?P=a))', |
---|
229 | n/a | "cannot refer to an open group", 10) |
---|
230 | n/a | self.checkPatternError(r'(?Pxy)', 'unknown extension ?Px') |
---|
231 | n/a | self.checkPatternError(r'(?P<a>)(?P=a', 'missing ), unterminated name', 11) |
---|
232 | n/a | self.checkPatternError(r'(?P=', 'missing group name', 4) |
---|
233 | n/a | self.checkPatternError(r'(?P=)', 'missing group name', 4) |
---|
234 | n/a | self.checkPatternError(r'(?P=1)', "bad character in group name '1'", 4) |
---|
235 | n/a | self.checkPatternError(r'(?P=a)', "unknown group name 'a'") |
---|
236 | n/a | self.checkPatternError(r'(?P=a1)', "unknown group name 'a1'") |
---|
237 | n/a | self.checkPatternError(r'(?P=a.)', "bad character in group name 'a.'", 4) |
---|
238 | n/a | self.checkPatternError(r'(?P<)', 'missing >, unterminated name', 4) |
---|
239 | n/a | self.checkPatternError(r'(?P<a', 'missing >, unterminated name', 4) |
---|
240 | n/a | self.checkPatternError(r'(?P<', 'missing group name', 4) |
---|
241 | n/a | self.checkPatternError(r'(?P<>)', 'missing group name', 4) |
---|
242 | n/a | self.checkPatternError(r'(?P<1>)', "bad character in group name '1'", 4) |
---|
243 | n/a | self.checkPatternError(r'(?P<a.>)', "bad character in group name 'a.'", 4) |
---|
244 | n/a | self.checkPatternError(r'(?(', 'missing group name', 3) |
---|
245 | n/a | self.checkPatternError(r'(?())', 'missing group name', 3) |
---|
246 | n/a | self.checkPatternError(r'(?(a))', "unknown group name 'a'", 3) |
---|
247 | n/a | self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3) |
---|
248 | n/a | self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3) |
---|
249 | n/a | self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3) |
---|
250 | n/a | # New valid/invalid identifiers in Python 3 |
---|
251 | n/a | re.compile('(?P<µ>x)(?P=µ)(?(µ)y)') |
---|
252 | n/a | re.compile('(?P<ðð«ð¦ð ð¬ð¡ð¢>x)(?P=ðð«ð¦ð ð¬ð¡ð¢)(?(ðð«ð¦ð ð¬ð¡ð¢)y)') |
---|
253 | n/a | self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4) |
---|
254 | n/a | # Support > 100 groups. |
---|
255 | n/a | pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) |
---|
256 | n/a | pat = '(?:%s)(?(200)z|t)' % pat |
---|
257 | n/a | self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5)) |
---|
258 | n/a | |
---|
259 | n/a | def test_symbolic_refs(self): |
---|
260 | n/a | self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx', |
---|
261 | n/a | 'missing >, unterminated name', 3) |
---|
262 | n/a | self.checkTemplateError('(?P<a>x)', r'\g<', 'xx', |
---|
263 | n/a | 'missing group name', 3) |
---|
264 | n/a | self.checkTemplateError('(?P<a>x)', r'\g', 'xx', 'missing <', 2) |
---|
265 | n/a | self.checkTemplateError('(?P<a>x)', r'\g<a a>', 'xx', |
---|
266 | n/a | "bad character in group name 'a a'", 3) |
---|
267 | n/a | self.checkTemplateError('(?P<a>x)', r'\g<>', 'xx', |
---|
268 | n/a | 'missing group name', 3) |
---|
269 | n/a | self.checkTemplateError('(?P<a>x)', r'\g<1a1>', 'xx', |
---|
270 | n/a | "bad character in group name '1a1'", 3) |
---|
271 | n/a | self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx', |
---|
272 | n/a | 'invalid group reference 2', 3) |
---|
273 | n/a | self.checkTemplateError('(?P<a>x)', r'\2', 'xx', |
---|
274 | n/a | 'invalid group reference 2', 1) |
---|
275 | n/a | with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"): |
---|
276 | n/a | re.sub('(?P<a>x)', r'\g<ab>', 'xx') |
---|
277 | n/a | self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '') |
---|
278 | n/a | self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '') |
---|
279 | n/a | self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx', |
---|
280 | n/a | "bad character in group name '-1'", 3) |
---|
281 | n/a | # New valid/invalid identifiers in Python 3 |
---|
282 | n/a | self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx') |
---|
283 | n/a | self.assertEqual(re.sub('(?P<ðð«ð¦ð ð¬ð¡ð¢>x)', r'\g<ðð«ð¦ð ð¬ð¡ð¢>', 'xx'), 'xx') |
---|
284 | n/a | self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx', |
---|
285 | n/a | "bad character in group name '©'", 3) |
---|
286 | n/a | # Support > 100 groups. |
---|
287 | n/a | pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1)) |
---|
288 | n/a | self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8') |
---|
289 | n/a | |
---|
290 | n/a | def test_re_subn(self): |
---|
291 | n/a | self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) |
---|
292 | n/a | self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) |
---|
293 | n/a | self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0)) |
---|
294 | n/a | self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) |
---|
295 | n/a | self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) |
---|
296 | n/a | self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2)) |
---|
297 | n/a | |
---|
def test_re_split(self):
    # str and str-subclass subjects: pieces must be exact str objects.
    for text in ":a:b::c", S(":a:b::c"):
        for sep, pieces in [
            (":", ['', 'a', 'b', '', 'c']),
            (":+", ['', 'a', 'b', 'c']),
            ("(:+)", ['', ':', 'a', ':', 'b', '::', 'c']),
        ]:
            self.assertTypedEqual(re.split(sep, text), pieces)
    # bytes and bytes-like subjects: pieces must be exact bytes objects.
    for text in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
                 memoryview(b":a:b::c")):
        for sep, pieces in [
            (b":", [b'', b'a', b'b', b'', b'c']),
            (b":+", [b'', b'a', b'b', b'c']),
            (b"(:+)", [b'', b':', b'a', b':', b'b', b'::', b'c']),
        ]:
            self.assertTypedEqual(re.split(sep, text), pieces)
    # Non-ASCII characters as the fields between separators.
    for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
                    "\U0001d49c\U0001d49e\U0001d4b5"):
        text = ":%s:%s::%s" % (a, b, c)
        self.assertEqual(re.split(":", text), ['', a, b, '', c])
        self.assertEqual(re.split(":+", text), ['', a, b, c])
        self.assertEqual(re.split("(:+)", text),
                         ['', ':', a, ':', b, '::', c])

    # Capturing groups in the separator show up in the result list.
    for sep, pieces in [
        ("(?::+)", ['', 'a', 'b', 'c']),
        ("(:)+", ['', ':', 'a', ':', 'b', ':', 'c']),
        ("([b:]+)", ['', ':', 'a', ':b::', 'c']),
        ("(b)|(:+)", ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c']),
        ("(?:b)|(?::+)", ['', 'a', '', '', 'c']),
    ]:
        self.assertEqual(re.split(sep, ":a:b::c"), pieces)

    # Separators that can match the empty string must warn.
    for sep, expected in [
        (':*', ['', 'a', 'b', 'c']),
        ('(?::*)', ['', 'a', 'b', 'c']),
        ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
        ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
    ]:
        with self.subTest(sep=sep):
            with self.assertWarns(FutureWarning):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

    # Separators that only match the empty string must raise.
    for sep, expected in [
        ('', [':a:b::c']),
        (r'\b', [':a:b::c']),
        (r'(?=:)', [':a:b::c']),
        (r'(?<=:)', [':a:b::c']),
    ]:
        with self.subTest(sep=sep):
            with self.assertRaises(ValueError):
                self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
---|
350 | n/a | |
---|
def test_qualified_re_split(self):
    # maxsplit given positionally and by keyword.
    subject = ":a:b::c"
    self.assertEqual(re.split(":", subject, 2), ['', 'a', 'b::c'])
    self.assertEqual(re.split(":", subject, maxsplit=2), ['', 'a', 'b::c'])
    self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
    with_separators = ['', ':', 'a', ':', 'b::c']
    for sep in ("(:)", "(:+)"):
        self.assertEqual(re.split(sep, subject, maxsplit=2),
                         with_separators)
    # A separator that can match the empty string must warn.
    with self.assertWarns(FutureWarning):
        self.assertEqual(re.split("(:*)", subject, maxsplit=2),
                         with_separators)
---|
362 | n/a | |
---|
def test_re_findall(self):
    self.assertEqual(re.findall(":+", "abc"), [])
    # str and str-subclass subjects: results must be exact str objects.
    for text in "a:b::c:::d", S("a:b::c:::d"):
        for pattern, found in [
            (":+", [":", "::", ":::"]),
            ("(:+)", [":", "::", ":::"]),
            ("(:)(:*)", [(":", ""), (":", ":"), (":", "::")]),
        ]:
            self.assertTypedEqual(re.findall(pattern, text), found)
    # bytes and bytes-like subjects: results must be exact bytes objects.
    for text in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
                 memoryview(b"a:b::c:::d")):
        for pattern, found in [
            (b":+", [b":", b"::", b":::"]),
            (b"(:+)", [b":", b"::", b":::"]),
            (b"(:)(:*)", [(b":", b""), (b":", b":"), (b":", b"::")]),
        ]:
            self.assertTypedEqual(re.findall(pattern, text), found)
    # The same three shapes with a non-ASCII character as the repeated item.
    for x in ("\xe0", "\u0430", "\U0001d49c"):
        xx, xxx = x * 2, x * 3
        text = "a%sb%sc%sd" % (x, xx, xxx)
        self.assertEqual(re.findall("%s+" % x, text), [x, xx, xxx])
        self.assertEqual(re.findall("(%s+)" % x, text), [x, xx, xxx])
        self.assertEqual(re.findall("(%s)(%s*)" % (x, x), text),
                         [(x, ""), (x, x), (x, xx)])
---|
388 | n/a | |
---|
389 | n/a | def test_bug_117612(self): |
---|
390 | n/a | self.assertEqual(re.findall(r"(a|(b))", "aba"), |
---|
391 | n/a | [("a", ""),("b", "b"),("a", "")]) |
---|
392 | n/a | |
---|
def test_re_match(self):
    # str and str-subclass subjects.
    for text in 'a', S('a'):
        self.assertEqual(re.match('a', text).groups(), ())
        grouped = re.match('(a)', text)
        self.assertEqual(grouped.groups(), ('a',))
        self.assertEqual(grouped.group(0), 'a')
        self.assertEqual(grouped.group(1), 'a')
        self.assertEqual(grouped.group(1, 1), ('a', 'a'))
    # bytes and bytes-like subjects.
    for text in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
        self.assertEqual(re.match(b'a', text).groups(), ())
        grouped = re.match(b'(a)', text)
        self.assertEqual(grouped.groups(), (b'a',))
        self.assertEqual(grouped.group(0), b'a')
        self.assertEqual(grouped.group(1), b'a')
        self.assertEqual(grouped.group(1, 1), (b'a', b'a'))
    # A single non-ASCII character used as both pattern and subject.
    for a in ("\xe0", "\u0430", "\U0001d49c"):
        self.assertEqual(re.match(a, a).groups(), ())
        grouped = re.match('(%s)' % a, a)
        self.assertEqual(grouped.groups(), (a,))
        self.assertEqual(grouped.group(0), a)
        self.assertEqual(grouped.group(1), a)
        self.assertEqual(grouped.group(1, 1), (a, a))

    # Groups that did not participate are None (or the given default).
    pat = re.compile('((a)|(b))(c)?')
    for text, expected in [
        ('a', ('a', 'a', None, None)),
        ('b', ('b', None, 'b', None)),
        ('ac', ('a', 'a', None, 'c')),
        ('bc', ('b', None, 'b', 'c')),
    ]:
        self.assertEqual(pat.match(text).groups(), expected)
    self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

    # group() accepts numbers and names, freely mixed.
    pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
    self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
    self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                     (None, 'b', None))
    self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
---|
425 | n/a | |
---|
426 | n/a | def test_group(self): |
---|
427 | n/a | class Index: |
---|
428 | n/a | def __init__(self, value): |
---|
429 | n/a | self.value = value |
---|
430 | n/a | def __index__(self): |
---|
431 | n/a | return self.value |
---|
432 | n/a | # A single group |
---|
433 | n/a | m = re.match('(a)(b)', 'ab') |
---|
434 | n/a | self.assertEqual(m.group(), 'ab') |
---|
435 | n/a | self.assertEqual(m.group(0), 'ab') |
---|
436 | n/a | self.assertEqual(m.group(1), 'a') |
---|
437 | n/a | self.assertEqual(m.group(Index(1)), 'a') |
---|
438 | n/a | self.assertRaises(IndexError, m.group, -1) |
---|
439 | n/a | self.assertRaises(IndexError, m.group, 3) |
---|
440 | n/a | self.assertRaises(IndexError, m.group, 1<<1000) |
---|
441 | n/a | self.assertRaises(IndexError, m.group, Index(1<<1000)) |
---|
442 | n/a | self.assertRaises(IndexError, m.group, 'x') |
---|
443 | n/a | # Multiple groups |
---|
444 | n/a | self.assertEqual(m.group(2, 1), ('b', 'a')) |
---|
445 | n/a | self.assertEqual(m.group(Index(2), Index(1)), ('b', 'a')) |
---|
446 | n/a | |
---|
def test_match_getitem(self):
    pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')

    # Only the first alternative matched; the other groups are None.
    m = pat.match('a')
    for name, expected in [('a1', 'a'), ('b2', None), ('c3', None)]:
        self.assertEqual(m[name], expected)
    self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None')
    for number, expected in enumerate(['a', 'a', None, None]):
        self.assertEqual(m[number], expected)
    # Unknown names, out-of-range numbers and tuples are all rejected.
    for bad in ('X', -1, 4, (0, 1), (0,), (0, 1)):
        with self.assertRaisesRegex(IndexError, 'no such group'):
            m[bad]
    with self.assertRaisesRegex(KeyError, 'a2'):
        'a1={a2}'.format_map(m)

    # The optional third group matched as well.
    m = pat.match('ac')
    for name, expected in [('a1', 'a'), ('b2', None), ('c3', 'c')]:
        self.assertEqual(m[name], expected)
    self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c')
    for number, expected in enumerate(['ac', 'a', None, 'c']):
        self.assertEqual(m[number], expected)

    # Cannot assign.
    with self.assertRaises(TypeError):
        m[0] = 1

    # No len().
    with self.assertRaises(TypeError):
        len(m)
---|
490 | n/a | |
---|
def test_re_fullmatch(self):
    # Issue 16203: Proposal: add re.fullmatch() method.
    self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
    # The longer alternative must be chosen to cover the whole subject.
    for text in "ab", S("ab"):
        self.assertEqual(re.fullmatch(r"a|ab", text).span(), (0, 2))
    for text in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"):
        self.assertEqual(re.fullmatch(br"a|ab", text).span(), (0, 2))
    for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e":
        r = r"%s|%s" % (a, a + b)
        self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2))
    # Non-greedy quantifiers still have to reach the end.
    for pattern, text, expected in [
        (r".*?$", "abc", (0, 3)),
        (r".*?", "abc", (0, 3)),
        (r"a.*?b", "ab", (0, 2)),
        (r"a.*?b", "abb", (0, 3)),
        (r"a.*?b", "axxb", (0, 4)),
    ]:
        self.assertEqual(re.fullmatch(pattern, text).span(), expected)
    # Cases that must not match.
    for pattern, text in [
        (r"a+", "ab"),
        (r"abc$", "abc\n"),
        (r"abc\Z", "abc\n"),
        (r"(?m)abc$", "abc\n"),
    ]:
        self.assertIsNone(re.fullmatch(pattern, text))
    # Lookahead and lookbehind inside the pattern.
    for pattern, text, expected in [
        (r"ab(?=c)cd", "abcd", (0, 4)),
        (r"ab(?<=b)cd", "abcd", (0, 4)),
        (r"(?=a|ab)ab", "ab", (0, 2)),
    ]:
        self.assertEqual(re.fullmatch(pattern, text).span(), expected)

    # pos/endpos restrict the region that must be matched in full.
    for pattern in (r"bc", r".*?$", r".*?"):
        self.assertEqual(
            re.compile(pattern).fullmatch("abcd", pos=1, endpos=3).span(),
            (1, 3))
---|
520 | n/a | |
---|
def test_re_groupref_exists(self):
    # Exercise conditional patterns of the form (?(group)yes|no): the
    # branch taken depends on whether the referenced group matched.
    # Optional "(" in group 1: a closing ")" is required only if it matched.
    self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
                     ('(', 'a'))
    self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a').groups(),
                     (None, 'a'))
    self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'))
    self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a'))
    # Two-branch form, and the form with an empty "yes" branch.
    self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
                     ('a', 'b'))
    self.assertEqual(re.match(r'^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
                     (None, 'd'))
    self.assertEqual(re.match(r'^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
                     (None, 'd'))
    self.assertEqual(re.match(r'^(?:(a)|c)((?(1)|d))$', 'a').groups(),
                     ('a', ''))

    # Tests for bug #1177831: exercise groups other than the first group
    p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
    self.assertEqual(p.match('abc').groups(),
                     ('a', 'b', 'c'))
    self.assertEqual(p.match('ad').groups(),
                     ('a', None, 'd'))
    self.assertIsNone(p.match('abd'))
    self.assertIsNone(p.match('ac'))

    # Support > 100 groups.
    # Builds 200 alternatives, each with its own named group a1..a200 that
    # captures the hexadecimal form of its index.
    pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
    pat = '(?:%s)(?(200)z)' % pat
    # 'c8' is hex for 200, so group 200 matches and the trailing 'z' is
    # required by the conditional.
    self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))

    # Malformed conditionals: checkPatternError asserts that compiling the
    # pattern raises re.error with the given message and error position.
    self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
    self.checkPatternError(r'()(?(1)a|b',
                           'missing ), unterminated subpattern', 2)
    self.checkPatternError(r'()(?(1)a|b|c)',
                           'conditional backref with more than '
                           'two branches', 10)
---|
557 | n/a | |
---|
def test_re_groupref_overflow(self):
    # A group reference equal to MAXGROUPS must be rejected, both in a
    # replacement template (\g<N>) and in a conditional pattern (?(N)).
    from sre_constants import MAXGROUPS
    # checkTemplateError takes (pattern, repl, string, errmsg, pos);
    # presumably it asserts that the substitution raises re.error --
    # its body is defined elsewhere in this class.
    self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
                            'invalid group reference %d' % MAXGROUPS, 3)
    self.checkPatternError(r'(?P<a>)(?(%d))' % MAXGROUPS,
                           'invalid group reference %d' % MAXGROUPS, 10)
---|
564 | n/a | |
---|
def test_re_groupref(self):
    # Numeric backreferences (\1), including references to a group that
    # did not take part in the match.
    self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
                     ('|', 'a'))
    # Group 1 is unmatched here; the backreference itself is optional.
    self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
                     (None, 'a'))
    self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', 'a|'))
    self.assertIsNone(re.match(r'^(\|)?([^()]+)\1$', '|a'))
    self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
                     ('a', 'a'))
    self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
                     (None, None))

    # A group may not refer to itself while it is still open.
    self.checkPatternError(r'(abc\1)', 'cannot refer to an open group', 4)
---|
578 | n/a | |
---|
def test_groupdict(self):
    # groupdict() maps each named group to the text it captured.
    self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
                              'first second').groupdict(),
                     {'first':'first', 'second':'second'})
---|
583 | n/a | |
---|
def test_expand(self):
    # Match.expand() substitutes both numeric (\N) and named (\g<name>)
    # group references in a template.
    self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
                              "first second")
                              .expand(r"\2 \1 \g<second> \g<first>"),
                     "second first second first")
    # Group 2 ("second") does not take part in this match; both references
    # to it expand to the empty string, leaving only the separator.
    self.assertEqual(re.match("(?P<first>first)|(?P<second>second)",
                              "first")
                              .expand(r"\2 \g<second>"),
                     " ")
---|
593 | n/a | |
---|
def test_repeat_minmax(self):
    # Bounded repetition {m}, {m,n} in greedy and lazy ({...}?) forms,
    # first on a capturing group and then on a single literal.

    # Too few repetitions allowed to cover the anchored three-char input.
    self.assertIsNone(re.match(r"^(\w){1}$", "abc"))
    self.assertIsNone(re.match(r"^(\w){1}?$", "abc"))
    self.assertIsNone(re.match(r"^(\w){1,2}$", "abc"))
    self.assertIsNone(re.match(r"^(\w){1,2}?$", "abc"))

    # Enough repetitions: the group keeps its last iteration, "c".
    self.assertEqual(re.match(r"^(\w){3}$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,3}$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,4}$", "abc").group(1), "c")
    # NOTE(review): this lazy {3,4}? case is repeated four lines below;
    # possibly the greedy {3,4} form was intended here -- confirm.
    self.assertEqual(re.match(r"^(\w){3,4}?$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){3}?$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,3}?$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){1,4}?$", "abc").group(1), "c")
    self.assertEqual(re.match(r"^(\w){3,4}?$", "abc").group(1), "c")

    # Same checks with a plain literal instead of a group.
    self.assertIsNone(re.match(r"^x{1}$", "xxx"))
    self.assertIsNone(re.match(r"^x{1}?$", "xxx"))
    self.assertIsNone(re.match(r"^x{1,2}$", "xxx"))
    self.assertIsNone(re.match(r"^x{1,2}?$", "xxx"))

    self.assertTrue(re.match(r"^x{3}$", "xxx"))
    self.assertTrue(re.match(r"^x{1,3}$", "xxx"))
    self.assertTrue(re.match(r"^x{3,3}$", "xxx"))
    self.assertTrue(re.match(r"^x{1,4}$", "xxx"))
    # NOTE(review): same duplicated lazy {3,4}? case as above -- confirm.
    self.assertTrue(re.match(r"^x{3,4}?$", "xxx"))
    self.assertTrue(re.match(r"^x{3}?$", "xxx"))
    self.assertTrue(re.match(r"^x{1,3}?$", "xxx"))
    self.assertTrue(re.match(r"^x{1,4}?$", "xxx"))
    self.assertTrue(re.match(r"^x{3,4}?$", "xxx"))

    # An empty "{}" is not a repeat operator; it matches literally.
    self.assertIsNone(re.match(r"^x{}$", "xxx"))
    self.assertTrue(re.match(r"^x{}$", "x{}"))

    self.checkPatternError(r'x{2,1}',
                           'min repeat greater than max repeat', 2)
---|
629 | n/a | |
---|
def test_getattr(self):
    # Attributes exposed by compiled pattern objects.
    self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
    # A str pattern with inline (?i) reports re.U in addition to re.I.
    self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
    self.assertEqual(re.compile("(?i)(a)(b)").groups, 2)
    self.assertEqual(re.compile("(?i)(a)(b)").groupindex, {})
    self.assertEqual(re.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,
                     {'first': 1, 'other': 2})

    # Attributes exposed by match objects.
    self.assertEqual(re.match("(a)", "a").pos, 0)
    self.assertEqual(re.match("(a)", "a").endpos, 1)
    self.assertEqual(re.match("(a)", "a").string, "a")
    self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
    self.assertTrue(re.match("(a)", "a").re)

    # Issue 14260. groupindex should be non-modifiable mapping.
    p = re.compile(r'(?i)(?P<first>a)(?P<other>b)')
    self.assertEqual(sorted(p.groupindex), ['first', 'other'])
    self.assertEqual(p.groupindex['other'], 2)
    with self.assertRaises(TypeError):
        p.groupindex['other'] = 0
    # The rejected assignment must leave the mapping unchanged.
    self.assertEqual(p.groupindex['other'], 2)
---|
651 | n/a | |
---|
def test_special_escapes(self):
    # Word-boundary (\b, \B), string-anchor (\A, \Z) and character-class
    # (\d \D \w \W \s \S) escapes, for str patterns (default and re.ASCII)
    # and for bytes patterns (default and re.LOCALE).
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx", re.ASCII).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd", re.ASCII).group(1), "bx")
    # With re.M, ^ and $ match at line boundaries, but \A and \Z still
    # anchor to the whole string.
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
    # Same checks for bytes patterns.
    self.assertEqual(re.search(br"\b(b.)\b",
                               b"abcd abc bcd bx").group(1), b"bx")
    self.assertEqual(re.search(br"\B(b.)\B",
                               b"abc bcd bc abxd").group(1), b"bx")
    self.assertEqual(re.search(br"\b(b.)\b",
                               b"abcd abc bcd bx", re.LOCALE).group(1), b"bx")
    self.assertEqual(re.search(br"\B(b.)\B",
                               b"abc bcd bc abxd", re.LOCALE).group(1), b"bx")
    self.assertEqual(re.search(br"^abc$", b"\nabc\n", re.M).group(0), b"abc")
    self.assertEqual(re.search(br"^\Aabc\Z$", b"abc", re.M).group(0), b"abc")
    self.assertIsNone(re.search(br"^\Aabc\Z$", b"\nabc\n", re.M))
    # One character per category escape, in order.
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a").group(0), "1aa! a")
    self.assertEqual(re.search(br"\d\D\w\W\s\S",
                               b"1aa! a").group(0), b"1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a", re.ASCII).group(0), "1aa! a")
    self.assertEqual(re.search(br"\d\D\w\W\s\S",
                               b"1aa! a", re.LOCALE).group(0), b"1aa! a")
---|
683 | n/a | |
---|
def test_other_escapes(self):
    # Backslash-escaped punctuation matches literally, and unknown
    # backslash-letter escapes are rejected at compile time.
    # A lone trailing backslash is an error.
    self.checkPatternError("\\", 'bad escape (end of pattern)', 0)
    self.assertEqual(re.match(r"\(", '(').group(), '(')
    self.assertIsNone(re.match(r"\(", ')'))
    self.assertEqual(re.match(r"\\", '\\').group(), '\\')
    # Escaped metacharacters inside a character class are literals.
    self.assertEqual(re.match(r"[\]]", ']').group(), ']')
    self.assertIsNone(re.match(r"[\]]", '['))
    self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
    self.assertIsNone(re.match(r"[a\-c]", 'b'))
    self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
    self.assertIsNone(re.match(r"[\^a]+", 'b'))
    re.purge()  # for warnings
    # Each of these letters after a backslash must raise re.error outside
    # a character class ...
    for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
        with self.subTest(c):
            self.assertRaises(re.error, re.compile, '\\%c' % c)
    # ... and inside one (this list additionally contains A, B and Z).
    for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
        with self.subTest(c):
            self.assertRaises(re.error, re.compile, '[\\%c]' % c)
---|
702 | n/a | |
---|
def test_string_boundaries(self):
    # Behaviour of \b and \B at the edges of a string and on empty or
    # whitespace-only input.
    # See http://bugs.python.org/issue10713
    self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
                     "abc")
    # There's a word boundary at the start of a string.
    self.assertTrue(re.match(r"\b", "abc"))
    # A non-empty string includes a non-boundary zero-length match.
    self.assertTrue(re.search(r"\B", "abc"))
    # There is no non-boundary match at the start of a string.
    self.assertFalse(re.match(r"\B", "abc"))
    # However, an empty string contains no word boundaries, and also no
    # non-boundaries.
    self.assertIsNone(re.search(r"\B", ""))
    # This one is questionable and different from the perlre behaviour,
    # but describes current behavior.
    self.assertIsNone(re.search(r"\b", ""))
    # A single word-character string has two boundaries, but no
    # non-boundary gaps.
    self.assertEqual(len(re.findall(r"\b", "a")), 2)
    self.assertEqual(len(re.findall(r"\B", "a")), 0)
    # If there are no words, there are no boundaries
    self.assertEqual(len(re.findall(r"\b", " ")), 0)
    # NOTE(review): identical to the assertion above as written; presumably
    # a longer whitespace-only string was intended -- confirm.
    self.assertEqual(len(re.findall(r"\b", " ")), 0)
    # Can match around the whitespace.
    self.assertEqual(len(re.findall(r"\B", " ")), 2)
---|
728 | n/a | |
---|
def test_bigcharset(self):
    # Character classes made of code points above 255.
    self.assertEqual(re.match("([\u2222\u2223])",
                              "\u2222").group(1), "\u2222")
    # A class of every 255th code point from 256 up to (excluding) 2**16;
    # U+FF01 is one of them (256 + 255 * 255 == 0xFF01).
    r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
    self.assertEqual(re.match(r, "\uff01").group(), "\uff01")
---|
734 | n/a | |
---|
def test_big_codesize(self):
    # A very large pattern (10000 alternatives, '0'|'1'|...|'9999') must
    # still compile and match.
    # Issue #1160
    r = re.compile('|'.join(('%d'%x for x in range(10000))))
    self.assertTrue(r.match('1000'))
    self.assertTrue(r.match('9999'))
---|
740 | n/a | |
---|
def test_anyall(self):
    # With re.DOTALL, "." also matches newline characters.
    self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
                     "a\nb")
    self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
                     "a\n\nb")
---|
746 | n/a | |
---|
def test_lookahead(self):
    # Positive (?=...) and negative (?!...) lookahead assertions. In each
    # case group 1 is just "a": the lookahead consumes nothing.
    self.assertEqual(re.match(r"(a(?=\s[^a]))", "a b").group(1), "a")
    self.assertEqual(re.match(r"(a(?=\s[^a]*))", "a b").group(1), "a")
    self.assertEqual(re.match(r"(a(?=\s[abc]))", "a b").group(1), "a")
    self.assertEqual(re.match(r"(a(?=\s[abc]*))", "a bc").group(1), "a")
    self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
    self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
    self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")

    self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
    self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
    self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
    self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")

    # Group reference.
    self.assertTrue(re.match(r'(a)b(?=\1)a', 'aba'))
    self.assertIsNone(re.match(r'(a)b(?=\1)c', 'abac'))
    # Conditional group reference.
    self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
    self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(2)c|x))c', 'abc'))
    self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(2)x|c))c', 'abc'))
    self.assertIsNone(re.match(r'(?:(a)|(x))b(?=(?(1)b|x))c', 'abc'))
    self.assertTrue(re.match(r'(?:(a)|(x))b(?=(?(1)c|x))c', 'abc'))
    # Group used before defined.
    self.assertTrue(re.match(r'(a)b(?=(?(2)x|c))(c)', 'abc'))
    self.assertIsNone(re.match(r'(a)b(?=(?(2)b|x))(c)', 'abc'))
    self.assertTrue(re.match(r'(a)b(?=(?(1)c|x))(c)', 'abc'))
---|
774 | n/a | |
---|
def test_lookbehind(self):
    # Positive (?<=...) and negative (?<!...) lookbehind assertions.
    self.assertTrue(re.match(r'ab(?<=b)c', 'abc'))
    self.assertIsNone(re.match(r'ab(?<=c)c', 'abc'))
    self.assertIsNone(re.match(r'ab(?<!b)c', 'abc'))
    self.assertTrue(re.match(r'ab(?<!c)c', 'abc'))
    # Group reference.
    self.assertTrue(re.match(r'(a)a(?<=\1)c', 'aac'))
    self.assertIsNone(re.match(r'(a)b(?<=\1)a', 'abaa'))
    self.assertIsNone(re.match(r'(a)a(?<!\1)c', 'aac'))
    self.assertTrue(re.match(r'(a)b(?<!\1)a', 'abaa'))
    # Conditional group reference.
    self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)x|c))c', 'abc'))
    self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(2)b|x))c', 'abc'))
    self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(2)x|b))c', 'abc'))
    self.assertIsNone(re.match(r'(?:(a)|(x))b(?<=(?(1)c|x))c', 'abc'))
    self.assertTrue(re.match(r'(?:(a)|(x))b(?<=(?(1)b|x))c', 'abc'))
    # Group used before defined.
    # Unlike in a lookahead, this is a compile-time error.
    self.assertRaises(re.error, re.compile, r'(a)b(?<=(?(2)b|x))(c)')
    self.assertIsNone(re.match(r'(a)b(?<=(?(1)c|x))(c)', 'abc'))
    self.assertTrue(re.match(r'(a)b(?<=(?(1)b|x))(c)', 'abc'))
    # Group defined in the same lookbehind pattern
    self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)\2)(c)')
    self.assertRaises(re.error, re.compile, r'(a)b(?<=(?P<a>.)(?P=a))(c)')
    self.assertRaises(re.error, re.compile, r'(a)b(?<=(a)(?(2)b|x))(c)')
    self.assertRaises(re.error, re.compile, r'(a)b(?<=(.)(?<=\2))(c)')
---|
800 | n/a | |
---|
def test_ignore_case(self):
    # re.I matching for literals, sets, backreferences and alternations,
    # followed by non-ASCII characters whose case mapping lands on (or is
    # shared with) another character.
    self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
    self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
    self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
    self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
    self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
    self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
    self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
    self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
    self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
    self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")

    # The bare asserts below pin the Unicode case mappings the following
    # checks rely on.
    assert '\u212a'.lower() == 'k'  # 'K' (U+212A KELVIN SIGN)
    self.assertTrue(re.match(r'K', '\u212a', re.I))
    self.assertTrue(re.match(r'k', '\u212a', re.I))
    self.assertTrue(re.match(r'\u212a', 'K', re.I))
    self.assertTrue(re.match(r'\u212a', 'k', re.I))
    assert '\u017f'.upper() == 'S'  # 'ſ' (U+017F LATIN SMALL LETTER LONG S)
    self.assertTrue(re.match(r'S', '\u017f', re.I))
    self.assertTrue(re.match(r's', '\u017f', re.I))
    self.assertTrue(re.match(r'\u017f', 'S', re.I))
    self.assertTrue(re.match(r'\u017f', 's', re.I))
    # 'ﬅ', 'ﬆ' (U+FB05 LATIN SMALL LIGATURE LONG S T, U+FB06 LATIN SMALL
    # LIGATURE ST)
    assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST'
    self.assertTrue(re.match(r'\ufb05', '\ufb06', re.I))
    self.assertTrue(re.match(r'\ufb06', '\ufb05', re.I))
---|
826 | n/a | |
---|
def test_ignore_case_set(self):
    # re.I matching when the cased character sits inside a character set,
    # for str and bytes patterns.
    self.assertTrue(re.match(r'[19A]', 'A', re.I))
    self.assertTrue(re.match(r'[19a]', 'a', re.I))
    self.assertTrue(re.match(r'[19a]', 'A', re.I))
    self.assertTrue(re.match(r'[19A]', 'a', re.I))
    self.assertTrue(re.match(br'[19A]', b'A', re.I))
    self.assertTrue(re.match(br'[19a]', b'a', re.I))
    self.assertTrue(re.match(br'[19a]', b'A', re.I))
    self.assertTrue(re.match(br'[19A]', b'a', re.I))
    # The bare asserts below pin the Unicode case mappings the following
    # checks rely on.
    assert '\u212a'.lower() == 'k'  # 'K' (U+212A KELVIN SIGN)
    self.assertTrue(re.match(r'[19K]', '\u212a', re.I))
    self.assertTrue(re.match(r'[19k]', '\u212a', re.I))
    self.assertTrue(re.match(r'[19\u212a]', 'K', re.I))
    self.assertTrue(re.match(r'[19\u212a]', 'k', re.I))
    assert '\u017f'.upper() == 'S'  # 'ſ' (U+017F LATIN SMALL LETTER LONG S)
    self.assertTrue(re.match(r'[19S]', '\u017f', re.I))
    self.assertTrue(re.match(r'[19s]', '\u017f', re.I))
    self.assertTrue(re.match(r'[19\u017f]', 'S', re.I))
    self.assertTrue(re.match(r'[19\u017f]', 's', re.I))
    # 'ﬅ', 'ﬆ' (U+FB05 LATIN SMALL LIGATURE LONG S T, U+FB06 LATIN SMALL
    # LIGATURE ST)
    assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST'
    self.assertTrue(re.match(r'[19\ufb05]', '\ufb06', re.I))
    self.assertTrue(re.match(r'[19\ufb06]', '\ufb05', re.I))
---|
849 | n/a | |
---|
def test_ignore_case_range(self):
    # re.I matching against character ranges: a character matches when it,
    # or its other-case counterpart, falls inside the range.
    # Issues #3511, #17381.
    # '_' lies between '9' and 'a' but not between '9' and 'A'.
    self.assertTrue(re.match(r'[9-a]', '_', re.I))
    self.assertIsNone(re.match(r'[9-A]', '_', re.I))
    self.assertTrue(re.match(br'[9-a]', b'_', re.I))
    self.assertIsNone(re.match(br'[9-A]', b'_', re.I))
    # Latin-1 ranges: each of \xd7 and \xf7 matches only the range that
    # contains it, not the other one.
    self.assertTrue(re.match(r'[\xc0-\xde]', '\xd7', re.I))
    self.assertIsNone(re.match(r'[\xc0-\xde]', '\xf7', re.I))
    self.assertTrue(re.match(r'[\xe0-\xfe]', '\xf7', re.I))
    self.assertIsNone(re.match(r'[\xe0-\xfe]', '\xd7', re.I))
    # BMP ranges (U+04xx) in both case directions.
    self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0450', re.I))
    self.assertTrue(re.match(r'[\u0430-\u045f]', '\u0400', re.I))
    self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0450', re.I))
    self.assertTrue(re.match(r'[\u0400-\u042f]', '\u0400', re.I))
    # Ranges outside the BMP (U+104xx) in both case directions.
    self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010428', re.I))
    self.assertTrue(re.match(r'[\U00010428-\U0001044f]', '\U00010400', re.I))
    self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010428', re.I))
    self.assertTrue(re.match(r'[\U00010400-\U00010427]', '\U00010400', re.I))

    # The bare asserts below pin the Unicode case mappings the following
    # checks rely on.
    assert '\u212a'.lower() == 'k'  # 'K' (U+212A KELVIN SIGN)
    self.assertTrue(re.match(r'[J-M]', '\u212a', re.I))
    self.assertTrue(re.match(r'[j-m]', '\u212a', re.I))
    self.assertTrue(re.match(r'[\u2129-\u212b]', 'K', re.I))
    self.assertTrue(re.match(r'[\u2129-\u212b]', 'k', re.I))
    assert '\u017f'.upper() == 'S'  # 'ſ' (U+017F LATIN SMALL LETTER LONG S)
    self.assertTrue(re.match(r'[R-T]', '\u017f', re.I))
    self.assertTrue(re.match(r'[r-t]', '\u017f', re.I))
    self.assertTrue(re.match(r'[\u017e-\u0180]', 'S', re.I))
    self.assertTrue(re.match(r'[\u017e-\u0180]', 's', re.I))
    # 'ﬅ', 'ﬆ' (U+FB05 LATIN SMALL LIGATURE LONG S T, U+FB06 LATIN SMALL
    # LIGATURE ST)
    assert '\ufb05'.upper() == '\ufb06'.upper() == 'ST'
    self.assertTrue(re.match(r'[\ufb04-\ufb05]', '\ufb06', re.I))
    self.assertTrue(re.match(r'[\ufb06-\ufb07]', '\ufb05', re.I))
---|
882 | n/a | |
---|
def test_category(self):
    # A category escape (\s) inside a capturing group captures the
    # matched character.
    self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
---|
885 | n/a | |
---|
def test_getlower(self):
    # _sre.getlower() maps 'A' to 'a' under each flag setting, and re.I
    # matching agrees for str and bytes patterns.
    import _sre
    self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))

    self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
    self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
    self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
    self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
---|
897 | n/a | |
---|
def test_not_literal(self):
    # A negated single-character set ([^a]), alone and under "*".
    self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")
    self.assertEqual(re.search(r"\s([^a]*)", " bb").group(1), "bb")
---|
901 | n/a | |
---|
def test_search_coverage(self):
    # re.search() with a pattern that starts with a category escape and
    # one that starts with a literal.
    self.assertEqual(re.search(r"\s(b)", " b").group(1), "b")
    self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
---|
905 | n/a | |
---|
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Assert that matcher(pattern, text) succeeds and that the matched
    # text and span are as expected.
    #
    # match and span must be given together. When both are omitted the
    # pattern is expected to match the whole of text.
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        expected_text, expected_span = text, (0, len(text))
    else:
        expected_text, expected_span = match, span
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), expected_text)
    self.assertEqual(found.span(), expected_span)
---|
919 | n/a | |
---|
def test_re_escape(self):
    # re.escape() on every character in range(256): ASCII alphanumerics
    # and '_' are left alone, NUL becomes '\000', and anything else gets a
    # backslash prefix. The escaped form must match the original.
    alnum_chars = string.ascii_letters + string.digits + '_'
    p = ''.join(chr(i) for i in range(256))
    for c in p:
        if c in alnum_chars:
            self.assertEqual(re.escape(c), c)
        elif c == '\x00':
            self.assertEqual(re.escape(c), '\\000')
        else:
            self.assertEqual(re.escape(c), '\\' + c)
        self.assertMatch(re.escape(c), c)
    # The whole 256-character string round-trips as well.
    self.assertMatch(re.escape(p), p)
---|
932 | n/a | |
---|
def test_re_escape_byte(self):
    # Bytes counterpart of the str escape test: same three cases, checked
    # for every byte value 0..255.
    alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
    p = bytes(range(256))
    for i in p:
        # Iterating bytes yields ints; rebuild a one-byte bytes object.
        b = bytes([i])
        if b in alnum_chars:
            self.assertEqual(re.escape(b), b)
        elif i == 0:
            self.assertEqual(re.escape(b), b'\\000')
        else:
            self.assertEqual(re.escape(b), b'\\' + b)
        self.assertMatch(re.escape(b), b)
    self.assertMatch(re.escape(p), p)
---|
946 | n/a | |
---|
def test_re_escape_non_ascii(self):
    # Non-ASCII characters are backslash-escaped too, and the escaped
    # string still matches the original.
    s = 'xxx\u2620\u2620\u2620xxx'
    s_escaped = re.escape(s)
    self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(s_escaped, s)
    # An escaped character can be embedded in a larger pattern; here the
    # search is expected to match s[2:7].
    self.assertMatch('.%s+.' % re.escape('\u2620'), s,
                     'x\u2620\u2620\u2620x', (2, 7), re.search)
---|
954 | n/a | |
---|
def test_re_escape_non_ascii_bytes(self):
    # For bytes input every non-ASCII byte is escaped individually, so
    # each three-byte UTF-8 sequence becomes three escaped bytes.
    b = 'y\u2620y\u2620y'.encode('utf-8')
    b_escaped = re.escape(b)
    self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(b_escaped, b)
    # The escaped encoded character is found twice in b.
    res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
    self.assertEqual(len(res), 2)
---|
962 | n/a | |
---|
def test_pickling(self):
    # A compiled pattern survives a pickle round trip under every
    # protocol and compares equal to the original.
    import pickle
    oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)', re.UNICODE)
    for proto in range(pickle.HIGHEST_PROTOCOL + 1):
        pickled = pickle.dumps(oldpat, proto)
        newpat = pickle.loads(pickled)
        self.assertEqual(newpat, oldpat)
    # current pickle expects the _compile() reconstructor in re module
    # (the import is the check: it fails if re._compile is missing)
    from re import _compile
---|
972 | n/a | |
---|
def test_constants(self):
    # Each single-letter flag is an alias of its long-named flag.
    self.assertEqual(re.I, re.IGNORECASE)
    self.assertEqual(re.L, re.LOCALE)
    self.assertEqual(re.M, re.MULTILINE)
    self.assertEqual(re.S, re.DOTALL)
    self.assertEqual(re.X, re.VERBOSE)
---|
979 | n/a | |
---|
def test_flags(self):
    # Every flag can be passed to re.compile() on its own. re.U is tried
    # only with the str pattern and re.L only with the bytes pattern.
    for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
        self.assertTrue(re.compile('^pattern$', flag))
    for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
        self.assertTrue(re.compile(b'^pattern$', flag))
---|
985 | n/a | |
---|
def test_sre_character_literals(self):
    # Numeric character escapes in str patterns: octal (\ooo), \xhh,
    # \uhhhh and \Uhhhhhhhh. Each is also followed by one more character
    # to check that the escape stops after its fixed number of digits.
    for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
        if i < 256:
            # Octal and \x escapes are only exercised below 256.
            self.assertTrue(re.match(r"\%03o" % i, chr(i)))
            self.assertTrue(re.match(r"\%03o0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\%03o8" % i, chr(i)+"8"))
            self.assertTrue(re.match(r"\x%02x" % i, chr(i)))
            self.assertTrue(re.match(r"\x%02x0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\x%02xz" % i, chr(i)+"z"))
        if i < 0x10000:
            # \u takes exactly four hex digits.
            self.assertTrue(re.match(r"\u%04x" % i, chr(i)))
            self.assertTrue(re.match(r"\u%04x0" % i, chr(i)+"0"))
            self.assertTrue(re.match(r"\u%04xz" % i, chr(i)+"z"))
        self.assertTrue(re.match(r"\U%08x" % i, chr(i)))
        self.assertTrue(re.match(r"\U%08x0" % i, chr(i)+"0"))
        self.assertTrue(re.match(r"\U%08xz" % i, chr(i)+"z"))
    # Short octal escapes that start with 0; a following '8' is a literal.
    self.assertTrue(re.match(r"\0", "\000"))
    self.assertTrue(re.match(r"\08", "\0008"))
    self.assertTrue(re.match(r"\01", "\001"))
    self.assertTrue(re.match(r"\018", "\0018"))
    # Out-of-range or truncated escapes are compile errors.
    self.checkPatternError(r"\567",
                           r'octal escape value \567 outside of '
                           r'range 0-0o377', 0)
    self.checkPatternError(r"\911", 'invalid group reference 91', 1)
    self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
    self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
    self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
    self.checkPatternError(r"\u123z", r'incomplete escape \u123', 0)
    self.checkPatternError(r"\U0001234", r'incomplete escape \U0001234', 0)
    self.checkPatternError(r"\U0001234z", r'incomplete escape \U0001234', 0)
    self.checkPatternError(r"\U00110000", r'bad escape \U00110000', 0)
---|
1017 | n/a | |
---|
def test_sre_character_class_literals(self):
    # Numeric character escapes inside a character class in str patterns.
    for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
        if i < 256:
            # Octal (with and without zero padding) and \x escapes are
            # only exercised below 256.
            self.assertTrue(re.match(r"[\%o]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%o8]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%03o]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%03o0]" % i, chr(i)))
            self.assertTrue(re.match(r"[\%03o8]" % i, chr(i)))
            self.assertTrue(re.match(r"[\x%02x]" % i, chr(i)))
            self.assertTrue(re.match(r"[\x%02x0]" % i, chr(i)))
            self.assertTrue(re.match(r"[\x%02xz]" % i, chr(i)))
        if i < 0x10000:
            self.assertTrue(re.match(r"[\u%04x]" % i, chr(i)))
            self.assertTrue(re.match(r"[\u%04x0]" % i, chr(i)))
            self.assertTrue(re.match(r"[\u%04xz]" % i, chr(i)))
        self.assertTrue(re.match(r"[\U%08x]" % i, chr(i)))
        # The class matches one character, so re.match() succeeds on the
        # first character of the two-character subject.
        self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
        self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
    # Out-of-range or truncated escapes are compile errors; the expected
    # position is 1 because of the leading '['.
    self.checkPatternError(r"[\567]",
                           r'octal escape value \567 outside of '
                           r'range 0-0o377', 1)
    # Inside a class \9 is reported as a bad escape, not a group reference.
    self.checkPatternError(r"[\911]", r'bad escape \9', 1)
    self.checkPatternError(r"[\x1z]", r'incomplete escape \x1', 1)
    self.checkPatternError(r"[\u123z]", r'incomplete escape \u123', 1)
    self.checkPatternError(r"[\U0001234z]", r'incomplete escape \U0001234', 1)
    self.checkPatternError(r"[\U00110000]", r'bad escape \U00110000', 1)
    # A range whose endpoints are both \U escapes.
    self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
---|
1045 | n/a | |
---|
def test_sre_byte_literals(self):
    # Numeric character escapes in bytes patterns: octal and \xhh work,
    # while \u and \U are compile errors.
    for i in [0, 8, 16, 32, 64, 127, 128, 255]:
        self.assertTrue(re.match((r"\%03o" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
        self.assertTrue(re.match((r"\%03o8" % i).encode(), bytes([i])+b"8"))
        self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
        self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
        self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
    self.assertRaises(re.error, re.compile, br"\u1234")
    self.assertRaises(re.error, re.compile, br"\U00012345")
    # Short octal escapes that start with 0; a following '8' is a literal.
    self.assertTrue(re.match(br"\0", b"\000"))
    self.assertTrue(re.match(br"\08", b"\0008"))
    self.assertTrue(re.match(br"\01", b"\001"))
    self.assertTrue(re.match(br"\018", b"\0018"))
    # The expected messages are str even though the patterns are bytes.
    self.checkPatternError(br"\567",
                           r'octal escape value \567 outside of '
                           r'range 0-0o377', 0)
    self.checkPatternError(br"\911", 'invalid group reference 91', 1)
    self.checkPatternError(br"\x1", r'incomplete escape \x1', 0)
    self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0)
---|
1066 | n/a | |
---|
def test_sre_byte_class_literals(self):
    # Octal and hex escapes inside a bytes character class must match the
    # byte they name, with or without another set member after them.
    class_templates = (
        r"[\%o]",
        r"[\%o8]",
        r"[\%03o]",
        r"[\%03o0]",
        r"[\%03o8]",
        r"[\x%02x]",
        r"[\x%02x0]",
        r"[\x%02xz]",
    )
    for i in [0, 8, 16, 32, 64, 127, 128, 255]:
        for template in class_templates:
            pattern = (template % i).encode()
            # subTest keeps one failing byte from hiding the others.
            with self.subTest(pattern=pattern):
                self.assertTrue(re.match(pattern, bytes([i])))
    # \u and \U escapes are rejected in bytes patterns.
    self.assertRaises(re.error, re.compile, br"[\u1234]")
    self.assertRaises(re.error, re.compile, br"[\U00012345]")
    # Malformed escapes: exact message and error position.
    self.checkPatternError(br"[\567]",
                           r'octal escape value \567 outside of '
                           r'range 0-0o377', 1)
    self.checkPatternError(br"[\911]", r'bad escape \9', 1)
    self.checkPatternError(br"[\x1z]", r'incomplete escape \x1', 1)
1084 | n/a | |
---|
def test_character_set_errors(self):
    # Malformed character sets must raise re.error with the exact message
    # and error position.
    for pattern in (r'[', r'[^', r'[a'):
        self.checkPatternError(pattern, 'unterminated character set', 0)
    # bug 545855 -- This pattern failed to cause a compile error as it
    # should, instead provoking a TypeError.
    self.checkPatternError(r"[a-", 'unterminated character set', 0)
    self.checkPatternError(r"[\w-b]", r'bad character range \w-b', 1)
    self.checkPatternError(r"[a-\w]", r'bad character range a-\w', 1)
    self.checkPatternError(r"[b-a]", 'bad character range b-a', 1)
1095 | n/a | |
---|
1096 | n/a | def test_bug_113254(self): |
---|
1097 | n/a | self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1) |
---|
1098 | n/a | self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1) |
---|
1099 | n/a | self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1)) |
---|
1100 | n/a | |
---|
1101 | n/a | def test_bug_527371(self): |
---|
1102 | n/a | # bug described in patches 527371/672491 |
---|
1103 | n/a | self.assertIsNone(re.match(r'(a)?a','a').lastindex) |
---|
1104 | n/a | self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1) |
---|
1105 | n/a | self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a') |
---|
1106 | n/a | self.assertEqual(re.match(r"(?P<a>a(b))", "ab").lastgroup, 'a') |
---|
1107 | n/a | self.assertEqual(re.match(r"((a))", "a").lastindex, 1) |
---|
1108 | n/a | |
---|
1109 | n/a | def test_bug_418626(self): |
---|
1110 | n/a | # bugs 418626 at al. -- Testing Greg Chapman's addition of op code |
---|
1111 | n/a | # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of |
---|
1112 | n/a | # pattern '*?' on a long string. |
---|
1113 | n/a | self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001) |
---|
1114 | n/a | self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0), |
---|
1115 | n/a | 20003) |
---|
1116 | n/a | self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001) |
---|
1117 | n/a | # non-simple '*?' still used to hit the recursion limit, before the |
---|
1118 | n/a | # non-recursive scheme was implemented. |
---|
1119 | n/a | self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001) |
---|
1120 | n/a | |
---|
1121 | n/a | def test_bug_612074(self): |
---|
1122 | n/a | pat="["+re.escape("\u2039")+"]" |
---|
1123 | n/a | self.assertEqual(re.compile(pat) and 1, 1) |
---|
1124 | n/a | |
---|
1125 | n/a | def test_stack_overflow(self): |
---|
1126 | n/a | # nasty cases that used to overflow the straightforward recursive |
---|
1127 | n/a | # implementation of repeated groups. |
---|
1128 | n/a | self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x') |
---|
1129 | n/a | self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x') |
---|
1130 | n/a | self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x') |
---|
1131 | n/a | |
---|
def test_nothing_to_repeat(self):
    # A repeat operator with nothing before it is an error, both at the
    # start of the pattern and at the start of a non-capturing group.
    for reps in '*', '+', '?', '{1,2}':
        for mod in '', '?':
            operator = reps + mod
            self.checkPatternError('%s' % operator,
                                   'nothing to repeat', 0)
            self.checkPatternError('(?:%s)' % operator,
                                   'nothing to repeat', 3)
1139 | n/a | |
---|
def test_multiple_repeat(self):
    # Stacking a second repeat operator directly on a repeated item is an
    # error reported at the position of the second operator.
    for outer_reps in '*', '+', '{1,2}':
        for outer_mod in '', '?':
            outer_op = outer_reps + outer_mod
            for inner_reps in '*', '+', '?', '{1,2}':
                for inner_mod in '', '?':
                    inner_op = inner_reps + inner_mod
                    # 1 for the leading 'x', then the inner operator.
                    self.checkPatternError(
                        r'x%s%s' % (inner_op, outer_op),
                        'multiple repeat', 1 + len(inner_op))
1149 | n/a | |
---|
1150 | n/a | def test_unlimited_zero_width_repeat(self): |
---|
1151 | n/a | # Issue #9669 |
---|
1152 | n/a | self.assertIsNone(re.match(r'(?:a?)*y', 'z')) |
---|
1153 | n/a | self.assertIsNone(re.match(r'(?:a?)+y', 'z')) |
---|
1154 | n/a | self.assertIsNone(re.match(r'(?:a?){2,}y', 'z')) |
---|
1155 | n/a | self.assertIsNone(re.match(r'(?:a?)*?y', 'z')) |
---|
1156 | n/a | self.assertIsNone(re.match(r'(?:a?)+?y', 'z')) |
---|
1157 | n/a | self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z')) |
---|
1158 | n/a | |
---|
1159 | n/a | def test_scanner(self): |
---|
1160 | n/a | def s_ident(scanner, token): return token |
---|
1161 | n/a | def s_operator(scanner, token): return "op%s" % token |
---|
1162 | n/a | def s_float(scanner, token): return float(token) |
---|
1163 | n/a | def s_int(scanner, token): return int(token) |
---|
1164 | n/a | |
---|
1165 | n/a | scanner = Scanner([ |
---|
1166 | n/a | (r"[a-zA-Z_]\w*", s_ident), |
---|
1167 | n/a | (r"\d+\.\d*", s_float), |
---|
1168 | n/a | (r"\d+", s_int), |
---|
1169 | n/a | (r"=|\+|-|\*|/", s_operator), |
---|
1170 | n/a | (r"\s+", None), |
---|
1171 | n/a | ]) |
---|
1172 | n/a | |
---|
1173 | n/a | self.assertTrue(scanner.scanner.scanner("").pattern) |
---|
1174 | n/a | |
---|
1175 | n/a | self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), |
---|
1176 | n/a | (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, |
---|
1177 | n/a | 'op+', 'bar'], '')) |
---|
1178 | n/a | |
---|
1179 | n/a | def test_bug_448951(self): |
---|
1180 | n/a | # bug 448951 (similar to 429357, but with single char match) |
---|
1181 | n/a | # (Also test greedy matches.) |
---|
1182 | n/a | for op in '','?','*': |
---|
1183 | n/a | self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(), |
---|
1184 | n/a | (None, None)) |
---|
1185 | n/a | self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(), |
---|
1186 | n/a | ('a:', 'a')) |
---|
1187 | n/a | |
---|
1188 | n/a | def test_bug_725106(self): |
---|
1189 | n/a | # capturing groups in alternatives in repeats |
---|
1190 | n/a | self.assertEqual(re.match('^((a)|b)*', 'abc').groups(), |
---|
1191 | n/a | ('b', 'a')) |
---|
1192 | n/a | self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(), |
---|
1193 | n/a | ('c', 'b')) |
---|
1194 | n/a | self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(), |
---|
1195 | n/a | ('b', None)) |
---|
1196 | n/a | self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(), |
---|
1197 | n/a | ('b', None)) |
---|
1198 | n/a | self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(), |
---|
1199 | n/a | ('b', 'a')) |
---|
1200 | n/a | self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(), |
---|
1201 | n/a | ('c', 'b')) |
---|
1202 | n/a | self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(), |
---|
1203 | n/a | ('b', None)) |
---|
1204 | n/a | self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(), |
---|
1205 | n/a | ('b', None)) |
---|
1206 | n/a | |
---|
1207 | n/a | def test_bug_725149(self): |
---|
1208 | n/a | # mark_stack_base restoring before restoring marks |
---|
1209 | n/a | self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(), |
---|
1210 | n/a | ('a', None)) |
---|
1211 | n/a | self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(), |
---|
1212 | n/a | ('a', None, None)) |
---|
1213 | n/a | |
---|
1214 | n/a | def test_bug_764548(self): |
---|
1215 | n/a | # bug 764548, re.compile() barfs on str/unicode subclasses |
---|
1216 | n/a | class my_unicode(str): pass |
---|
1217 | n/a | pat = re.compile(my_unicode("abc")) |
---|
1218 | n/a | self.assertIsNone(pat.match("xyz")) |
---|
1219 | n/a | |
---|
1220 | n/a | def test_finditer(self): |
---|
1221 | n/a | iter = re.finditer(r":+", "a:b::c:::d") |
---|
1222 | n/a | self.assertEqual([item.group(0) for item in iter], |
---|
1223 | n/a | [":", "::", ":::"]) |
---|
1224 | n/a | |
---|
1225 | n/a | pat = re.compile(r":+") |
---|
1226 | n/a | iter = pat.finditer("a:b::c:::d", 1, 10) |
---|
1227 | n/a | self.assertEqual([item.group(0) for item in iter], |
---|
1228 | n/a | [":", "::", ":::"]) |
---|
1229 | n/a | |
---|
1230 | n/a | pat = re.compile(r":+") |
---|
1231 | n/a | iter = pat.finditer("a:b::c:::d", pos=1, endpos=10) |
---|
1232 | n/a | self.assertEqual([item.group(0) for item in iter], |
---|
1233 | n/a | [":", "::", ":::"]) |
---|
1234 | n/a | |
---|
1235 | n/a | pat = re.compile(r":+") |
---|
1236 | n/a | iter = pat.finditer("a:b::c:::d", endpos=10, pos=1) |
---|
1237 | n/a | self.assertEqual([item.group(0) for item in iter], |
---|
1238 | n/a | [":", "::", ":::"]) |
---|
1239 | n/a | |
---|
1240 | n/a | pat = re.compile(r":+") |
---|
1241 | n/a | iter = pat.finditer("a:b::c:::d", pos=3, endpos=8) |
---|
1242 | n/a | self.assertEqual([item.group(0) for item in iter], |
---|
1243 | n/a | ["::", "::"]) |
---|
1244 | n/a | |
---|
1245 | n/a | def test_bug_926075(self): |
---|
1246 | n/a | self.assertIsNot(re.compile('bug_926075'), |
---|
1247 | n/a | re.compile(b'bug_926075')) |
---|
1248 | n/a | |
---|
1249 | n/a | def test_bug_931848(self): |
---|
1250 | n/a | pattern = "[\u002E\u3002\uFF0E\uFF61]" |
---|
1251 | n/a | self.assertEqual(re.compile(pattern).split("a.b.c"), |
---|
1252 | n/a | ['a','b','c']) |
---|
1253 | n/a | |
---|
1254 | n/a | def test_bug_581080(self): |
---|
1255 | n/a | iter = re.finditer(r"\s", "a b") |
---|
1256 | n/a | self.assertEqual(next(iter).span(), (1,2)) |
---|
1257 | n/a | self.assertRaises(StopIteration, next, iter) |
---|
1258 | n/a | |
---|
1259 | n/a | scanner = re.compile(r"\s").scanner("a b") |
---|
1260 | n/a | self.assertEqual(scanner.search().span(), (1, 2)) |
---|
1261 | n/a | self.assertIsNone(scanner.search()) |
---|
1262 | n/a | |
---|
1263 | n/a | def test_bug_817234(self): |
---|
1264 | n/a | iter = re.finditer(r".*", "asdf") |
---|
1265 | n/a | self.assertEqual(next(iter).span(), (0, 4)) |
---|
1266 | n/a | self.assertEqual(next(iter).span(), (4, 4)) |
---|
1267 | n/a | self.assertRaises(StopIteration, next, iter) |
---|
1268 | n/a | |
---|
1269 | n/a | def test_bug_6561(self): |
---|
1270 | n/a | # '\d' should match characters in Unicode category 'Nd' |
---|
1271 | n/a | # (Number, Decimal Digit), but not those in 'Nl' (Number, |
---|
1272 | n/a | # Letter) or 'No' (Number, Other). |
---|
1273 | n/a | decimal_digits = [ |
---|
1274 | n/a | '\u0037', # '\N{DIGIT SEVEN}', category 'Nd' |
---|
1275 | n/a | '\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd' |
---|
1276 | n/a | '\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd' |
---|
1277 | n/a | ] |
---|
1278 | n/a | for x in decimal_digits: |
---|
1279 | n/a | self.assertEqual(re.match(r'^\d$', x).group(0), x) |
---|
1280 | n/a | |
---|
1281 | n/a | not_decimal_digits = [ |
---|
1282 | n/a | '\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl' |
---|
1283 | n/a | '\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl' |
---|
1284 | n/a | '\u2082', # '\N{SUBSCRIPT TWO}', category 'No' |
---|
1285 | n/a | '\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No' |
---|
1286 | n/a | ] |
---|
1287 | n/a | for x in not_decimal_digits: |
---|
1288 | n/a | self.assertIsNone(re.match(r'^\d$', x)) |
---|
1289 | n/a | |
---|
1290 | n/a | def test_empty_array(self): |
---|
1291 | n/a | # SF buf 1647541 |
---|
1292 | n/a | import array |
---|
1293 | n/a | for typecode in 'bBuhHiIlLfd': |
---|
1294 | n/a | a = array.array(typecode) |
---|
1295 | n/a | self.assertIsNone(re.compile(b"bla").match(a)) |
---|
1296 | n/a | self.assertEqual(re.compile(b"").match(a).groups(), ()) |
---|
1297 | n/a | |
---|
def test_inline_flags(self):
    # Bug #1700
    upper_char = '\u1ea0'  # Latin Capital Letter A with Dot Below
    lower_char = '\u1ea1'  # Latin Small Letter A with Dot Below

    # Case-insensitive matching must work in both directions, whether
    # the flags are passed as arguments, inline, or as a mix of the two.
    for pat_char, text_char in ((upper_char, lower_char),
                                (lower_char, upper_char)):
        with self.subTest(pattern=ascii(pat_char)):
            p = re.compile(pat_char, re.I | re.U)
            self.assertTrue(p.match(text_char))

            p = re.compile('(?i)' + pat_char, re.U)
            self.assertTrue(p.match(text_char))

            p = re.compile('(?iu)' + pat_char)
            self.assertTrue(p.match(text_char))

            # With (?x) the space after the flag group is ignored.
            self.assertTrue(re.match('(?ixu) ' + pat_char, text_char))

    # Flags that are not at the start still apply, but are deprecated.
    p = upper_char + '(?i)'
    with self.assertWarns(DeprecationWarning) as warns:
        self.assertTrue(re.match(p, lower_char))
    self.assertEqual(
        str(warns.warnings[0].message),
        'Flags not at the start of the expression %s' % p
    )

    # A long pattern is cut to 20 characters in the warning text.
    p = upper_char + '(?i)%s' % ('.?' * 100)
    with self.assertWarns(DeprecationWarning) as warns:
        self.assertTrue(re.match(p, lower_char))
    self.assertEqual(
        str(warns.warnings[0].message),
        'Flags not at the start of the expression %s (truncated)' % p[:20]
    )
1345 | n/a | |
---|
1346 | n/a | def test_dollar_matches_twice(self): |
---|
1347 | n/a | "$ matches the end of string, and just before the terminating \n" |
---|
1348 | n/a | pattern = re.compile('$') |
---|
1349 | n/a | self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') |
---|
1350 | n/a | self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') |
---|
1351 | n/a | self.assertEqual(pattern.sub('#', '\n'), '#\n#') |
---|
1352 | n/a | |
---|
1353 | n/a | pattern = re.compile('$', re.MULTILINE) |
---|
1354 | n/a | self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' ) |
---|
1355 | n/a | self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') |
---|
1356 | n/a | self.assertEqual(pattern.sub('#', '\n'), '#\n#') |
---|
1357 | n/a | |
---|
1358 | n/a | def test_bytes_str_mixing(self): |
---|
1359 | n/a | # Mixing str and bytes is disallowed |
---|
1360 | n/a | pat = re.compile('.') |
---|
1361 | n/a | bpat = re.compile(b'.') |
---|
1362 | n/a | self.assertRaises(TypeError, pat.match, b'b') |
---|
1363 | n/a | self.assertRaises(TypeError, bpat.match, 'b') |
---|
1364 | n/a | self.assertRaises(TypeError, pat.sub, b'b', 'c') |
---|
1365 | n/a | self.assertRaises(TypeError, pat.sub, 'b', b'c') |
---|
1366 | n/a | self.assertRaises(TypeError, pat.sub, b'b', b'c') |
---|
1367 | n/a | self.assertRaises(TypeError, bpat.sub, b'b', 'c') |
---|
1368 | n/a | self.assertRaises(TypeError, bpat.sub, 'b', b'c') |
---|
1369 | n/a | self.assertRaises(TypeError, bpat.sub, 'b', 'c') |
---|
1370 | n/a | |
---|
1371 | n/a | def test_ascii_and_unicode_flag(self): |
---|
1372 | n/a | # String patterns |
---|
1373 | n/a | for flags in (0, re.UNICODE): |
---|
1374 | n/a | pat = re.compile('\xc0', flags | re.IGNORECASE) |
---|
1375 | n/a | self.assertTrue(pat.match('\xe0')) |
---|
1376 | n/a | pat = re.compile(r'\w', flags) |
---|
1377 | n/a | self.assertTrue(pat.match('\xe0')) |
---|
1378 | n/a | pat = re.compile('\xc0', re.ASCII | re.IGNORECASE) |
---|
1379 | n/a | self.assertIsNone(pat.match('\xe0')) |
---|
1380 | n/a | pat = re.compile('(?a)\xc0', re.IGNORECASE) |
---|
1381 | n/a | self.assertIsNone(pat.match('\xe0')) |
---|
1382 | n/a | pat = re.compile(r'\w', re.ASCII) |
---|
1383 | n/a | self.assertIsNone(pat.match('\xe0')) |
---|
1384 | n/a | pat = re.compile(r'(?a)\w') |
---|
1385 | n/a | self.assertIsNone(pat.match('\xe0')) |
---|
1386 | n/a | # Bytes patterns |
---|
1387 | n/a | for flags in (0, re.ASCII): |
---|
1388 | n/a | pat = re.compile(b'\xc0', flags | re.IGNORECASE) |
---|
1389 | n/a | self.assertIsNone(pat.match(b'\xe0')) |
---|
1390 | n/a | pat = re.compile(br'\w', flags) |
---|
1391 | n/a | self.assertIsNone(pat.match(b'\xe0')) |
---|
1392 | n/a | # Incompatibilities |
---|
1393 | n/a | self.assertRaises(ValueError, re.compile, br'\w', re.UNICODE) |
---|
1394 | n/a | self.assertRaises(ValueError, re.compile, br'(?u)\w') |
---|
1395 | n/a | self.assertRaises(ValueError, re.compile, r'\w', re.UNICODE | re.ASCII) |
---|
1396 | n/a | self.assertRaises(ValueError, re.compile, r'(?u)\w', re.ASCII) |
---|
1397 | n/a | self.assertRaises(ValueError, re.compile, r'(?a)\w', re.UNICODE) |
---|
1398 | n/a | self.assertRaises(ValueError, re.compile, r'(?au)\w') |
---|
1399 | n/a | |
---|
1400 | n/a | def test_locale_flag(self): |
---|
1401 | n/a | import locale |
---|
1402 | n/a | _, enc = locale.getlocale(locale.LC_CTYPE) |
---|
1403 | n/a | # Search non-ASCII letter |
---|
1404 | n/a | for i in range(128, 256): |
---|
1405 | n/a | try: |
---|
1406 | n/a | c = bytes([i]).decode(enc) |
---|
1407 | n/a | sletter = c.lower() |
---|
1408 | n/a | if sletter == c: continue |
---|
1409 | n/a | bletter = sletter.encode(enc) |
---|
1410 | n/a | if len(bletter) != 1: continue |
---|
1411 | n/a | if bletter.decode(enc) != sletter: continue |
---|
1412 | n/a | bpat = re.escape(bytes([i])) |
---|
1413 | n/a | break |
---|
1414 | n/a | except (UnicodeError, TypeError): |
---|
1415 | n/a | pass |
---|
1416 | n/a | else: |
---|
1417 | n/a | bletter = None |
---|
1418 | n/a | bpat = b'A' |
---|
1419 | n/a | # Bytes patterns |
---|
1420 | n/a | pat = re.compile(bpat, re.LOCALE | re.IGNORECASE) |
---|
1421 | n/a | if bletter: |
---|
1422 | n/a | self.assertTrue(pat.match(bletter)) |
---|
1423 | n/a | pat = re.compile(b'(?L)' + bpat, re.IGNORECASE) |
---|
1424 | n/a | if bletter: |
---|
1425 | n/a | self.assertTrue(pat.match(bletter)) |
---|
1426 | n/a | pat = re.compile(bpat, re.IGNORECASE) |
---|
1427 | n/a | if bletter: |
---|
1428 | n/a | self.assertIsNone(pat.match(bletter)) |
---|
1429 | n/a | pat = re.compile(br'\w', re.LOCALE) |
---|
1430 | n/a | if bletter: |
---|
1431 | n/a | self.assertTrue(pat.match(bletter)) |
---|
1432 | n/a | pat = re.compile(br'(?L)\w') |
---|
1433 | n/a | if bletter: |
---|
1434 | n/a | self.assertTrue(pat.match(bletter)) |
---|
1435 | n/a | pat = re.compile(br'\w') |
---|
1436 | n/a | if bletter: |
---|
1437 | n/a | self.assertIsNone(pat.match(bletter)) |
---|
1438 | n/a | # Incompatibilities |
---|
1439 | n/a | self.assertRaises(ValueError, re.compile, '', re.LOCALE) |
---|
1440 | n/a | self.assertRaises(ValueError, re.compile, '(?L)') |
---|
1441 | n/a | self.assertRaises(ValueError, re.compile, b'', re.LOCALE | re.ASCII) |
---|
1442 | n/a | self.assertRaises(ValueError, re.compile, b'(?L)', re.ASCII) |
---|
1443 | n/a | self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE) |
---|
1444 | n/a | self.assertRaises(ValueError, re.compile, b'(?aL)') |
---|
1445 | n/a | |
---|
def test_scoped_flags(self):
    # (?i:...) / (?-i:...) switch case-insensitivity for the group only.
    self.assertTrue(re.match(r'(?i:a)b', 'Ab'))
    self.assertIsNone(re.match(r'(?i:a)b', 'aB'))
    self.assertIsNone(re.match(r'(?-i:a)b', 'Ab', re.IGNORECASE))
    self.assertTrue(re.match(r'(?-i:a)b', 'aB', re.IGNORECASE))
    self.assertIsNone(re.match(r'(?i:(?-i:a)b)', 'Ab'))
    self.assertTrue(re.match(r'(?i:(?-i:a)b)', 'aB'))

    # (?x:...) / (?-x:...) do the same for verbose mode.
    self.assertTrue(re.match(r'(?x: a) b', 'a b'))
    self.assertIsNone(re.match(r'(?x: a) b', ' a b'))
    self.assertTrue(re.match(r'(?-x: a) b', ' ab', re.VERBOSE))
    self.assertIsNone(re.match(r'(?-x: a) b', 'ab', re.VERBOSE))

    # Invalid flag combinations.
    self.checkPatternError(r'(?a:\w)',
            'bad inline flags: cannot turn on global flag', 3)
    self.checkPatternError(r'(?a)(?-a:\w)',
            'bad inline flags: cannot turn off global flag', 8)
    self.checkPatternError(r'(?i-i:a)',
            'bad inline flags: flag turned on and off', 5)

    # Truncated or malformed flag groups.
    self.checkPatternError(r'(?-', 'missing flag', 3)
    self.checkPatternError(r'(?-+', 'missing flag', 3)
    self.checkPatternError(r'(?-z', 'unknown flag', 3)
    self.checkPatternError(r'(?-i', 'missing :', 4)
    self.checkPatternError(r'(?-i)', 'missing :', 4)
    self.checkPatternError(r'(?-i+', 'missing :', 4)
    self.checkPatternError(r'(?-iz', 'unknown flag', 4)
    self.checkPatternError(r'(?i:', 'missing ), unterminated subpattern', 0)
    self.checkPatternError(r'(?i', 'missing -, : or )', 3)
    self.checkPatternError(r'(?i+', 'missing -, : or )', 3)
    self.checkPatternError(r'(?iz', 'unknown flag', 3)
1477 | n/a | |
---|
1478 | n/a | def test_bug_6509(self): |
---|
1479 | n/a | # Replacement strings of both types must parse properly. |
---|
1480 | n/a | # all strings |
---|
1481 | n/a | pat = re.compile(r'a(\w)') |
---|
1482 | n/a | self.assertEqual(pat.sub('b\\1', 'ac'), 'bc') |
---|
1483 | n/a | pat = re.compile('a(.)') |
---|
1484 | n/a | self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234') |
---|
1485 | n/a | pat = re.compile('..') |
---|
1486 | n/a | self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str') |
---|
1487 | n/a | |
---|
1488 | n/a | # all bytes |
---|
1489 | n/a | pat = re.compile(br'a(\w)') |
---|
1490 | n/a | self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc') |
---|
1491 | n/a | pat = re.compile(b'a(.)') |
---|
1492 | n/a | self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD') |
---|
1493 | n/a | pat = re.compile(b'..') |
---|
1494 | n/a | self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes') |
---|
1495 | n/a | |
---|
1496 | n/a | def test_dealloc(self): |
---|
1497 | n/a | # issue 3299: check for segfault in debug build |
---|
1498 | n/a | import _sre |
---|
1499 | n/a | # the overflow limit is different on wide and narrow builds and it |
---|
1500 | n/a | # depends on the definition of SRE_CODE (see sre.h). |
---|
1501 | n/a | # 2**128 should be big enough to overflow on both. For smaller values |
---|
1502 | n/a | # a RuntimeError is raised instead of OverflowError. |
---|
1503 | n/a | long_overflow = 2**128 |
---|
1504 | n/a | self.assertRaises(TypeError, re.finditer, "a", {}) |
---|
1505 | n/a | with self.assertRaises(OverflowError): |
---|
1506 | n/a | _sre.compile("abc", 0, [long_overflow], 0, {}, ()) |
---|
1507 | n/a | with self.assertRaises(TypeError): |
---|
1508 | n/a | _sre.compile({}, 0, [], 0, [], []) |
---|
1509 | n/a | |
---|
1510 | n/a | def test_search_dot_unicode(self): |
---|
1511 | n/a | self.assertTrue(re.search("123.*-", '123abc-')) |
---|
1512 | n/a | self.assertTrue(re.search("123.*-", '123\xe9-')) |
---|
1513 | n/a | self.assertTrue(re.search("123.*-", '123\u20ac-')) |
---|
1514 | n/a | self.assertTrue(re.search("123.*-", '123\U0010ffff-')) |
---|
1515 | n/a | self.assertTrue(re.search("123.*-", '123\xe9\u20ac\U0010ffff-')) |
---|
1516 | n/a | |
---|
1517 | n/a | def test_compile(self): |
---|
1518 | n/a | # Test return value when given string and pattern as parameter |
---|
1519 | n/a | pattern = re.compile('random pattern') |
---|
1520 | n/a | self.assertIsInstance(pattern, re._pattern_type) |
---|
1521 | n/a | same_pattern = re.compile(pattern) |
---|
1522 | n/a | self.assertIsInstance(same_pattern, re._pattern_type) |
---|
1523 | n/a | self.assertIs(same_pattern, pattern) |
---|
1524 | n/a | # Test behaviour when not given a string or pattern as parameter |
---|
1525 | n/a | self.assertRaises(TypeError, re.compile, 0) |
---|
1526 | n/a | |
---|
@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    s = 'a' * size
    m = re.search('$', s)
    self.assertIsNotNone(m)
    # '$' can only match at the very end of a string of 'a's.
    self.assertEqual(m.start(), size)
    self.assertEqual(m.end(), size)
1535 | n/a | |
---|
# The huge memuse is because of re.sub() using a list and a join()
# to create the replacement result.
@bigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    s = 'a' * size
    r, n = re.subn('', '', s)
    self.assertEqual(r, s)
    # One empty match before each character plus one at the end.
    self.assertEqual(n, size + 1)
1545 | n/a | |
---|
1546 | n/a | def test_bug_16688(self): |
---|
1547 | n/a | # Issue 16688: Backreferences make case-insensitive regex fail on |
---|
1548 | n/a | # non-ASCII strings. |
---|
1549 | n/a | self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a']) |
---|
1550 | n/a | self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2)) |
---|
1551 | n/a | |
---|
1552 | n/a | def test_repeat_minmax_overflow(self): |
---|
1553 | n/a | # Issue #13169 |
---|
1554 | n/a | string = "x" * 100000 |
---|
1555 | n/a | self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535)) |
---|
1556 | n/a | self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535)) |
---|
1557 | n/a | self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535)) |
---|
1558 | n/a | self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536)) |
---|
1559 | n/a | self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536)) |
---|
1560 | n/a | self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536)) |
---|
1561 | n/a | # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t. |
---|
1562 | n/a | self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128) |
---|
1563 | n/a | self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128) |
---|
1564 | n/a | self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128) |
---|
1565 | n/a | self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128)) |
---|
1566 | n/a | |
---|
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    # Repeat counts just below _sre.MAXREPEAT compile; MAXREPEAT itself
    # is rejected with OverflowError.
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    text = "x" * 100000
    below_limit = MAXREPEAT - 1
    self.assertIsNone(re.match(r".{%d}" % below_limit, text))
    self.assertEqual(re.match(r".{,%d}" % below_limit, text).span(),
                     (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % below_limit, text))
    self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
    self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
    self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
1581 | n/a | |
---|
def test_backref_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The offending group name must appear in the message.
    self.checkPatternError('(?P=<foo>)',
                           "bad character in group name '<foo>'", 4)
1586 | n/a | |
---|
def test_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The offending group name must appear in the message.
    self.checkPatternError('(?P<?foo>)',
                           "bad character in group name '?foo'", 4)
1591 | n/a | |
---|
1592 | n/a | def test_issue17998(self): |
---|
1593 | n/a | for reps in '*', '+', '?', '{1}': |
---|
1594 | n/a | for mod in '', '?': |
---|
1595 | n/a | pattern = '.' + reps + mod + 'yz' |
---|
1596 | n/a | self.assertEqual(re.compile(pattern, re.S).findall('xyz'), |
---|
1597 | n/a | ['xyz'], msg=pattern) |
---|
1598 | n/a | pattern = pattern.encode() |
---|
1599 | n/a | self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'), |
---|
1600 | n/a | [b'xyz'], msg=pattern) |
---|
1601 | n/a | |
---|
def test_match_repr(self):
    # repr() of a match object shows its type, span and matched text,
    # for str and bytes-like subjects, including subclasses.
    for subject in '[abracadabra]', S('[abracadabra]'):
        m = re.search(r'(.+)(.*?)\1', subject)
        self.assertEqual(repr(m), "<%s.%s object; "
                                  "span=(1, 12), match='abracadabra'>" %
                                  (type(m).__module__, type(m).__qualname__))
    for subject in (b'[abracadabra]', B(b'[abracadabra]'),
                    bytearray(b'[abracadabra]'),
                    memoryview(b'[abracadabra]')):
        m = re.search(br'(.+)(.*?)\1', subject)
        self.assertEqual(repr(m), "<%s.%s object; "
                                  "span=(1, 12), match=b'abracadabra'>" %
                                  (type(m).__module__, type(m).__qualname__))

    first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
    # Each expected repr is built from that same match object's type.
    self.assertEqual(repr(first), "<%s.%s object; "
                                  "span=(0, 2), match='aa'>" %
                                  (type(first).__module__, type(first).__qualname__))
    self.assertEqual(repr(second), "<%s.%s object; "
                                   "span=(3, 5), match='bb'>" %
                                   (type(second).__module__, type(second).__qualname__))
1623 | n/a | |
---|
1624 | n/a | |
---|
1625 | n/a | def test_bug_2537(self): |
---|
1626 | n/a | # issue 2537: empty submatches |
---|
1627 | n/a | for outer_op in ('{0,}', '*', '+', '{1,187}'): |
---|
1628 | n/a | for inner_op in ('{0,}', '*', '?'): |
---|
1629 | n/a | r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op)) |
---|
1630 | n/a | m = r.match("xyyzy") |
---|
1631 | n/a | self.assertEqual(m.group(0), "xyy") |
---|
1632 | n/a | self.assertEqual(m.group(1), "") |
---|
1633 | n/a | self.assertEqual(m.group(2), "y") |
---|
1634 | n/a | |
---|
def test_debug_flag(self):
    # Compiling with re.DEBUG must print the parsed pattern tree.
    pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
    with captured_stdout() as out:
        re.compile(pat, re.DEBUG)
    # NOTE(review): the leading whitespace inside this expected dump was
    # lost in the extracted source; the two-space nesting below is
    # reconstructed -- confirm against real re.DEBUG output.
    dump = '''\
SUBPATTERN 1 0 0
  LITERAL 46
SUBPATTERN None 0 0
  BRANCH
    IN
      LITERAL 99
      LITERAL 104
  OR
    LITERAL 112
    LITERAL 121
SUBPATTERN None 0 0
  GROUPREF_EXISTS 1
    AT AT_END
  ELSE
    LITERAL 58
    LITERAL 32
'''
    self.assertEqual(out.getvalue(), dump)
    # Debug output is output again even a second time (bypassing
    # the cache -- issue #20426).
    with captured_stdout() as out:
        re.compile(pat, re.DEBUG)
    self.assertEqual(out.getvalue(), dump)
1663 | n/a | |
---|
1664 | n/a | def test_keyword_parameters(self): |
---|
1665 | n/a | # Issue #20283: Accepting the string keyword parameter. |
---|
1666 | n/a | pat = re.compile(r'(ab)') |
---|
1667 | n/a | self.assertEqual( |
---|
1668 | n/a | pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9)) |
---|
1669 | n/a | self.assertEqual( |
---|
1670 | n/a | pat.fullmatch(string='abracadabra', pos=7, endpos=9).span(), (7, 9)) |
---|
1671 | n/a | self.assertEqual( |
---|
1672 | n/a | pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9)) |
---|
1673 | n/a | self.assertEqual( |
---|
1674 | n/a | pat.findall(string='abracadabra', pos=3, endpos=10), ['ab']) |
---|
1675 | n/a | self.assertEqual( |
---|
1676 | n/a | pat.split(string='abracadabra', maxsplit=1), |
---|
1677 | n/a | ['', 'ab', 'racadabra']) |
---|
1678 | n/a | self.assertEqual( |
---|
1679 | n/a | pat.scanner(string='abracadabra', pos=3, endpos=10).search().span(), |
---|
1680 | n/a | (7, 9)) |
---|
1681 | n/a | |
---|
1682 | n/a | def test_bug_20998(self): |
---|
1683 | n/a | # Issue #20998: Fullmatch of repeated single character pattern |
---|
1684 | n/a | # with ignore case. |
---|
1685 | n/a | self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3)) |
---|
1686 | n/a | |
---|
def test_locale_caching(self):
    # Issue #22410
    # Runs the two locale checks in both orders, purging the pattern cache
    # before each round, so each check also runs right after the other
    # locale's patterns were compiled.
    saved = locale.setlocale(locale.LC_CTYPE)
    # Put the original LC_CTYPE back however the test ends.
    self.addCleanup(locale.setlocale, locale.LC_CTYPE, saved)

    needed = ('en_US.iso88591', 'en_US.utf8')
    for name in needed:
        try:
            locale.setlocale(locale.LC_CTYPE, name)
        except locale.Error:
            # Unsupported locale on this system
            self.skipTest('test needs %s locale' % name)

    rounds = (
        (self.check_en_US_iso88591, self.check_en_US_utf8),
        (self.check_en_US_utf8, self.check_en_US_iso88591),
    )
    for first, second in rounds:
        re.purge()
        first()
        second()
---|
1704 | n/a | |
---|
def check_en_US_iso88591(self):
    # With the en_US.iso88591 locale active, the bytes \xc5 and \xe5 must
    # match each other when LOCALE and IGNORECASE are both in effect.
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')

    # Flags passed as an argument.
    by_argument = (
        (b'\xc5\xe5', b'\xc5\xe5'),
        (b'\xc5', b'\xe5'),
        (b'\xe5', b'\xc5'),
    )
    for pat, subject in by_argument:
        self.assertTrue(re.match(pat, subject, re.L|re.I))

    # The same flags spelled inline in the pattern.
    inline = (
        (b'(?Li)\xc5\xe5', b'\xc5\xe5'),
        (b'(?Li)\xc5', b'\xe5'),
        (b'(?Li)\xe5', b'\xc5'),
    )
    for pat, subject in inline:
        self.assertTrue(re.match(pat, subject))
---|
1713 | n/a | |
---|
def check_en_US_utf8(self):
    # With the en_US.utf8 locale active, \xc5 and \xe5 must NOT match each
    # other under LOCALE|IGNORECASE; only the byte-for-byte identical
    # subject is expected to match.
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')

    # Flags passed as an argument: (pattern, subject, should it match?)
    by_argument = (
        (b'\xc5\xe5', b'\xc5\xe5', True),
        (b'\xc5', b'\xe5', False),
        (b'\xe5', b'\xc5', False),
    )
    for pat, subject, matches in by_argument:
        outcome = re.match(pat, subject, re.L|re.I)
        if matches:
            self.assertTrue(outcome)
        else:
            self.assertIsNone(outcome)

    # The same flags spelled inline in the pattern.
    inline = (
        (b'(?Li)\xc5\xe5', b'\xc5\xe5', True),
        (b'(?Li)\xc5', b'\xe5', False),
        (b'(?Li)\xe5', b'\xc5', False),
    )
    for pat, subject, matches in inline:
        outcome = re.match(pat, subject)
        if matches:
            self.assertTrue(outcome)
        else:
            self.assertIsNone(outcome)
---|
1722 | n/a | |
---|
def test_error(self):
    # A failed compile must raise re.error carrying the offending pattern
    # and the error location (pos, lineno, colno); the position text must
    # appear in str(err) but not in err.msg.
    with self.assertRaises(re.error) as cm:
        re.compile('(\u20ac))')
    err = cm.exception
    self.assertIsInstance(err.pattern, str)
    self.assertEqual(err.pattern, '(\u20ac))')
    self.assertEqual(err.pos, 3)
    self.assertEqual(err.lineno, 1)
    self.assertEqual(err.colno, 4)
    self.assertIn(err.msg, str(err))
    self.assertIn(' at position 3', str(err))
    self.assertNotIn(' at position 3', err.msg)
    # Bytes pattern
    with self.assertRaises(re.error) as cm:
        re.compile(b'(\xa4))')
    err = cm.exception
    self.assertIsInstance(err.pattern, bytes)
    self.assertEqual(err.pattern, b'(\xa4))')
    self.assertEqual(err.pos, 3)
    # Multiline pattern
    # The asserted offsets depend on every whitespace character in front
    # of the stray ')'.  The pattern is assembled from explicit pieces so
    # that re-indenting this file cannot silently shift them.
    # NOTE(review): the 16/20-column layout was chosen to satisfy the
    # asserted pos/line/column; under re.VERBOSE the whitespace itself is
    # not significant to the pattern.
    indent = ' ' * 16
    pattern = ''.join([
        '\n',                    # line 1 is empty (offset 0)
        indent, '(\n',           # line 2, offsets 1-18
        indent, '    abc\n',     # line 3, offsets 19-42
        indent, ')\n',           # line 4, offsets 43-60
        indent, ')\n',           # line 5: stray ')' at offset 77, column 17
        indent, '(\n',
        indent,
    ])
    with self.assertRaises(re.error) as cm:
        re.compile(pattern, re.VERBOSE)
    err = cm.exception
    self.assertEqual(err.pos, 77)
    self.assertEqual(err.lineno, 5)
    self.assertEqual(err.colno, 17)
    self.assertIn(err.msg, str(err))
    self.assertIn(' at position 77', str(err))
    self.assertIn('(line 5, column 17)', str(err))
---|
1758 | n/a | |
---|
def test_misc_errors(self):
    # Malformed group/extension syntax: each entry is
    # (pattern, expected error message, expected error position) and is
    # handed to checkPatternError in the listed order.
    cases = (
        (r'(', 'missing ), unterminated subpattern', 0),
        (r'((a|b)', 'missing ), unterminated subpattern', 0),
        (r'(a|b))', 'unbalanced parenthesis', 5),
        (r'(?P', 'unexpected end of pattern', 3),
        (r'(?z)', 'unknown extension ?z', 1),
        (r'(?iz)', 'unknown flag', 3),
        (r'(?i', 'missing -, : or )', 3),
        (r'(?#abc', 'missing ), unterminated comment', 0),
        (r'(?<', 'unexpected end of pattern', 3),
        (r'(?<>)', 'unknown extension ?<>', 1),
        (r'(?', 'unexpected end of pattern', 2),
    )
    for bad_pattern, message, position in cases:
        self.checkPatternError(bad_pattern, message, position)
---|
1771 | n/a | |
---|
def test_enum(self):
    # Issue #28082: Check that str(flag) returns a human readable string
    # instead of an integer
    named_flags = (
        ('ASCII', re.A),
        ('DOTALL', re.S),
    )
    for name, flag in named_flags:
        self.assertIn(name, str(flag))
---|
1777 | n/a | |
---|
def test_pattern_compare(self):
    # Equality and hashing of compiled patterns.  re.purge() is called
    # before each new compile so the comparison is made against a freshly
    # built object rather than one returned from the module cache.
    baseline = re.compile('abc', re.IGNORECASE)

    # equal to itself
    self.assertEqual(baseline, baseline)
    self.assertFalse(baseline != baseline)

    # equal
    re.purge()
    twin = re.compile('abc', re.IGNORECASE)
    self.assertEqual(hash(twin), hash(baseline))
    self.assertEqual(twin, baseline)

    # not equal: different pattern
    re.purge()
    other_text = re.compile('XYZ', re.IGNORECASE)
    # Don't test hash(other_text) != hash(baseline) because there is no
    # warranty that hash values are different
    self.assertNotEqual(other_text, baseline)

    # not equal: different flag (flags=0)
    re.purge()
    no_flags = re.compile('abc')
    self.assertNotEqual(no_flags, baseline)

    # only == and != comparison operators are supported
    with self.assertRaises(TypeError):
        baseline < twin
---|
1806 | n/a | |
---|
def test_pattern_compare_bytes(self):
    # Equality and hashing of compiled bytes patterns, plus comparison of
    # a bytes pattern with a str pattern.
    bytes_pat = re.compile(b'abc')

    # equal: test bytes patterns
    re.purge()
    bytes_twin = re.compile(b'abc')
    self.assertEqual(hash(bytes_twin), hash(bytes_pat))
    self.assertEqual(bytes_twin, bytes_pat)

    # not equal: pattern of a different types (str vs bytes),
    # comparison must not raise a BytesWarning
    re.purge()
    str_pat = re.compile('abc')
    with warnings.catch_warnings():
        # Escalate BytesWarning so that emitting one fails the test.
        warnings.simplefilter('error', BytesWarning)
        self.assertNotEqual(str_pat, bytes_pat)
---|
1823 | n/a | |
---|
def test_bug_29444(self):
    # Match objects created on a bytearray that is later shrunk in place:
    # group() must then return only the bytes that remain within the
    # buffer (possibly none) instead of the originally matched text.
    buf = bytearray(b'abcdefgh')
    whole = re.search(b'[a-h]+', buf)
    tail = re.search(b'[e-h]+', buf)

    before = ((whole, b'abcdefgh'), (tail, b'efgh'))
    for found, expected in before:
        self.assertEqual(found.group(), expected)

    # Replace the 8-byte content with 3 bytes, in place.
    buf[:] = b'xyz'

    after = ((whole, b'xyz'), (tail, b''))
    for found, expected in after:
        self.assertEqual(found.group(), expected)
---|
1833 | n/a | |
---|
1834 | n/a | |
---|
class PatternReprTests(unittest.TestCase):
    """Checks of repr() for compiled pattern objects."""

    def check(self, pattern, expected):
        # Compile without explicit flags and compare the repr.
        compiled = re.compile(pattern)
        self.assertEqual(repr(compiled), expected)

    def check_flags(self, pattern, flags, expected):
        # Compile with the given flags and compare the repr.
        compiled = re.compile(pattern, flags)
        self.assertEqual(repr(compiled), expected)

    def test_without_flags(self):
        self.check('random pattern', "re.compile('random pattern')")

    def test_single_flag(self):
        expected = "re.compile('random pattern', re.IGNORECASE)"
        self.check_flags('random pattern', re.IGNORECASE, expected)

    def test_multiple_flags(self):
        expected = ("re.compile('random pattern', "
                    "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
        self.check_flags('random pattern', re.I|re.S|re.X, expected)

    def test_unicode_flag(self):
        # re.U is expected to be left out of the repr of a str pattern.
        self.check_flags('random pattern', re.U,
                         "re.compile('random pattern')")
        expected = ("re.compile('random pattern', "
                    "re.IGNORECASE|re.DOTALL)")
        self.check_flags('random pattern', re.I|re.S|re.U, expected)

    def test_inline_flags(self):
        # A flag set inside the pattern must show up in the repr as well.
        expected = "re.compile('(?i)pattern', re.IGNORECASE)"
        self.check('(?i)pattern', expected)

    def test_unknown_flags(self):
        # Bits without a flag name are expected to be rendered in hex,
        # after any named flags.
        self.check_flags('random pattern', 0x123000,
                         "re.compile('random pattern', 0x123000)")
        expected = "re.compile('random pattern', re.IGNORECASE|0x123000)"
        self.check_flags('random pattern', 0x123000|re.I, expected)

    def test_bytes(self):
        self.check(b'bytes pattern', "re.compile(b'bytes pattern')")
        expected = "re.compile(b'bytes pattern', re.ASCII)"
        self.check_flags(b'bytes pattern', re.A, expected)

    def test_locale(self):
        expected = "re.compile(b'bytes pattern', re.LOCALE)"
        self.check_flags(b'bytes pattern', re.L, expected)

    def test_quotes(self):
        # The repr must quote the pattern the way repr() of the pattern
        # string does, including escaping when both quote kinds occur.
        cases = (
            ('random "double quoted" pattern',
             '''re.compile('random "double quoted" pattern')'''),
            ("random 'single quoted' pattern",
             '''re.compile("random 'single quoted' pattern")'''),
            ('''both 'single' and "double" quotes''',
             '''re.compile('both \\'single\\' and "double" quotes')'''),
        )
        for pattern, expected in cases:
            self.check(pattern, expected)

    def test_long_pattern(self):
        # The repr of a very long pattern must stay short (under 300
        # characters) while keeping its beginning and the flags suffix.
        pattern = 'Very %spattern' % ('long ' * 1000)

        text = repr(re.compile(pattern))
        self.assertLess(len(text), 300)
        self.assertEqual(text[:30], "re.compile('Very long long lon")

        text = repr(re.compile(pattern, re.I))
        self.assertLess(len(text), 300)
        self.assertEqual(text[:30], "re.compile('Very long long lon")
        self.assertEqual(text[-16:], ", re.IGNORECASE)")
---|
1899 | n/a | |
---|
1900 | n/a | |
---|
class ImplementationTest(unittest.TestCase):
    """
    Test implementation details of the re module.
    """

    def test_overlap_table(self):
        # Each entry pairs an input string with the overlap table that
        # sre_compile._generate_overlap_table must return for it.
        build = sre_compile._generate_overlap_table
        cases = (
            ("", []),
            ("a", [0]),
            ("abcd", [0, 0, 0, 0]),
            ("aaaa", [0, 1, 2, 3]),
            ("ababba", [0, 0, 1, 2, 0, 1]),
            ("abcabdac", [0, 0, 0, 1, 2, 0, 1, 0]),
        )
        for prefix, table in cases:
            self.assertEqual(build(prefix), table)
---|
1914 | n/a | |
---|
1915 | n/a | |
---|
class ExternalTests(unittest.TestCase):
    # Data-driven tests: patterns, subjects and expected outcomes are
    # imported from test.re_tests instead of being spelled out here.

    def test_re_benchmarks(self):
        're_tests benchmarks'
        from test.re_tests import benchmarks
        for pattern, s in benchmarks:
            with self.subTest(pattern=pattern, string=s):
                p = re.compile(pattern)
                # The pattern must match the subject as a whole ...
                self.assertTrue(p.search(s))
                self.assertTrue(p.match(s))
                self.assertTrue(p.fullmatch(s))
                # ... and again when the subject is embedded in 10000
                # spaces of padding on each side, using pos/endpos to
                # select the embedded part.
                s2 = ' '*10000 + s + ' '*10000
                self.assertTrue(p.search(s2))
                self.assertTrue(p.match(s2, 10000))
                self.assertTrue(p.match(s2, 10000, 10000 + len(s)))
                self.assertTrue(p.fullmatch(s2, 10000, 10000 + len(s)))

    def test_re_tests(self):
        're_tests test suite'
        from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
        for t in tests:
            # A test tuple is either (pattern, string, outcome) or
            # (pattern, string, outcome, repl, expected).
            pattern = s = outcome = repl = expected = None
            if len(t) == 5:
                pattern, s, outcome, repl, expected = t
            elif len(t) == 3:
                pattern, s, outcome = t
            else:
                raise ValueError('Test tuples should have 3 or 5 fields', t)

            # NOTE(review): the nesting of the sub-tests below follows
            # the data flow (``bpat``/``bs`` exist only in the ``else``
            # branch) -- confirm against the reference layout.
            with self.subTest(pattern=pattern, string=s):
                if outcome == SYNTAX_ERROR:  # Expected a syntax error
                    with self.assertRaises(re.error):
                        re.compile(pattern)
                    continue

                obj = re.compile(pattern)
                result = obj.search(s)
                if outcome == FAIL:
                    self.assertIsNone(result, 'Succeeded incorrectly')
                    continue

                with self.subTest():
                    self.assertTrue(result, 'Failed incorrectly')
                    # Matched, as expected, so now we compute the
                    # result string and compare it to our expected result.
                    start, end = result.span(0)
                    # Names made available to the ``repl`` expression.
                    vardict = {'found': result.group(0),
                               'groups': result.group(),
                               'flags': result.re.flags}
                    # g1..g99: the group text, "None" for a group that
                    # did not participate, "Error" for a group number
                    # the pattern does not have.
                    for i in range(1, 100):
                        try:
                            gi = result.group(i)
                            # Special hack because else the string concat fails:
                            if gi is None:
                                gi = "None"
                        except IndexError:
                            gi = "Error"
                        vardict['g%d' % i] = gi
                    # Named groups are exposed under their own names.
                    for i in result.re.groupindex.keys():
                        try:
                            gi = result.group(i)
                            if gi is None:
                                gi = "None"
                        except IndexError:
                            gi = "Error"
                        vardict[i] = gi
                    # ``repl`` is evaluated as a Python expression; it
                    # comes from test.re_tests, so that module must
                    # remain trusted data.
                    self.assertEqual(eval(repl, vardict), expected,
                                     'grouping error')

                # Try the match with both pattern and string converted to
                # bytes, and check that it still succeeds.
                try:
                    bpat = bytes(pattern, "ascii")
                    bs = bytes(s, "ascii")
                except UnicodeEncodeError:
                    # skip non-ascii tests
                    pass
                else:
                    with self.subTest('bytes pattern match'):
                        obj = re.compile(bpat)
                        self.assertTrue(obj.search(bs))

                    # Try the match with LOCALE enabled, and check that it
                    # still succeeds.
                    with self.subTest('locale-sensitive match'):
                        obj = re.compile(bpat, re.LOCALE)
                        # NOTE(review): this rebinds ``result``; the
                        # range-limited check below is gated on this
                        # value, while ``start``/``end`` still come from
                        # the original str match.
                        result = obj.search(bs)
                        # NOTE(review): a miss is only printed, it does
                        # not fail the test -- confirm this is intended.
                        if result is None:
                            print('=== Fails on locale-sensitive match', t)

                # Try the match with the search area limited to the extent
                # of the match and see if it still succeeds.  \B will
                # break (because it won't match at the end or start of a
                # string), so we'll ignore patterns that feature it.
                if (pattern[:2] != r'\B' and pattern[-2:] != r'\B'
                            and result is not None):
                    with self.subTest('range-limited match'):
                        obj = re.compile(pattern)
                        self.assertTrue(obj.search(s, start, end + 1))

                # Try the match with IGNORECASE enabled, and check that it
                # still succeeds.
                with self.subTest('case-insensitive match'):
                    obj = re.compile(pattern, re.IGNORECASE)
                    self.assertTrue(obj.search(s))

                # Try the match with UNICODE locale enabled, and check
                # that it still succeeds.
                with self.subTest('unicode-sensitive match'):
                    obj = re.compile(pattern, re.UNICODE)
                    self.assertTrue(obj.search(s))
---|
2027 | n/a | |
---|
2028 | n/a | |
---|
# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
---|