» Core Development > Code coverage > Tools/stringbench/stringbench.py

Python code coverage for Tools/stringbench/stringbench.py

# | count | content
1n/a
2n/a# Various microbenchmarks comparing unicode and byte string performance
3n/a# Please keep this file both 2.x and 3.x compatible!
4n/a
5n/aimport timeit
6n/aimport itertools
7n/aimport operator
8n/aimport re
9n/aimport sys
10n/aimport datetime
11n/aimport optparse
12n/a
# Version string of this benchmark suite; printed in the start-up banner.
VERSION = "2.0"
14n/a
def p(*args):
    """Write the ``str()`` of each argument to stdout, space-separated.

    A newline is appended.  This is a print helper that behaves the same
    on Python 2 and Python 3.
    """
    line = ' '.join(str(s) for s in args)
    sys.stdout.write(line + '\n')
17n/a
# Converters from a native ``str`` literal to the string type under test.
# Each converter is bound to two names (e.g. BYTES and bytes_from_str) that
# refer to the *same* object, so ``STR is BYTES`` identity checks work.
if sys.version_info < (3,):
    # Python 2: the native str is already the 8-bit type.
    BYTES = bytes_from_str = lambda x: x
    UNICODE = unicode_from_str = lambda x: x.decode('ascii')
else:
    # Python 3: the native str is already the unicode type.
    BYTES = bytes_from_str = lambda x: x.encode('ascii')
    UNICODE = unicode_from_str = lambda x: x
24n/a
class UnsupportedType(TypeError):
    """Raised when a benchmark cannot run for the requested string type."""
27n/a
28n/a
# Start-up banner: suite version, interpreter version and current time.
p('stringbench v%s' % VERSION)
p(sys.version)
p(datetime.datetime.now())

# Effective value is 3.  1 and 7 are the alternative settings that have been
# used here.  NOTE(review): presumably the number of timing repetitions per
# benchmark -- the consumer of REPEAT is outside this part of the file.
REPEAT = 3

# The script is only meant to be executed directly, never imported.
if __name__ != "__main__":
    raise SystemExit("Must run as main program")
39n/a
# Command-line options; each one is a boolean flag (unset flags stay None).
parser = optparse.OptionParser()
parser.add_option(
    "-R", "--skip-re",
    action="store_true", dest="skip_re",
    help="skip regular expression tests",
)
parser.add_option(
    "-8", "--8-bit",
    action="store_true", dest="bytes_only",
    help="only do 8-bit string benchmarks",
)
parser.add_option(
    "-u", "--unicode",
    action="store_true", dest="unicode_only",
    help="only do Unicode string benchmarks",
)
50n/a
51n/a
52n/a_RANGE_1000 = list(range(1000))
53n/a_RANGE_100 = list(range(100))
54n/a_RANGE_10 = list(range(10))
55n/a
# Names of all functions registered through @bench (used to detect clashes).
dups = {}


def bench(s, group, repeat_count):
    """Return a decorator that tags a function as a benchmark.

    The decorated function receives the attributes ``comment`` (*s*),
    ``group``, ``repeat_count`` and ``is_bench = True`` and is returned
    unchanged otherwise.

    Raises AssertionError if a function with the same ``__name__`` has
    already been registered.
    """
    def register(func):
        name = func.__name__
        if name in dups:
            raise AssertionError("Multiple functions with same name: %r" %
                                 (name,))
        dups[name] = 1
        func.is_bench = True
        func.comment = s
        func.group = group
        func.repeat_count = repeat_count
        return func
    return register
69n/a
def uses_re(f):
    """Decorator marking benchmark *f* as relying on the ``re`` module.

    Sets ``f.uses_re = True`` and returns *f*, so the name bound by
    ``@uses_re`` stays the function instead of becoming ``None``.
    """
    f.uses_re = True
    return f
72n/a
73n/a####### 'in' comparisons
74n/a
@bench('"A" in "A"*1000', "early match, single character", 1000)
def in_test_quick_match_single_character(STR):
    """Time ``in`` with a one-character needle that matches at once."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    for _ in _RANGE_1000:
        needle in haystack
81n/a
@bench('"B" in "A"*1000', "no match, single character", 1000)
def in_test_no_match_single_character(STR):
    """Time ``in`` with a one-character needle that never matches."""
    haystack = STR("A" * 1000)
    needle = STR("B")
    for _ in _RANGE_1000:
        needle in haystack
88n/a
89n/a
@bench('"AB" in "AB"*1000', "early match, two characters", 1000)
def in_test_quick_match_two_characters(STR):
    """Time ``in`` with a two-character needle that matches at once."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    for _ in _RANGE_1000:
        needle in haystack
96n/a
@bench('"BC" in "AB"*1000', "no match, two characters", 1000)
def in_test_no_match_two_character(STR):
    """Time ``in`` with a two-character needle that never matches."""
    haystack = STR("AB" * 1000)
    needle = STR("BC")
    for _ in _RANGE_1000:
        needle in haystack
103n/a
@bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000)
def in_test_slow_match_two_characters(STR):
    """Time ``in`` with a two-character needle that matches at the end."""
    haystack = STR("AB" * 300+"C")
    needle = STR("BC")
    for _ in _RANGE_1000:
        needle in haystack
110n/a
@bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def in_test_slow_match_100_characters(STR):
    """Time ``in`` with a 100-character needle that matches at the end."""
    chunk = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    haystack = (chunk+miss_tail)*300 + chunk+hit_tail
    needle = chunk+hit_tail
    for _ in _RANGE_100:
        needle in haystack
121n/a
122n/a# Try with regex
@uses_re
@bench('s="ABC"*33; re.compile(s+"E").search((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def re_test_slow_match_100_characters(STR):
    """Time a compiled-regex search for a 100-character late match.

    The pattern is ``"ABC"*33 + "E"``, which first occurs at the very end
    of the searched text.  The benchmark label names ``s+"E"`` to agree
    with the pattern that is actually compiled.
    """
    m = STR("ABC"*33)
    d = STR("D")
    e = STR("E")
    s1 = (m+d)*300 + m+e
    s2 = m+e
    pat = re.compile(s2)
    # Bind the method once so the loop times only the search call.
    search = pat.search
    for x in _RANGE_100:
        search(s1)
136n/a
137n/a
138n/a#### same tests as 'in' but use 'find'
139n/a
@bench('("A"*1000).find("A")', "early match, single character", 1000)
def find_test_quick_match_single_character(STR):
    """Time ``find`` with a one-character needle that matches at once."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
147n/a
@bench('("A"*1000).find("B")', "no match, single character", 1000)
def find_test_no_match_single_character(STR):
    """Time ``find`` with a one-character needle that never matches."""
    haystack = STR("A" * 1000)
    needle = STR("B")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
155n/a
156n/a
@bench('("AB"*1000).find("AB")', "early match, two characters", 1000)
def find_test_quick_match_two_characters(STR):
    """Time ``find`` with a two-character needle that matches at once."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
164n/a
@bench('("AB"*1000).find("BC")', "no match, two characters", 1000)
def find_test_no_match_two_character(STR):
    """Time ``find`` for "BC", which never occurs in the text."""
    haystack = STR("AB" * 1000)
    needle = STR("BC")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
172n/a
@bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
def find_test_no_match_two_character_bis(STR):
    """Time ``find`` for "CA", which never occurs in the text."""
    haystack = STR("AB" * 1000)
    needle = STR("CA")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
180n/a
@bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
def find_test_slow_match_two_characters(STR):
    """Time ``find`` for "BC", which occurs only at the end of the text."""
    haystack = STR("AB" * 300+"C")
    needle = STR("BC")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
188n/a
@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
def find_test_slow_match_two_characters_bis(STR):
    """Time ``find`` for "CA", which occurs only at the end of the text."""
    haystack = STR("AB" * 300+"CA")
    needle = STR("CA")
    find = haystack.find
    for _ in _RANGE_1000:
        find(needle)
196n/a
@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters(STR):
    """Time ``find`` with a 100-character needle that matches at the end."""
    chunk = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    haystack = (chunk+miss_tail)*500 + chunk+hit_tail
    needle = chunk+hit_tail
    find = haystack.find
    for _ in _RANGE_100:
        find(needle)
208n/a
@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters_bis(STR):
    """Time ``find`` for "E" + chunk, which occurs only at the end."""
    chunk = STR("ABC"*33)
    miss_tail = STR("D")
    hit_head = STR("E")
    haystack = (chunk+miss_tail)*500 + hit_head+chunk
    needle = hit_head+chunk
    find = haystack.find
    for _ in _RANGE_100:
        find(needle)
220n/a
221n/a
222n/a#### Same tests for 'rfind'
223n/a
@bench('("A"*1000).rfind("A")', "early match, single character", 1000)
def rfind_test_quick_match_single_character(STR):
    """Time ``rfind`` with a one-character needle that matches at once."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
231n/a
@bench('("A"*1000).rfind("B")', "no match, single character", 1000)
def rfind_test_no_match_single_character(STR):
    """Time ``rfind`` with a one-character needle that never matches."""
    haystack = STR("A" * 1000)
    needle = STR("B")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
239n/a
240n/a
@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
def rfind_test_quick_match_two_characters(STR):
    """Time ``rfind`` with a two-character needle that matches at once."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
248n/a
@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
def rfind_test_no_match_two_character(STR):
    """Time ``rfind`` for "BC", which never occurs in the text."""
    haystack = STR("AB" * 1000)
    needle = STR("BC")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
256n/a
@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
def rfind_test_no_match_two_character_bis(STR):
    """Time ``rfind`` for "CA", which never occurs in the text."""
    haystack = STR("AB" * 1000)
    needle = STR("CA")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
264n/a
@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters(STR):
    """Time ``rfind`` for "CA", which occurs only at the start of the text."""
    haystack = STR("C" + "AB" * 300)
    needle = STR("CA")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
272n/a
@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters_bis(STR):
    """Time ``rfind`` for "BC", which occurs only at the start of the text."""
    haystack = STR("BC" + "AB" * 300)
    needle = STR("BC")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)
280n/a
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters(STR):
    """Time ``rfind`` for "E" + chunk, which occurs only at the start."""
    chunk = STR("ABC"*33)
    miss_head = STR("D")
    hit_head = STR("E")
    haystack = hit_head+chunk + (miss_head+chunk)*500
    needle = hit_head+chunk
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)
292n/a
@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters_bis(STR):
    """Time ``rfind`` for chunk + "E", which occurs only at the start."""
    chunk = STR("ABC"*33)
    miss_head = STR("D")
    hit_tail = STR("E")
    haystack = chunk+hit_tail + (miss_head+chunk)*500
    needle = chunk+hit_tail
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)
304n/a
305n/a
306n/a#### Now with index.
307n/a# Skip the ones which fail because that would include exception overhead.
308n/a
@bench('("A"*1000).index("A")', "early match, single character", 1000)
def index_test_quick_match_single_character(STR):
    """Time ``index`` with a one-character needle that matches at once."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
316n/a
@bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
def index_test_quick_match_two_characters(STR):
    """Time ``index`` with a two-character needle that matches at once."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
324n/a
@bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000)
def index_test_slow_match_two_characters(STR):
    """Time ``index`` for "BC", which occurs only at the end of the text."""
    haystack = STR("AB" * 300+"C")
    needle = STR("BC")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)
332n/a
@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
       "late match, 100 characters", 100)
def index_test_slow_match_100_characters(STR):
    """Time ``index`` with a 100-character needle that matches at the end."""
    chunk = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    haystack = (chunk+miss_tail)*500 + chunk+hit_tail
    needle = chunk+hit_tail
    index = haystack.index
    for _ in _RANGE_100:
        index(needle)
344n/a
345n/a
346n/a#### Same for rindex
347n/a
@bench('("A"*1000).rindex("A")', "early match, single character", 1000)
def rindex_test_quick_match_single_character(STR):
    """Time ``rindex`` with a one-character needle that matches at once."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
355n/a
@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000)
def rindex_test_quick_match_two_characters(STR):
    """Time ``rindex`` with a two-character needle that matches at once."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
363n/a
@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000)
def rindex_test_slow_match_two_characters(STR):
    """Time ``rindex`` for "CA", which occurs only at the start of the text."""
    haystack = STR("C" + "AB" * 300)
    needle = STR("CA")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)
371n/a
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)',
       "late match, 100 characters", 100)
def rindex_test_slow_match_100_characters(STR):
    """Time ``rindex`` for "E" + chunk, which occurs only at the start."""
    chunk = STR("ABC"*33)
    miss_head = STR("D")
    hit_head = STR("E")
    haystack = hit_head + chunk + (miss_head+chunk)*500
    needle = hit_head + chunk
    rindex = haystack.rindex
    for _ in _RANGE_100:
        rindex(needle)
383n/a
384n/a
385n/a#### Same for partition
386n/a
@bench('("A"*1000).partition("A")', "early match, single character", 1000)
def partition_test_quick_match_single_character(STR):
    """Time ``partition`` with a one-character separator matching at once."""
    text = STR("A" * 1000)
    sep = STR("A")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
394n/a
@bench('("A"*1000).partition("B")', "no match, single character", 1000)
def partition_test_no_match_single_character(STR):
    """Time ``partition`` with a one-character separator that is absent."""
    text = STR("A" * 1000)
    sep = STR("B")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
402n/a
403n/a
@bench('("AB"*1000).partition("AB")', "early match, two characters", 1000)
def partition_test_quick_match_two_characters(STR):
    """Time ``partition`` with a two-character separator matching at once."""
    text = STR("AB" * 1000)
    sep = STR("AB")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
411n/a
@bench('("AB"*1000).partition("BC")', "no match, two characters", 1000)
def partition_test_no_match_two_character(STR):
    """Time ``partition`` with a two-character separator that is absent."""
    text = STR("AB" * 1000)
    sep = STR("BC")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
419n/a
@bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000)
def partition_test_slow_match_two_characters(STR):
    """Time ``partition`` on "BC", which occurs only at the end of the text."""
    text = STR("AB" * 300+"C")
    sep = STR("BC")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)
427n/a
@bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")',
       "late match, 100 characters", 100)
def partition_test_slow_match_100_characters(STR):
    """Time ``partition`` with a 100-character separator found at the end."""
    chunk = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    text = (chunk+miss_tail)*500 + chunk+hit_tail
    sep = chunk+hit_tail
    partition = text.partition
    for _ in _RANGE_100:
        partition(sep)
439n/a
440n/a
441n/a#### Same for rpartition
442n/a
@bench('("A"*1000).rpartition("A")', "early match, single character", 1000)
def rpartition_test_quick_match_single_character(STR):
    """Time ``rpartition`` with a one-character separator matching at once."""
    text = STR("A" * 1000)
    sep = STR("A")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
450n/a
@bench('("A"*1000).rpartition("B")', "no match, single character", 1000)
def rpartition_test_no_match_single_character(STR):
    """Time ``rpartition`` with a one-character separator that is absent."""
    text = STR("A" * 1000)
    sep = STR("B")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
458n/a
459n/a
@bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000)
def rpartition_test_quick_match_two_characters(STR):
    """Time ``rpartition`` with a two-character separator matching at once."""
    text = STR("AB" * 1000)
    sep = STR("AB")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
467n/a
@bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000)
def rpartition_test_no_match_two_character(STR):
    """Time ``rpartition`` with a two-character separator that is absent."""
    text = STR("AB" * 1000)
    sep = STR("BC")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
475n/a
@bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000)
def rpartition_test_slow_match_two_characters(STR):
    """Time ``rpartition`` on "CA", which occurs only at the start."""
    text = STR("C" + "AB" * 300)
    sep = STR("CA")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)
483n/a
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)',
       "late match, 100 characters", 100)
def rpartition_test_slow_match_100_characters(STR):
    """Time ``rpartition`` on "E" + chunk, which occurs only at the start."""
    chunk = STR("ABC"*33)
    miss_head = STR("D")
    hit_head = STR("E")
    text = hit_head + chunk + (miss_head+chunk)*500
    sep = hit_head + chunk
    rpartition = text.rpartition
    for _ in _RANGE_100:
        rpartition(sep)
495n/a
496n/a
497n/a#### Same for split(s, 1)
498n/a
@bench('("A"*1000).split("A", 1)', "early match, single character", 1000)
def split_test_quick_match_single_character(STR):
    """Time ``split(sep, 1)`` with a one-character separator matching at once."""
    text = STR("A" * 1000)
    sep = STR("A")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
506n/a
@bench('("A"*1000).split("B", 1)', "no match, single character", 1000)
def split_test_no_match_single_character(STR):
    """Time ``split(sep, 1)`` with a one-character separator that is absent."""
    text = STR("A" * 1000)
    sep = STR("B")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
514n/a
515n/a
@bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000)
def split_test_quick_match_two_characters(STR):
    """Time ``split(sep, 1)`` with a two-character separator matching at once."""
    text = STR("AB" * 1000)
    sep = STR("AB")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
523n/a
@bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000)
def split_test_no_match_two_character(STR):
    """Time ``split(sep, 1)`` with a two-character separator that is absent."""
    text = STR("AB" * 1000)
    sep = STR("BC")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
531n/a
@bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000)
def split_test_slow_match_two_characters(STR):
    """Time ``split(sep, 1)`` on "BC", which occurs only at the end."""
    text = STR("AB" * 300+"C")
    sep = STR("BC")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)
539n/a
@bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)',
       "late match, 100 characters", 100)
def split_test_slow_match_100_characters(STR):
    """Time ``split(sep, 1)`` with a 100-character separator at the end."""
    chunk = STR("ABC"*33)
    miss_tail = STR("D")
    hit_tail = STR("E")
    text = (chunk+miss_tail)*500 + chunk+hit_tail
    sep = chunk+hit_tail
    split = text.split
    for _ in _RANGE_100:
        split(sep, 1)
551n/a
552n/a
553n/a#### Same for rsplit(s, 1)
554n/a
@bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000)
def rsplit_test_quick_match_single_character(STR):
    """Time ``rsplit(sep, 1)`` with a one-character separator matching at once."""
    text = STR("A" * 1000)
    sep = STR("A")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
562n/a
@bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000)
def rsplit_test_no_match_single_character(STR):
    """Time ``rsplit(sep, 1)`` with a one-character separator that is absent."""
    text = STR("A" * 1000)
    sep = STR("B")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
570n/a
571n/a
@bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000)
def rsplit_test_quick_match_two_characters(STR):
    """Time ``rsplit(sep, 1)`` with a two-character separator matching at once."""
    text = STR("AB" * 1000)
    sep = STR("AB")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
579n/a
@bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000)
def rsplit_test_no_match_two_character(STR):
    """Time ``rsplit(sep, 1)`` with a two-character separator that is absent."""
    text = STR("AB" * 1000)
    sep = STR("BC")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
587n/a
@bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000)
def rsplit_test_slow_match_two_characters(STR):
    """Time ``rsplit(sep, 1)`` on "CA", which occurs only at the start."""
    text = STR("C" + "AB" * 300)
    sep = STR("CA")
    rsplit = text.rsplit
    for _ in _RANGE_1000:
        rsplit(sep, 1)
595n/a
@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)',
       "late match, 100 characters", 100)
def rsplit_test_slow_match_100_characters(STR):
    """Time ``rsplit(sep, 1)`` on "E" + chunk, found only at the start."""
    chunk = STR("ABC"*33)
    miss_head = STR("D")
    hit_head = STR("E")
    text = hit_head + chunk + (miss_head+chunk)*500
    sep = hit_head + chunk
    rsplit = text.rsplit
    for _ in _RANGE_100:
        rsplit(sep, 1)
607n/a
608n/a
609n/a#### Benchmark the operator-based methods
610n/a
@bench('"A"*10', "repeat 1 character 10 times", 1000)
def repeat_single_10_times(STR):
    """Time repeating a one-character string 10 times."""
    unit = STR("A")
    for _ in _RANGE_1000:
        unit * 10
616n/a
@bench('"A"*1000', "repeat 1 character 1000 times", 1000)
def repeat_single_1000_times(STR):
    """Time repeating a one-character string 1000 times."""
    unit = STR("A")
    for _ in _RANGE_1000:
        unit * 1000
622n/a
@bench('"ABCDE"*10', "repeat 5 characters 10 times", 1000)
def repeat_5_10_times(STR):
    """Time repeating a five-character string 10 times."""
    unit = STR("ABCDE")
    for _ in _RANGE_1000:
        unit * 10
628n/a
@bench('"ABCDE"*1000', "repeat 5 characters 1000 times", 1000)
def repeat_5_1000_times(STR):
    """Time repeating a five-character string 1000 times."""
    unit = STR("ABCDE")
    for _ in _RANGE_1000:
        unit * 1000
634n/a
635n/a# + for concat
636n/a
@bench('"Andrew"+"Dalke"', "concat two strings", 1000)
def concat_two_strings(STR):
    """Time concatenating two short strings with ``+``."""
    left = STR("Andrew")
    right = STR("Dalke")
    for _ in _RANGE_1000:
        left+right
643n/a
@bench('s1+s2+s3+s4+...+s20', "concat 20 strings of words length 4 to 15",
       1000)
def concat_many_strings(STR):
    """Time one chained ``+`` expression over twenty short words."""
    # Twenty separate locals so the timed expression is a plain chain of
    # local loads and binary adds.
    w1 = STR('TIXSGYNREDCVBHJ')
    w2 = STR('PUMTLXBZVDO')
    w3 = STR('FVZNJ')
    w4 = STR('OGDXUW')
    w5 = STR('WEIMRNCOYVGHKB')
    w6 = STR('FCQTNMXPUZH')
    w7 = STR('TICZJYRLBNVUEAK')
    w8 = STR('REYB')
    w9 = STR('PWUOQ')
    w10 = STR('EQHCMKBS')
    w11 = STR('AEVDFOH')
    w12 = STR('IFHVD')
    w13 = STR('JGTCNLXWOHQ')
    w14 = STR('ITSKEPYLROZAWXF')
    w15 = STR('THEK')
    w16 = STR('GHPZFBUYCKMNJIT')
    w17 = STR('JMUZ')
    w18 = STR('WLZQMTB')
    w19 = STR('KPADCBW')
    w20 = STR('TNJHZQAGBU')
    for _ in _RANGE_1000:
        (w1 + w2 + w3 + w4 + w5 + w6 + w7 + w8 + w9 + w10 +
         w11 + w12 + w13 + w14 + w15 + w16 + w17 + w18 + w19 + w20)
670n/a
671n/a
672n/a#### Benchmark join
673n/a
def get_bytes_yielding_seq(STR, arg):
    """Return ``STR(arg)`` for use as the iterable argument of ``join``.

    Raises UnsupportedType when STR is BYTES on Python 3 or later.
    """
    on_py3 = sys.version_info >= (3,)
    if on_py3 and STR is BYTES:
        raise UnsupportedType
    return STR(arg)
678n/a
@bench('"A".join("")',
       "join empty string, with 1 character sep", 100)
def join_empty_single(STR):
    """Time joining an empty string with a one-character separator."""
    separator = STR("A")
    items = get_bytes_yielding_seq(STR, "")
    join = separator.join
    for _ in _RANGE_100:
        join(items)
687n/a
@bench('"ABCDE".join("")',
       "join empty string, with 5 character sep", 100)
def join_empty_5(STR):
    """Time joining an empty string with a five-character separator."""
    separator = STR("ABCDE")
    items = get_bytes_yielding_seq(STR, "")
    join = separator.join
    for _ in _RANGE_100:
        join(items)
696n/a
@bench('"A".join("ABC..Z")',
       "join string with 26 characters, with 1 character sep", 1000)
def join_alphabet_single(STR):
    """Time joining the characters of the uppercase alphabet string.

    The separator is one character.  ``get_bytes_yielding_seq`` may raise
    UnsupportedType for some string types.
    """
    sep = STR("A")
    # All 26 letters uppercase, matching the "ABC..Z" label.
    s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)
705n/a
@bench('"ABCDE".join("ABC..Z")',
       "join string with 26 characters, with 5 character sep", 1000)
def join_alphabet_5(STR):
    """Time joining the characters of the uppercase alphabet string.

    The separator is five characters.  ``get_bytes_yielding_seq`` may raise
    UnsupportedType for some string types.
    """
    sep = STR("ABCDE")
    # All 26 letters uppercase, matching the "ABC..Z" label.
    s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)
714n/a
@bench('"A".join(list("ABC..Z"))',
       "join list of 26 characters, with 1 character sep", 1000)
def join_alphabet_list_single(STR):
    """Time joining a list of 26 one-letter strings, one-character separator."""
    sep = STR("A")
    # All 26 letters uppercase, matching the "ABC..Z" label.
    s2 = [STR(x) for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)
723n/a
@bench('"ABCDE".join(list("ABC..Z"))',
       "join list of 26 characters, with 5 character sep", 1000)
def join_alphabet_list_five(STR):
    """Time joining a list of 26 one-letter strings, five-character separator."""
    sep = STR("ABCDE")
    # All 26 letters uppercase, matching the "ABC..Z" label.
    s2 = [STR(x) for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)
732n/a
@bench('"A".join(["Bob"]*100)',
       "join list of 100 words, with 1 character sep", 1000)
def join_100_words_single(STR):
    """Time joining a list of 100 copies of "Bob", one-character separator."""
    sep = STR("A")
    s2 = [STR("Bob")]*100
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)
741n/a
@bench('"ABCDE".join(["Bob"]*100)',
       "join list of 100 words, with 5 character sep", 1000)
def join_100_words_5(STR):
    """Time joining a list of 100 copies of "Bob", five-character separator."""
    sep = STR("ABCDE")
    s2 = [STR("Bob")]*100
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)
750n/a
751n/a#### split tests
752n/a
@bench('("Here are some words. "*2).split()', "split whitespace (small)", 1000)
def whitespace_split(STR):
    """Time argument-less ``split()`` on a short sentence."""
    sentence = STR("Here are some words. "*2)
    split = sentence.split
    for _ in _RANGE_1000:
        split()
759n/a
@bench('("Here are some words. "*2).rsplit()', "split whitespace (small)", 1000)
def whitespace_rsplit(STR):
    """Time argument-less ``rsplit()`` on a short sentence."""
    sentence = STR("Here are some words. "*2)
    rsplit = sentence.rsplit
    for _ in _RANGE_1000:
        rsplit()
766n/a
@bench('("Here are some words. "*2).split(None, 1)',
       "split 1 whitespace", 1000)
def whitespace_split_1(STR):
    """Time ``split(None, 1)`` on a short sentence."""
    sentence = STR("Here are some words. "*2)
    split = sentence.split
    # None is held in a local so it is passed the same way as the original.
    no_sep = None
    for _ in _RANGE_1000:
        split(no_sep, 1)
775n/a
@bench('("Here are some words. "*2).rsplit(None, 1)',
       "split 1 whitespace", 1000)
def whitespace_rsplit_1(STR):
    """Time ``rsplit(None, 1)`` on a short sentence."""
    sentence = STR("Here are some words. "*2)
    rsplit = sentence.rsplit
    # None is held in a local so it is passed the same way as the original.
    no_sep = None
    for _ in _RANGE_1000:
        rsplit(no_sep, 1)
784n/a
@bench('("Here are some words. "*2).partition(" ")',
       "split 1 whitespace", 1000)
def whitespace_partition(STR):
    """Time ``partition(" ")`` on a short sentence."""
    space = STR(" ")
    sentence = STR("Here are some words. "*2)
    partition = sentence.partition
    for _ in _RANGE_1000:
        partition(space)
793n/a
@bench('("Here are some words. "*2).rpartition(" ")',
       "split 1 whitespace", 1000)
def whitespace_rpartition(STR):
    """Time ``rpartition(" ")`` on a short sentence."""
    space = STR(" ")
    sentence = STR("Here are some words. "*2)
    rpartition = sentence.rpartition
    for _ in _RANGE_1000:
        rpartition(space)
802n/a
803n/ahuman_text = """\
804n/aPython is a dynamic object-oriented programming language that can be
805n/aused for many kinds of software development. It offers strong support
806n/afor integration with other languages and tools, comes with extensive
807n/astandard libraries, and can be learned in a few days. Many Python
808n/aprogrammers report substantial productivity gains and feel the language
809n/aencourages the development of higher quality, more maintainable code.
810n/a
811n/aPython runs on Windows, Linux/Unix, Mac OS X, Amiga, Palm
812n/aHandhelds, and Nokia mobile phones. Python has also been ported to the
813n/aJava and .NET virtual machines.
814n/a
815n/aPython is distributed under an OSI-approved open source license that
816n/amakes it free to use, even for commercial products.
817n/a"""*25
# Both variants are converted once at import time so that the benchmarks
# using them do not pay for the conversion.
human_text_bytes = bytes_from_str(human_text)
human_text_unicode = unicode_from_str(human_text)


def _get_human_text(STR):
    """Return the pre-converted human text matching the converter *STR*.

    Raises AssertionError if STR is neither BYTES nor UNICODE.
    """
    if STR is BYTES:
        return human_text_bytes
    if STR is UNICODE:
        return human_text_unicode
    raise AssertionError
826n/a
@bench('human_text.split()', "split whitespace (huge)", 10)
def whitespace_split_huge(STR):
    """Time argument-less ``split()`` on the large human-text sample."""
    text = _get_human_text(STR)
    split = text.split
    for _ in _RANGE_10:
        split()
833n/a
@bench('human_text.rsplit()', "split whitespace (huge)", 10)
def whitespace_rsplit_huge(STR):
    """Time argument-less ``rsplit()`` on the large human-text sample."""
    text = _get_human_text(STR)
    rsplit = text.rsplit
    for _ in _RANGE_10:
        rsplit()
840n/a
841n/a
842n/a
@bench('"this\\nis\\na\\ntest\\n".split("\\n")', "split newlines", 1000)
def newlines_split(STR):
    """Time ``split("\\n")`` on a short four-line text."""
    text = STR("this\nis\na\ntest\n")
    split = text.split
    newline = STR("\n")
    for _ in _RANGE_1000:
        split(newline)
850n/a
851n/a
@bench('"this\\nis\\na\\ntest\\n".rsplit("\\n")', "split newlines", 1000)
def newlines_rsplit(STR):
    """Time ``rsplit("\\n")`` on a short four-line text."""
    text = STR("this\nis\na\ntest\n")
    rsplit = text.rsplit
    newline = STR("\n")
    for _ in _RANGE_1000:
        rsplit(newline)
859n/a
@bench('"this\\nis\\na\\ntest\\n".splitlines()', "split newlines", 1000)
def newlines_splitlines(STR):
    """Time ``splitlines()`` on a short four-line text."""
    text = STR("this\nis\na\ntest\n")
    splitlines = text.splitlines
    for _ in _RANGE_1000:
        splitlines()
866n/a
867n/a## split text with 2000 newlines
868n/a
869n/adef _make_2000_lines():
870n/a import random
871n/a r = random.Random(100)
872n/a chars = list(map(chr, range(32, 128)))
873n/a i = 0
874n/a while i < len(chars):
875n/a chars[i] = " "
876n/a i += r.randrange(9)
877n/a s = "".join(chars)
878n/a s = s*4
879n/a words = []
880n/a for i in range(2000):
881n/a start = r.randrange(96)
882n/a n = r.randint(5, 65)
883n/a words.append(s[start:start+n])
884n/a return "\n".join(words)+"\n"
885n/a
# The 2000-line sample is generated once and pre-converted to both string
# types so that benchmarks do not pay for generation or conversion.
_text_with_2000_lines = _make_2000_lines()
_text_with_2000_lines_bytes = bytes_from_str(_text_with_2000_lines)
_text_with_2000_lines_unicode = unicode_from_str(_text_with_2000_lines)


def _get_2000_lines(STR):
    """Return the pre-converted 2000-line text matching the converter *STR*.

    Raises AssertionError if STR is neither BYTES nor UNICODE.
    """
    if STR is BYTES:
        return _text_with_2000_lines_bytes
    if STR is UNICODE:
        return _text_with_2000_lines_unicode
    raise AssertionError
895n/a
896n/a
@bench('"...text...".split("\\n")', "split 2000 newlines", 10)
def newlines_split_2000(STR):
    """Time ``split("\\n")`` on the 2000-line sample text."""
    text = _get_2000_lines(STR)
    split = text.split
    newline = STR("\n")
    for _ in _RANGE_10:
        split(newline)
904n/a
@bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10)
def newlines_rsplit_2000(STR):
    """Time ``rsplit("\\n")`` on the 2000-line sample text."""
    text = _get_2000_lines(STR)
    rsplit = text.rsplit
    newline = STR("\n")
    for _ in _RANGE_10:
        rsplit(newline)
912n/a
@bench('"...text...".splitlines()', "split 2000 newlines", 10)
def newlines_splitlines_2000(STR):
    """Time ``splitlines()`` on the 2000-line sample text."""
    text = _get_2000_lines(STR)
    splitlines = text.splitlines
    for _ in _RANGE_10:
        splitlines()
919n/a
920n/a
921n/a## split text on "--" characters
@bench(
    '"this--is--a--test--of--the--emergency--broadcast--system".split("--")',
    "split on multicharacter separator (small)", 1000)
def split_multichar_sep_small(STR):
    """Time ``split("--")`` on a short text with eight separators."""
    text = STR("this--is--a--test--of--the--emergency--broadcast--system")
    split = text.split
    dashes = STR("--")
    for _ in _RANGE_1000:
        split(dashes)

@bench(
    '"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")',
    "split on multicharacter separator (small)", 1000)
def rsplit_multichar_sep_small(STR):
    """Time ``rsplit("--")`` on a short text with eight separators."""
    text = STR("this--is--a--test--of--the--emergency--broadcast--system")
    rsplit = text.rsplit
    dashes = STR("--")
    for _ in _RANGE_1000:
        rsplit(dashes)
940n/a
941n/a## split dna text on "ACTAT" characters
@bench('dna.split("ACTAT")',
       "split on multicharacter separator (dna)", 10)
def split_multichar_sep_dna(STR):
    """Time ``split("ACTAT")`` on the text returned by ``_get_dna``."""
    sequence = _get_dna(STR)
    split = sequence.split
    motif = STR("ACTAT")
    for _ in _RANGE_10:
        split(motif)
950n/a
@bench('dna.rsplit("ACTAT")',
       "split on multicharacter separator (dna)", 10)
def rsplit_multichar_sep_dna(STR):
    """Time ``rsplit("ACTAT")`` on the text returned by ``_get_dna``."""
    sequence = _get_dna(STR)
    rsplit = sequence.rsplit
    motif = STR("ACTAT")
    for _ in _RANGE_10:
        rsplit(motif)
959n/a
960n/a
961n/a
962n/a## split with limits
963n/a
# One tab-separated record with nine columns, used by the tab-split tests.
GFF3_example = "\t".join((
    "I",
    "Genomic_canonical",
    "region",
    "357208",
    "396183",
    ".",
    "+",
    ".",
    "ID=Sequence:R119;note=Clone R119%3B Genbank AF063007;Name=R119",
))
967n/a
@bench('GFF3_example.split("\\t")', "tab split", 1000)
def tab_split_no_limit(STR):
    """Time ``split("\\t")`` on the GFF3 example record, no split limit."""
    tab = STR("\t")
    record = STR(GFF3_example)
    split = record.split
    for _ in _RANGE_1000:
        split(tab)
975n/a
@bench('GFF3_example.split("\\t", 8)', "tab split", 1000)
def tab_split_limit(STR):
    """Time ``split("\\t", 8)`` on the GFF3 example record."""
    tab = STR("\t")
    record = STR(GFF3_example)
    split = record.split
    for _ in _RANGE_1000:
        split(tab, 8)
983n/a
@bench('GFF3_example.rsplit("\\t")', "tab split", 1000)
def tab_rsplit_no_limit(STR):
    """Time ``rsplit("\\t")`` on the GFF3 example record, no split limit."""
    tab = STR("\t")
    record = STR(GFF3_example)
    rsplit = record.rsplit
    for _ in _RANGE_1000:
        rsplit(tab)
991n/a
@bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000)
def tab_rsplit_limit(STR):
    """Time ``rsplit("\\t", 8)`` on the GFF3 example record."""
    tab = STR("\t")
    record = STR(GFF3_example)
    rsplit = record.rsplit
    for _ in _RANGE_1000:
        rsplit(tab, 8)
999n/a
1000n/a#### Count characters
1001n/a
@bench('...text.with.2000.newlines.count("\\n")',
       "count newlines", 10)
def count_newlines(STR):
    """Time ``count("\\n")`` on the 2000-line sample text."""
    text = _get_2000_lines(STR)
    count = text.count
    newline = STR("\n")
    for _ in _RANGE_10:
        count(newline)
1010n/a
1011n/a# Orchid sequences concatenated, from Biopython
_dna = """
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGGGTT
AATCTGGAGGATCTGTTTACTTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGAATTGCCATCG
AGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGCAGTTTTGCTCCAAGTCGTT
TGACACATAATTGGTGAAGGGGGTGGCATCCTTCCCTGACCCTCCCCCAACTATTTTTTTAACAACTCTC
AGCAACGGAGACTCAGTCTTCGGCAAATGCGATAAATGGTGTGAATTGCAGAATCCCGTGCACCATCGAG
TCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCATTGCGAGTCATAT
CTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCGGATGTGAGTTTGGCCCCTTGTTCTT
TGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAGGTGGACGAACTAT
GCTACAACAAAATTGTTGTGCAGAGGCCCCGGGTTGTCGTATTAGATGGGCCACCGTAATCTGAAGACCC
TTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGCGACCCCAGGTCAG
GTGAGCAACAGCTGTCGTAACAAGGTTTCCGTAGGGTGAACTGCGGAAGGATCATTGTTGAGATCACATA
ATAATTGATCGAGTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGAC
CTAGATTTGCCATCGAGCCTCCTTGGGAGCATCCTTGTTGGCGATATCTAAACCCTCAATTTTTCCCCCA
ATCAAATTACACAAAATTGGTGGAGGGGGTGGCATTCTTCCCTTACCCTCCCCCAAATATTTTTTTAACA
ACTCTCAGCAACGGATATCTCAGCTCTTGCATCGATGAAGAACCCACCGAAATGCGATAAATGGTGTGAA
TTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACG
CCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCG
GATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGATGCATGGGCTTTTGATGGTCCTAA
ATACGGCAAGAGGTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATAAG
ATGGGCCACCGATATCTGAAGACCCTTTTGGACCCCATTGGAGCCCATCAACCCATGTCAGTTGATGGCC
ATTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGA
GTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCA
TCGAGCCTCCTTGGGAGCTTTCTTGTTGGCGATATCTAAACCCTTGCCCGGCAGAGTTTTGGGAATCCCG
TGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCAT
TGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACACACCTGTTCAGCCGGTGCGGATGTGAGTTTG
GCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAG
GTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATTAGATGGGCCACCAT
AATCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGC
GACCCAGTCAGGTGAGGGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGAG
TTAATCTGGAGGATCTGTTTACTTTGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCAT
CGAGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTTGGCGCCAAGTCA
TATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAACAACTC
TCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGAATTGC
AGAATCCCGTGAACCATCGAGTCTTTGGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCT
GCCTGGGCATTGGGAATCATATCTCTCCCCTAACGAGGCTATCCAAACATACTGTTCATCCGGTGCGGAT
GTGAGTTTGGCCCCTTGTTCTTTGGTACCGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTCAAAA
CGGCAAGAGGTGGACGAACTATGCCACAACAAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTAGATG
GGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGACCA
TTTGTTGCGACCCCAGTCAGCTGAGCAACCCGCTGAGTGGAAGGTCATTGCCGATATCACATAATAATTG
ATCGAGTTAATCTGGAGGATCTGTTTACTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGATTT
GCCATCGAGCCTCCTTGGGAGTTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTGTGCGCCA
AGTCATATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAAC
AACTCTCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGA
ATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCAC
GCCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCATCCGGTGC
GGATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTC
AAAACGGCAAGAGGTGGACGAACTATGCTACAACCAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTA
GATGGGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATG
ACCATGTGTTGCGACCCCAGTCAGCTGAGCAACGCGCTGAGCGTAACAAGGTTTCCGTAGGTGGACCTCC
GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC
"""
# Drop the line breaks to get one contiguous sequence, then repeat it 25
# times to make the benchmark input larger.
_dna = "".join(_dna.splitlines()) * 25
# Pre-convert once per string type so benchmarks do not pay for conversion.
_dna_bytes = bytes_from_str(_dna)
_dna_unicode = unicode_from_str(_dna)
1068n/a
def _get_dna(STR):
    """Return the prebuilt DNA sequence in the string type selected by STR.

    STR must be the BYTES or UNICODE converter object itself; anything
    else raises AssertionError.
    """
    if STR is BYTES:
        return _dna_bytes
    elif STR is UNICODE:
        return _dna_unicode
    else:
        raise AssertionError
1075n/a
@bench('dna.count("AACT")', "count AACT substrings in DNA example", 10)
def count_aact(STR):
    """Count occurrences of "AACT" in the DNA sequence, 10 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    dna = _get_dna(STR)
    # Bound once so the attribute lookup is not repeated inside the loop.
    count = dna.count
    pattern = STR("AACT")
    for _ in _RANGE_10:
        count(pattern)
1083n/a
1084n/a##### startswith and endswith
1085n/a
@bench('"Andrew".startswith("A")', 'startswith single character', 1000)
def startswith_single(STR):
    """Test "Andrew" for the one-character prefix "A", 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("Andrew")
    prefix = STR("A")
    # Bound once so the attribute lookup is not repeated inside the loop.
    startswith = text.startswith
    for _ in _RANGE_1000:
        startswith(prefix)
1093n/a
@bench('"Andrew".startswith("Andrew")', 'startswith multiple characters',
       1000)
def startswith_multiple(STR):
    """Test "Andrew" for the full-length prefix "Andrew", 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("Andrew")
    prefix = STR("Andrew")
    # Bound once so the attribute lookup is not repeated inside the loop.
    startswith = text.startswith
    for _ in _RANGE_1000:
        startswith(prefix)
1102n/a
@bench('"Andrew".startswith("Anders")',
       'startswith multiple characters - not!', 1000)
def startswith_multiple_not(STR):
    """Test "Andrew" for the non-matching prefix "Anders", 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("Andrew")
    prefix = STR("Anders")
    # Bound once so the attribute lookup is not repeated inside the loop.
    startswith = text.startswith
    for _ in _RANGE_1000:
        startswith(prefix)
1111n/a
1112n/a
1113n/a# endswith
1114n/a
@bench('"Andrew".endswith("w")', 'endswith single character', 1000)
def endswith_single(STR):
    """Test "Andrew" for the one-character suffix "w", 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("Andrew")
    suffix = STR("w")
    # Bound once so the attribute lookup is not repeated inside the loop.
    endswith = text.endswith
    for _ in _RANGE_1000:
        endswith(suffix)
1122n/a
@bench('"Andrew".endswith("Andrew")', 'endswith multiple characters', 1000)
def endswith_multiple(STR):
    """Test "Andrew" for the full-length suffix "Andrew", 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("Andrew")
    suffix = STR("Andrew")
    # Bound once so the attribute lookup is not repeated inside the loop.
    endswith = text.endswith
    for _ in _RANGE_1000:
        endswith(suffix)
1130n/a
@bench('"Andrew".endswith("Anders")',
       'endswith multiple characters - not!', 1000)
def endswith_multiple_not(STR):
    """Test "Andrew" for the non-matching suffix "Anders", 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("Andrew")
    suffix = STR("Anders")
    # Bound once so the attribute lookup is not repeated inside the loop.
    endswith = text.endswith
    for _ in _RANGE_1000:
        endswith(suffix)
1139n/a
1140n/a#### Strip
1141n/a
@bench('"Hello!\\n".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_right(STR):
    """Call strip() on a string with one trailing newline, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("Hello!\n")
    # Bound once so the attribute lookup is not repeated inside the loop.
    strip = text.strip
    for _ in _RANGE_1000:
        strip()
1148n/a
@bench('"Hello!\\n".rstrip()', 'strip terminal newline', 1000)
def terminal_newline_rstrip(STR):
    """Call rstrip() on a string with one trailing newline, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("Hello!\n")
    # Bound once so the attribute lookup is not repeated inside the loop.
    rstrip = text.rstrip
    for _ in _RANGE_1000:
        rstrip()
1155n/a
@bench('"\\nHello!".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_left(STR):
    """Call strip() on a string with one leading newline, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("\nHello!")
    # Bound once so the attribute lookup is not repeated inside the loop.
    strip = text.strip
    for _ in _RANGE_1000:
        strip()
1162n/a
@bench('"\\nHello!\\n".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_both(STR):
    """Call strip() on a string with a newline at each end, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("\nHello!\n")
    # Bound once so the attribute lookup is not repeated inside the loop.
    strip = text.strip
    for _ in _RANGE_1000:
        strip()
1169n/a
@bench('"\\nHello!".lstrip()', 'strip terminal newline', 1000)
def terminal_newline_lstrip(STR):
    """Call lstrip() on a string with one leading newline, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.  The @bench label names lstrip() so the report line
    describes the call that is actually timed.
    """
    s = STR("\nHello!")
    # Bound once so the attribute lookup is not repeated inside the loop.
    s_lstrip = s.lstrip
    for x in _RANGE_1000:
        s_lstrip()
1176n/a
@bench('s="Hello!\\n"; s[:-1] if s[-1:]=="\\n" else s',
       'strip terminal newline', 1000)
def terminal_newline_if_else(STR):
    """Strip a trailing newline with a conditional expression, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    s = STR("Hello!\n")
    NL = STR("\n")
    for x in _RANGE_1000:
        # Compare a one-element slice, not s[-1]: on Python 3 indexing a
        # bytes object yields an int, which never equals NL, so the
        # newline-removing branch would never run for bytes.
        s[:-1] if (s[-1:] == NL) else s
1184n/a
1185n/a
1186n/a# Strip multiple spaces or tabs
1187n/a
@bench('"Hello\\t \\t".strip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_strip(STR):
    """Call strip() on a string ending in tabs and spaces, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    # The whitespace must be at the very end of the string; with the "!"
    # placed after it, strip() would have nothing to remove.
    s = STR("Hello!\t \t")
    # Bound once so the attribute lookup is not repeated inside the loop.
    s_strip = s.strip
    for x in _RANGE_1000:
        s_strip()
1194n/a
@bench('"Hello\\t \\t".rstrip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_rstrip(STR):
    """Call rstrip() on a string ending in tabs and spaces, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("Hello!\t \t")
    # Bound once so the attribute lookup is not repeated inside the loop.
    rstrip = text.rstrip
    for _ in _RANGE_1000:
        rstrip()
1201n/a
@bench('"\\t \\tHello".lstrip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_lstrip(STR):
    """Call lstrip() on a string starting with tabs and spaces, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.  The @bench label names lstrip() so the report line
    describes the call that is actually timed.
    """
    s = STR("\t \tHello!")
    # Bound once so the attribute lookup is not repeated inside the loop.
    s_lstrip = s.lstrip
    for x in _RANGE_1000:
        s_lstrip()
1208n/a
1209n/a
1210n/a#### replace
@bench('"This is a test".replace(" ", "\\t")', 'replace single character',
       1000)
def replace_single_character(STR):
    """Replace each space with a tab in a short sentence, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("This is a test!")
    old = STR(" ")
    new = STR("\t")
    # Bound once so the attribute lookup is not repeated inside the loop.
    replace = text.replace
    for _ in _RANGE_1000:
        replace(old, new)
1220n/a
@uses_re
@bench('re.sub(" ", "\\t", "This is a test")', 'replace single character',
       1000)
def replace_single_character_re(STR):
    """Replace each space with a tab using a compiled regex, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.  The pattern is compiled once, outside the timed loop.
    """
    s = STR("This is a test!")
    pat = re.compile(STR(" "))
    to_str = STR("\t")
    # Bound once so the attribute lookup is not repeated inside the loop.
    pat_sub = pat.sub
    for x in _RANGE_1000:
        pat_sub(to_str, s)
1231n/a
@bench('"...text.with.2000.lines...replace("\\n", " ")',
       'replace single character, big string', 10)
def replace_single_character_big(STR):
    """Replace each newline with a space in the 2000-line text, 10 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = _get_2000_lines(STR)
    old = STR("\n")
    new = STR(" ")
    # Bound once so the attribute lookup is not repeated inside the loop.
    replace = text.replace
    for _ in _RANGE_10:
        replace(old, new)
1241n/a
@uses_re
@bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
       'replace single character, big string', 10)
def replace_single_character_big_re(STR):
    """Replace each newline with a space via a compiled regex, 10 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.  The pattern is compiled once, outside the timed loop.
    """
    text = _get_2000_lines(STR)
    newline_re = re.compile(STR("\n"))
    replacement = STR(" ")
    # Bound once so the attribute lookup is not repeated inside the loop.
    sub = newline_re.sub
    for _ in _RANGE_10:
        sub(replacement, text)
1252n/a
1253n/a
@bench('dna.replace("ATC", "ATT")',
       'replace multiple characters, dna', 10)
def replace_multiple_characters_dna(STR):
    """Replace "ATC" with "ATT" throughout the DNA sequence, 10 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    dna = _get_dna(STR)
    old = STR("ATC")
    new = STR("ATT")
    # Bound once so the attribute lookup is not repeated inside the loop.
    replace = dna.replace
    for _ in _RANGE_10:
        replace(old, new)
1263n/a
1264n/a# This increases the character count
@bench('"...text.with.2000.newlines...replace("\\n", "\\r\\n")',
       'replace and expand multiple characters, big string', 10)
def replace_multiple_character_big(STR):
    """Replace "\\n" with "\\r\\n" in the 2000-line text, 10 times.

    Each replacement is one character longer than what it replaces.
    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = _get_2000_lines(STR)
    old = STR("\n")
    new = STR("\r\n")
    # Bound once so the attribute lookup is not repeated inside the loop.
    replace = text.replace
    for _ in _RANGE_10:
        replace(old, new)
1274n/a
1275n/a
1276n/a# This decreases the character count
@bench('"When shall we three meet again?".replace("ee", "")',
       'replace/remove multiple characters', 1000)
def replace_multiple_character_remove(STR):
    """Delete every "ee" from a short sentence, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("When shall we three meet again?")
    old = STR("ee")
    new = STR("")
    # Bound once so the attribute lookup is not repeated inside the loop.
    replace = text.replace
    for _ in _RANGE_1000:
        replace(old, new)
1286n/a
1287n/a
# One "A" followed by 128 * 1024 "Z" characters, pre-converted once per
# string type.
big_s = "A" + "Z" * (128 * 1024)
big_s_bytes = bytes_from_str(big_s)
big_s_unicode = unicode_from_str(big_s)
def _get_big_s(STR):
    """Return the prebuilt big string in the string type selected by STR.

    STR must be the BYTES or UNICODE converter object itself; anything
    else raises AssertionError.
    """
    if STR is BYTES:
        return big_s_bytes
    elif STR is UNICODE:
        return big_s_unicode
    else:
        raise AssertionError
1295n/a
1296n/a# The older replace implementation counted all matches in
1297n/a# the string even when it only needed to make one replacement.
@bench('("A" + ("Z"*128*1024)).replace("A", "BB", 1)',
       'quick replace single character match', 10)
def quick_replace_single_match(STR):
    """Replace the leading "A" of the big string with "BB", 10 times.

    The count argument of 1 limits replace() to a single substitution.
    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = _get_big_s(STR)
    old = STR("A")
    new = STR("BB")
    # Bound once so the attribute lookup is not repeated inside the loop.
    replace = text.replace
    for _ in _RANGE_10:
        replace(old, new, 1)
1307n/a
@bench('("A" + ("Z"*128*1024)).replace("AZZ", "BBZZ", 1)',
       'quick replace multiple character match', 10)
def quick_replace_multiple_match(STR):
    """Replace the leading "AZZ" of the big string with "BBZZ", 10 times.

    The count argument of 1 limits replace() to a single substitution.
    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = _get_big_s(STR)
    old = STR("AZZ")
    new = STR("BBZZ")
    # Bound once so the attribute lookup is not repeated inside the loop.
    replace = text.replace
    for _ in _RANGE_10:
        replace(old, new, 1)
1317n/a
1318n/a
1319n/a####
1320n/a
1321n/a# CCP does a lot of this, for internationalisation of ingame messages.
# Template and substitution values for the %-formatting benchmark, each
# pre-converted once per string type.
_format = "The %(thing)s is %(place)s the %(location)s."
_format_dict = {
    "thing": "THING",
    "place": "PLACE",
    "location": "LOCATION",
}
_format_bytes = bytes_from_str(_format)
_format_unicode = unicode_from_str(_format)
_format_dict_bytes = dict(
    (bytes_from_str(key), bytes_from_str(value))
    for (key, value) in _format_dict.items())
_format_dict_unicode = dict(
    (unicode_from_str(key), unicode_from_str(value))
    for (key, value) in _format_dict.items())
1328n/a
def _get_format(STR):
    """Return the format template in the string type selected by STR.

    Raises UnsupportedType for BYTES when running on Python 3, and
    AssertionError when STR is neither UNICODE nor BYTES.
    """
    if STR is UNICODE:
        return _format_unicode
    if STR is not BYTES:
        raise AssertionError
    if sys.version_info >= (3,):
        raise UnsupportedType
    return _format_bytes
1337n/a
def _get_format_dict(STR):
    """Return the substitution dict in the string type selected by STR.

    Raises UnsupportedType for BYTES when running on Python 3, and
    AssertionError when STR is neither UNICODE nor BYTES.
    """
    if STR is UNICODE:
        return _format_dict_unicode
    if STR is not BYTES:
        raise AssertionError
    if sys.version_info >= (3,):
        raise UnsupportedType
    return _format_dict_bytes
1346n/a
1347n/a# Formatting.
@bench('"The %(k1)s is %(k2)s the %(k3)s."%{"k1":"x","k2":"y","k3":"z",}',
       'formatting a string type with a dict', 1000)
def format_with_dict(STR):
    """Apply %-formatting with a dict of named values, 1000 times.

    The template and dict come from _get_format and _get_format_dict,
    which may raise UnsupportedType for the requested string type.
    """
    template = _get_format(STR)
    values = _get_format_dict(STR)
    for _ in _RANGE_1000:
        template % values
1355n/a
1356n/a
1357n/a#### Upper- and lower- case conversion
1358n/a
@bench('("Where in the world is Carmen San Deigo?"*10).lower()',
       "case conversion -- rare", 1000)
def lower_conversion_rare(STR):
    """Call lower() on mostly-lowercase text, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("Where in the world is Carmen San Deigo?"*10)
    # Bound once so the attribute lookup is not repeated inside the loop.
    lower = text.lower
    for _ in _RANGE_1000:
        lower()
1366n/a
@bench('("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower()',
       "case conversion -- dense", 1000)
def lower_conversion_dense(STR):
    """Call lower() on all-uppercase text, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10)
    # Bound once so the attribute lookup is not repeated inside the loop.
    lower = text.lower
    for _ in _RANGE_1000:
        lower()
1374n/a
1375n/a
@bench('("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10).upper()',
       "case conversion -- rare", 1000)
def upper_conversion_rare(STR):
    """Call upper() on mostly-uppercase text, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    # Only four letters per repetition are lowercase, so few characters
    # need converting; this matches the input named in the @bench label.
    s = STR("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10)
    # Bound once so the attribute lookup is not repeated inside the loop.
    s_upper = s.upper
    for x in _RANGE_1000:
        s_upper()
1383n/a
@bench('("where in the world is carmen san deigo?"*10).upper()',
       "case conversion -- dense", 1000)
def upper_conversion_dense(STR):
    """Call upper() on all-lowercase text, 1000 times.

    STR is the converter (BYTES or UNICODE) producing the string type
    under test.
    """
    text = STR("where in the world is carmen san deigo?"*10)
    # Bound once so the attribute lookup is not repeated inside the loop.
    upper = text.upper
    for _ in _RANGE_1000:
        upper()
1391n/a
1392n/a
1393n/a# end of benchmarks
1394n/a
1395n/a#################
1396n/a
class BenchTimer(timeit.Timer):
    """timeit.Timer that picks its own loop count and reports the best run."""

    def best(self, repeat=1):
        """Return the smallest per-loop time, in seconds.

        The loop count starts at 10 and grows tenfold (up to 10**9) until
        one timeit() call takes more than 0.02 seconds.  That measurement
        counts as the first run; repeat - 1 further runs use the same
        loop count, and the minimum is divided by the loop count.
        """
        number = 0
        elapsed = 0.0
        for exponent in range(1, 10):
            number = 10 ** exponent
            elapsed = self.timeit(number)
            if elapsed > 0.02:
                break
        fastest = elapsed
        for _ in range(repeat - 1):
            fastest = min(fastest, self.timeit(number))
        return fastest / number
1408n/a
def main():
    """Run the selected benchmarks and print a bytes-vs-unicode table.

    Positional command-line arguments select benchmarks whose group name
    contains any of the given words.  Benchmarks are sorted and reported
    by group; each row shows the bytes time, the unicode time (both in
    milliseconds), their ratio as a percentage, and the benchmark's
    comment.  A TOTAL row closes the report.
    """
    options, selected = parser.parse_args()
    if options.bytes_only and options.unicode_only:
        raise SystemExit("Only one of --8-bit and --unicode are allowed")

    def wanted(func):
        # Keep only @bench-decorated objects that match the command line.
        if not hasattr(func, "is_bench"):
            return False
        if selected and not any(word in func.group for word in selected):
            # Not selected, ignore
            return False
        return not (options.skip_re and hasattr(func, "uses_re"))

    def measure(func_name, type_name):
        # Time one benchmark for one string type and return
        # (seconds, display text).  An unsupported type counts as zero
        # seconds and is shown as "N/A".
        stmt = "__main__.%s(__main__.%s)" % (func_name, type_name)
        try:
            seconds = BenchTimer(stmt, "import __main__").best(REPEAT)
        except UnsupportedType:
            return 0.0, "N/A"
        return seconds, "%.2f" % (1000 * seconds)

    benchmarks = sorted(
        (func.group, name, func)
        for (name, func) in globals().items() if wanted(func))

    p("bytes\tunicode")
    p("(in ms)\t(in ms)\t%\tcomment")

    bytes_total = 0.0
    uni_total = 0.0

    by_group = itertools.groupby(benchmarks, operator.itemgetter(0))
    for title, members in by_group:
        # Flush buffer before each group
        sys.stdout.flush()
        p("="*10, title)
        for (_, name, func) in members:
            if options.unicode_only:
                bytes_time, bytes_text = 0.0, " - "
            else:
                bytes_time, bytes_text = measure(name, "BYTES")
                bytes_total += bytes_time

            if options.bytes_only:
                uni_time, uni_text = 0.0, " - "
            else:
                uni_time, uni_text = measure(name, "UNICODE")
                uni_total += uni_time

            try:
                average = bytes_time/uni_time
            except (TypeError, ZeroDivisionError):
                # No unicode timing to compare against.
                average = 0.0
            p("%s\t%s\t%.1f\t%s (*%d)" % (
                bytes_text, uni_text, 100.*average,
                func.comment, func.repeat_count))

    if bytes_total == 0.0 and uni_total == 0.0:
        p("That was zippy!")
        return

    try:
        ratio = bytes_total/uni_total
    except ZeroDivisionError:
        ratio = 0.0
    p("%.2f\t%.2f\t%.1f\t%s" % (
        1000*bytes_total, 1000*uni_total, 100.*ratio,
        "TOTAL"))
1480n/a
# Script entry point: run the benchmark suite.
if __name__ == "__main__":
    main()