» Core Development > Code coverage > Tools/scripts/parseentities.py

Python code coverage for Tools/scripts/parseentities.py

# | count | content
1n/a#!/usr/bin/env python3
2n/a""" Utility for parsing HTML entity definitions available from:
3n/a
4n/a http://www.w3.org/ as e.g.
5n/a http://www.w3.org/TR/REC-html40/HTMLlat1.ent
6n/a
7n/a Input is read from stdin, output is written to stdout in form of a
8n/a Python snippet defining a dictionary "entitydefs" mapping literal
9n/a entity name to character or numeric entity.
10n/a
11n/a Marc-Andre Lemburg, mal@lemburg.com, 1999.
12n/a Use as you like. NO WARRANTIES.
13n/a
14n/a"""
15n/aimport re,sys
16n/a
# Matches one SGML entity declaration of the form
#     <!ENTITY name CDATA "value" -- comment -->
# Group 1 is the entity name, group 2 the quoted replacement text and
# group 3 the comment, which may span several lines.
entityRE = re.compile(r'<!ENTITY +(\w+) +CDATA +"([^"]+)" +-- +((?:.|\n)+?) *-->')


def parse(text, pos=0, endpos=None):
    """Collect the entity declarations found in *text*.

    Only the slice ``text[pos:endpos]`` is scanned; *endpos* defaults to
    the end of *text*.

    Returns a dictionary mapping each entity name to a
    ``(charcode, comment)`` tuple, where *charcode* is the quoted
    replacement text of the declaration and *comment* is the text of its
    trailing ``-- ... --`` comment.  When a name is declared more than
    once, the last declaration wins.
    """
    # The start offset passed by the caller is honoured; it used to be
    # reset to 0 unconditionally, which made the parameter a no-op.
    if endpos is None:
        endpos = len(text)
    return {
        match.group(1): (match.group(2), match.group(3))
        for match in entityRE.finditer(text, pos, endpos)
    }
33n/a
def writefile(f, defs):
    """Write *defs* to *f* as a Python snippet defining ``entitydefs``.

    *f* is any object with a ``write()`` method accepting strings.
    *defs* maps entity names to ``(charcode, comment)`` tuples, as
    returned by ``parse()``.

    Entries are written sorted by name, one per line, with the comment
    (whitespace collapsed onto a single line) appended as a ``#`` comment.
    A numeric character reference below 256 is written as a one-character
    string literal using an octal escape; every other value is written as
    the ``repr()`` of the original text.

    Numeric references may be decimal (``&#160;``) or hexadecimal
    (``&#xA0;``), and the trailing ``;`` is optional.  A reference whose
    digits cannot be parsed raises ``ValueError``.
    """
    f.write("entitydefs = {\n")
    for name, (charcode, comment) in sorted(defs.items()):
        if charcode[:2] == '&#':
            digits = charcode[2:]
            # Strip the terminator only when it is really there, so that
            # an unterminated reference does not lose its last digit.
            if digits.endswith(';'):
                digits = digits[:-1]
            if digits[:1] in ('x', 'X'):
                code = int(digits[1:], 16)
            else:
                code = int(digits)
            if code < 256:
                value = r"'\%o'" % code
            else:
                value = repr(charcode)
        else:
            value = repr(charcode)
        comment = ' '.join(comment.split())
        f.write(" '%s':\t%s, \t# %s\n" % (name, value, comment))
    f.write('\n}\n')
50n/a
if __name__ == '__main__':
    # Usage: parseentities.py [infile [outfile]]
    # A missing infile means stdin; a missing outfile means stdout.
    if len(sys.argv) > 1:
        # Close the input file as soon as it has been read.
        with open(sys.argv[1]) as infile:
            text = infile.read()
    else:
        text = sys.stdin.read()
    defs = parse(text)
    # The output file is opened only after the input has been read and
    # parsed, so a failure while reading does not leave a truncated file
    # behind, and the handle is closed (and flushed) when writing is done.
    if len(sys.argv) > 2:
        with open(sys.argv[2], 'w') as outfile:
            writefile(outfile, defs)
    else:
        writefile(sys.stdout, defs)