» Core Development > Code coverage > Tools/scripts/parse_html5_entities.py

Python code coverage for Tools/scripts/parse_html5_entities.py

# | count | content
#!/usr/bin/env python3
"""
Utility for parsing HTML5 entity definitions available from:

    http://dev.w3.org/html5/spec/entities.json

Written by Ezio Melotti and Iuliia Proskurnia.

"""

import os
import sys
import json
from urllib.request import urlopen
from html.entities import html5

# URL of the JSON entity table that the script downloads and parses.
entities_url = 'http://dev.w3.org/html5/spec/entities.json'

def get_json(url):
    """Download the JSON document at *url* and return the decoded object.

    The response body is read in full, decoded as UTF-8 and then parsed
    with json.loads().
    """
    with urlopen(url) as response:
        data = response.read().decode('utf-8')
    return json.loads(data)

def create_dict(entities):
    """Create the html5 dict from the decoded json object.

    Each key of *entities* has its leading '&' characters stripped and is
    mapped to the 'characters' item of the corresponding value.
    """
    return {
        name.lstrip('&'): value['characters']
        for name, value in entities.items()
    }

def compare_dicts(old, new):
    """Compare the old and new dicts and print the differences.

    Three sections are printed, each only when non-empty and each sorted
    by entity name: names present only in *new*, names present only in
    *old*, and names present in both whose values differ.
    """
    added = new.keys() - old.keys()
    if added:
        print('{} entitie(s) have been added:'.format(len(added)))
        for name in sorted(added):
            print(' {!r}: {!r}'.format(name, new[name]))
    removed = old.keys() - new.keys()
    if removed:
        print('{} entitie(s) have been removed:'.format(len(removed)))
        for name in sorted(removed):
            print(' {!r}: {!r}'.format(name, old[name]))
    # Names are unique, so sorting the tuples orders them by name alone.
    changed = sorted(
        (name, old[name], new[name])
        for name in old.keys() & new.keys()
        if old[name] != new[name]
    )
    if changed:
        print('{} entitie(s) have been modified:'.format(len(changed)))
        for item in changed:
            print(' {!r}: {!r} -> {!r}'.format(*item))

def write_items(entities, file=None):
    """Write the items of the dictionary in the specified file.

    The output is a Python source fragment of the form ``html5 = {...}``
    with one item per line; values are written with ascii() escaping.
    *file* is any object accepted by print(); when it is None (the
    default) the output goes to whatever sys.stdout is at call time, so
    a redirected stdout is honoured.
    """
    # The keys in the generated dictionary should be sorted
    # in a case-insensitive way, however, when two keys are equal,
    # the uppercase version should come first so that the result
    # looks like: ['Aacute', 'aacute', 'Aacute;', 'aacute;', ...]
    # Sorting on (lowercased key, original key) does this in one pass:
    # ties on the lowercased key fall back to the case-sensitive order,
    # in which uppercase chars come first.
    keys = sorted(entities, key=lambda name: (name.lower(), name))
    print('html5 = {', file=file)
    for name in keys:
        print(' {!r}: {!a},'.format(name, entities[name]), file=file)
    print('}', file=file)


if __name__ == '__main__':
    # without args print a diff between html.entities.html5 and new_html5
    # with --create print the new html5 dict
    # with --patch patch the Lib/html/entities.py file
    new_html5 = create_dict(get_json(entities_url))
    if '--create' in sys.argv:
        print('# map the HTML5 named character references to the '
              'equivalent Unicode character(s)')
        print('# Generated by {}. Do not edit manually.'.format(__file__))
        write_items(new_html5)
    elif '--patch' in sys.argv:
        fname = 'Lib/html/entities.py'
        temp_fname = fname + '.temp'
        # Python source files are UTF-8 by default, so read and write the
        # module with an explicit encoding instead of the locale's one.
        with open(fname, encoding='utf-8') as f1, \
                open(temp_fname, 'w', encoding='utf-8') as f2:
            skip = False
            for line in f1:
                if line.startswith('html5 = {'):
                    # emit the regenerated dict in place of the old one
                    write_items(new_html5, file=f2)
                    skip = True
                    continue
                if skip:
                    # skip the old items until the }
                    if line.startswith('}'):
                        skip = False
                    continue
                f2.write(line)
        # os.replace swaps the file in one step (and overwrites on every
        # platform), so there is no window in which fname does not exist.
        os.replace(temp_fname, fname)
    else:
        if html5 == new_html5:
            print('The current dictionary is updated.')
        else:
            compare_dicts(html5, new_html5)
            print('Run "./python {0} --patch" to update Lib/html/entities.py '
                  'or "./python {0} --create" to see the generated '
                  'dictionary.'.format(__file__))