Python code coverage for Tools/scripts/highlight.py

#	count	content
1	n/a	#!/usr/bin/env python3
2	n/a	'''Add syntax highlighting to Python source code'''
3	n/a
4	n/a	__author__ = 'Raymond Hettinger'
5	n/a
6	n/a	import builtins
7	n/a	import functools
8	n/a	import html as html_module
9	n/a	import keyword
10	n/a	import re
11	n/a	import tokenize
12	n/a
13	n/a	#### Analyze Python Source #################################
14	n/a
def is_builtin(s):
    'Return True when the builtins module has an attribute named s'
    # A unique sentinel distinguishes "attribute missing" from any real value.
    missing = object()
    return getattr(builtins, s, missing) is not missing
18	n/a
def combine_range(lines, start, end):
    '''Return the text lying between two (row, col) positions, plus end.

    Rows are 1-based indexes into lines and columns are 0-based offsets
    within a line.  The result is a tuple: (joined_text, end).
    '''
    first_row, first_col = start
    last_row, last_col = end
    if first_row != last_row:
        # Tail of the first line, every whole line in between, and the
        # head of the last line.
        head = lines[first_row - 1][first_col:]
        middle = ''.join(lines[first_row:last_row - 1])
        tail = lines[last_row - 1][:last_col]
        return head + middle + tail, end
    return lines[first_row - 1][first_col:last_col], end
26	n/a
def analyze_python(source):
    '''Split Python source into classified chunks for syntax highlighting.

    Yields (category, text) tuples.  The category is one of 'comment',
    'operator', 'string', 'docstring', 'definition', 'defname', 'keyword'
    or 'builtin'; unclassified text between those tokens is yielded with
    an empty category.
    '''
    lines = source.splitlines(True)
    lines.append('')
    readline = functools.partial(next, iter(lines), '')
    cur_type, cur_text = tokenize.COMMENT, ''
    # Position (row, col) up to which the source has already been emitted.
    written = (1, 0)
    for token in tokenize.generate_tokens(readline):
        last_type, last_text = cur_type, cur_text
        cur_type, cur_text = token.type, token.string
        start, stop = token.start, token.end

        if cur_type == tokenize.COMMENT:
            kind = 'comment'
        elif cur_type == tokenize.OP:
            # Brackets and punctuation are left uncolored.
            kind = '' if cur_text[:1] in '{}[](),.:;@' else 'operator'
        elif cur_type == tokenize.STRING:
            # A string right after an INDENT or starting in column 0 is
            # treated as a docstring.
            if last_type == tokenize.INDENT or start[1] == 0:
                kind = 'docstring'
            else:
                kind = 'string'
        elif cur_type == tokenize.NAME:
            if cur_text in ('def', 'class', 'import', 'from'):
                kind = 'definition'
            elif last_text in ('def', 'class'):
                kind = 'defname'
            elif keyword.iskeyword(cur_text):
                kind = 'keyword'
            elif is_builtin(cur_text) and last_text != '.':
                kind = 'builtin'
            else:
                kind = ''
        else:
            kind = ''

        if not kind:
            continue
        # Flush the unclassified text that precedes this token, then the
        # token itself.
        plain, written = combine_range(lines, written, start)
        yield '', plain
        written = stop
        yield kind, cur_text

    # Emit whatever remains up to the end of the final token.
    trailing, written = combine_range(lines, written, stop)
    yield '', trailing
65	n/a
66	n/a	#### Raw Output ###########################################
67	n/a
def raw_highlight(classified_text):
    'Render each (kind, text) pair as one line: right-aligned kind, then repr of text'
    return ''.join(
        '%15s: %r\n' % (kind or 'plain', text)
        for kind, text in classified_text
    )
74	n/a
75	n/a	#### ANSI Output ###########################################
76	n/a
# Maps each token category to an (opener, closer) pair of ANSI escape
# sequences; every category shares the same '\033[0m' closer.
default_ansi = dict(
    comment=('\033[0;31m', '\033[0m'),
    string=('\033[0;32m', '\033[0m'),
    docstring=('\033[0;32m', '\033[0m'),
    keyword=('\033[0;33m', '\033[0m'),
    builtin=('\033[0;35m', '\033[0m'),
    definition=('\033[0;33m', '\033[0m'),
    defname=('\033[0;34m', '\033[0m'),
    operator=('\033[0;33m', '\033[0m'),
)
87	n/a
def ansi_highlight(classified_text, colors=default_ansi):
    '''Colorize classified text for a terminal using ANSI escape sequences.

    colors maps a category to an (opener, closer) pair; text whose category
    has no entry is emitted unchanged.
    '''
    # http://en.wikipedia.org/wiki/ANSI_escape_code
    def paint(kind, text):
        opener, closer = colors.get(kind, ('', ''))
        return opener + text + closer

    return ''.join(paint(kind, text) for kind, text in classified_text)
96	n/a
97	n/a	#### HTML Output ###########################################
98	n/a
def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'):
    '''Render classified text as an HTML fragment.

    Text is HTML-escaped; text with a non-empty category is wrapped in a
    <span> whose class is that category.  The fragment is enclosed by
    opener and closer.
    '''
    def render(kind, text):
        escaped = html_module.escape(text)
        if not kind:
            return escaped
        return '<span class="%s">' % kind + escaped + '</span>'

    body = ''.join(render(kind, text) for kind, text in classified_text)
    return opener + body + closer
110	n/a
# CSS rules for the highlighted page: each key is a class selector for a
# token category, each value the declaration block applied to it.
default_css = dict([
    ('.comment', '{color: crimson;}'),
    ('.string', '{color: forestgreen;}'),
    ('.docstring', '{color: forestgreen; font-style:italic;}'),
    ('.keyword', '{color: darkorange;}'),
    ('.builtin', '{color: purple;}'),
    ('.definition', '{color: darkorange; font-weight:bold;}'),
    ('.defname', '{color: blue;}'),
    ('.operator', '{color: brown;}'),
])
121	n/a
# Page template for str.format(); it expects the fields {title}, {css}
# and {body}.
default_html = (
    '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\n'
    '"http://www.w3.org/TR/html4/strict.dtd">\n'
    '<html>\n'
    '<head>\n'
    '<meta http-equiv="Content-type" content="text/html;charset=UTF-8">\n'
    '<title> {title} </title>\n'
    '<style type="text/css">\n'
    '{css}\n'
    '</style>\n'
    '</head>\n'
    '<body>\n'
    '{body}\n'
    '</body>\n'
    '</html>\n'
)
138	n/a
def build_html_page(classified_text, title='python',
                    css=default_css, html=default_html):
    '''Build a full HTML page showing the colorized source code.

    css maps selectors to declaration blocks and is rendered one rule per
    line; html is a str.format() template with {title}, {css} and {body}
    fields.  The title is HTML-escaped before insertion.
    '''
    rules = ['%s %s' % (selector, declarations)
             for selector, declarations in css.items()]
    # Keyword arguments are evaluated in the order written: stylesheet,
    # body, then title.
    return html.format(
        css='\n'.join(rules),
        body=html_highlight(classified_text),
        title=html_module.escape(title),
    )
146	n/a
147	n/a	#### LaTeX Output ##########################################
148	n/a
# Maps each token category to the body of a one-argument LaTeX macro;
# '#1' stands for the highlighted text.
default_latex_commands = dict(
    comment=r'{\color{red}#1}',
    string=r'{\color{ForestGreen}#1}',
    docstring=r'{\emph{\color{ForestGreen}#1}}',
    keyword=r'{\color{orange}#1}',
    builtin=r'{\color{purple}#1}',
    definition=r'{\color{orange}#1}',
    defname=r'{\color{blue}#1}',
    operator=r'{\color{brown}#1}',
)
159	n/a
# Document template for %-formatting with a mapping; it expects the keys
# 'macros', 'title' and 'body'.  The value starts with a newline.
default_latex_document = (
    '\n'
    r'\documentclass{article}' '\n'
    r'\usepackage{alltt}' '\n'
    r'\usepackage{upquote}' '\n'
    r'\usepackage{color}' '\n'
    r'\usepackage[usenames,dvipsnames]{xcolor}' '\n'
    r'\usepackage[cm]{fullpage}' '\n'
    r'%(macros)s' '\n'
    r'\begin{document}' '\n'
    r'\center{\LARGE{%(title)s}}' '\n'
    r'\begin{alltt}' '\n'
    r'%(body)s' '\n'
    r'\end{alltt}' '\n'
    r'\end{document}' '\n'
)
175	n/a
def alltt_escape(s):
    'Escape backslashes and curly braces in s for use inside an alltt environment'
    # One translation pass, so braces inside the inserted
    # \textbackslash{} are not escaped again.
    table = str.maketrans({
        '{': r'\{',
        '}': r'\}',
        '\\': r'\textbackslash{}',
    })
    return s.translate(table)
180	n/a
def latex_highlight(classified_text, title = 'python',
                    commands = default_latex_commands,
                    document = default_latex_document):
    '''Create a complete LaTeX document with colorized source code.

    classified_text is an iterable of (kind, text) pairs.  commands maps
    each kind to the body of a one-argument macro; a \\py<kind> macro is
    defined for every entry.  document is a %-format template that
    receives the keys 'title', 'macros' and 'body'.

    Returns the rendered document as a string.
    '''
    macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items())
    result = []
    for kind, text in classified_text:
        if kind:
            result.append(r'\py%s{' % kind)
        # Only the text is escaped; the macro wrapper must stay live LaTeX.
        result.append(alltt_escape(text))
        if kind:
            result.append('}')
    # Format the caller-supplied template rather than the module default,
    # so a custom document argument takes effect.
    # NOTE(review): title is inserted without LaTeX escaping.
    return document % dict(title=title, macros=macros, body=''.join(result))
194	n/a
195	n/a
# Command-line interface: classify a source file and emit it in the
# selected output format (ANSI by default).
if __name__ == '__main__':
    import argparse
    import os.path
    import sys
    import textwrap
    import webbrowser

    parser = argparse.ArgumentParser(
        description = 'Add syntax highlighting to Python source code',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog = textwrap.dedent('''
            examples:

              # Show syntax highlighted code in the terminal window
              $ ./highlight.py myfile.py

              # Colorize myfile.py and display in a browser
              $ ./highlight.py -b myfile.py

              # Create an HTML section to embed in an existing webpage
              $ ./highlight.py -s myfile.py

              # Create a complete HTML file
              $ ./highlight.py -c myfile.py > myfile.html

              # Create a PDF using LaTeX
              $ ./highlight.py -l myfile.py | pdflatex

        '''))
    parser.add_argument('sourcefile', metavar = 'SOURCEFILE',
                        help = 'file containing Python sourcecode')
    parser.add_argument('-b', '--browser', action = 'store_true',
                        help = 'launch a browser to show results')
    parser.add_argument('-c', '--complete', action = 'store_true',
                        help = 'build a complete html webpage')
    parser.add_argument('-l', '--latex', action = 'store_true',
                        help = 'build a LaTeX document')
    parser.add_argument('-r', '--raw', action = 'store_true',
                        help = 'raw parse of categorized text')
    parser.add_argument('-s', '--section', action = 'store_true',
                        help = 'show an HTML section rather than a complete webpage')
    args = parser.parse_args()

    if args.section and (args.browser or args.complete):
        parser.error('The -s/--section option is incompatible with '
                     'the -b/--browser or -c/--complete options')

    sourcefile = args.sourcefile
    # tokenize.open() decodes the file using its declared source encoding
    # (or UTF-8 by default) instead of the locale's encoding.
    with tokenize.open(sourcefile) as f:
        source = f.read()
    classified_text = analyze_python(source)

    # The first matching option decides the output format.
    if args.raw:
        encoded = raw_highlight(classified_text)
    elif args.complete or args.browser:
        encoded = build_html_page(classified_text, title=sourcefile)
    elif args.section:
        encoded = html_highlight(classified_text)
    elif args.latex:
        encoded = latex_highlight(classified_text, title=sourcefile)
    else:
        encoded = ansi_highlight(classified_text)

    if args.browser:
        # The page is written to the current directory, named after the
        # source file.
        htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html'
        # Write UTF-8 to match the charset declared in the default page
        # template.
        with open(htmlfile, 'w', encoding='utf-8') as f:
            f.write(encoded)
        webbrowser.open('file://' + os.path.abspath(htmlfile))
    else:
        sys.stdout.write(encoded)
263	n/a	webbrowser.open('file://' + os.path.abspath(htmlfile))
264	n/a	else:
265	n/a	sys.stdout.write(encoded)