Python code coverage for Tools/scripts/pindent.py

#	count	content
1	n/a	#! /usr/bin/env python3
2	n/a
3	n/a	# This file contains a class and a main program that perform three
4	n/a	# related (though complementary) formatting operations on Python
5	n/a	# programs. When called as "pindent -c", it takes a valid Python
6	n/a	# program as input and outputs a version augmented with block-closing
7	n/a	# comments. When called as "pindent -d", it assumes its input is a
8	n/a	# Python program with block-closing comments and outputs a commentless
9	n/a	# version. When called as "pindent -r" it assumes its input is a
10	n/a	# Python program with block-closing comments but with its indentation
11	n/a	# messed up, and outputs a properly indented version.
12	n/a
13	n/a	# A "block-closing comment" is a comment of the form '# end <keyword>'
14	n/a	# where <keyword> is the keyword that opened the block. If the
15	n/a	# opening keyword is 'def' or 'class', the function or class name may
16	n/a	# be repeated in the block-closing comment as well. Here is an
17	n/a	# example of a program fully augmented with block-closing comments:
18	n/a
19	n/a	# def foobar(a, b):
20	n/a	#    if a == b:
21	n/a	#        a = a+1
22	n/a	#    elif a < b:
23	n/a	#        b = b-1
24	n/a	#        if b > a: a = a-1
25	n/a	#        # end if
26	n/a	#    else:
27	n/a	#        print 'oops!'
28	n/a	#    # end if
29	n/a	# # end def foobar
30	n/a
31	n/a	# Note that only the last part of an if...elif...else... block needs a
32	n/a	# block-closing comment; the same is true for other compound
33	n/a	# statements (e.g. try...except). Also note that "short-form" blocks
34	n/a	# like the second 'if' in the example must be closed as well;
35	n/a	# otherwise the 'else' in the example would be ambiguous (remember
36	n/a	# that indentation is not significant when interpreting block-closing
37	n/a	# comments).
38	n/a
39	n/a	# The operations are idempotent (i.e. applied to their own output
40	n/a	# they yield an identical result). Running first "pindent -c" and
41	n/a	# then "pindent -r" on a valid Python program produces a program that
42	n/a	# is semantically identical to the input (though its indentation may
43	n/a	# be different). Running "pindent -d" on that output produces a
44	n/a	# program that only differs from the original in indentation.
45	n/a
46	n/a	# Other options:
47	n/a	# -s stepsize: set the indentation step size (default 8)
48	n/a	# -t tabsize : set the number of spaces a tab character is worth (default 8)
49	n/a	# -e : expand TABs into spaces
50	n/a	# file ... : input file(s) (default standard input)
51	n/a	# The results always go to standard output
52	n/a
53	n/a	# Caveats:
54	n/a	# - comments ending in a backslash will be mistaken for continued lines
55	n/a	# - continuations using backslash are always left unchanged
56	n/a	# - continuations inside parentheses are not extra indented by -r
57	n/a	# but must be indented for -c to work correctly (this breaks
58	n/a	# idempotency!)
59	n/a	# - continued lines inside triple-quoted strings are totally garbled
60	n/a
61	n/a	# Secret feature:
62	n/a	# - On input, a block may also be closed with an "end statement" --
63	n/a	# this is a block-closing comment without the '#' sign.
64	n/a
65	n/a	# Possible improvements:
66	n/a	# - check syntax based on transitions in 'next' table
67	n/a	# - better error reporting
68	n/a	# - better error recovery
69	n/a	# - check identifier after class/def
70	n/a
71	n/a	# The following wishes need a more complete tokenization of the source:
72	n/a	# - Don't get fooled by comments ending in backslash
73	n/a	# - reindent continuation lines indicated by backslash
74	n/a	# - handle continuation lines inside parentheses/braces/brackets
75	n/a	# - handle triple quoted strings spanning lines
76	n/a	# - realign comments
77	n/a	# - optionally do much more thorough reformatting, a la C indent
78	n/a
# Defaults (the command-line driver overrides them with -s, -t and -e)
STEPSIZE = 8            # columns added per nesting level when re-indenting
TABSIZE = 8             # number of columns one TAB character is worth
EXPANDTABS = False      # when true, output TABs are expanded into spaces
83	n/a
84	n/a	import io
85	n/a	import re
86	n/a	import sys
87	n/a
# For each block keyword: the keywords that may continue or close the
# block it introduces.  Only the keys are consulted by PythonIndenter;
# the values document the legal transitions.
next = {
    'if': ('elif', 'else', 'end'),
    'elif': ('elif', 'else', 'end'),
    'while': ('else', 'end'),
    'for': ('else', 'end'),
    'try': ('except', 'finally'),
    'except': ('except', 'else', 'finally', 'end'),
    'else': 'end',
    'finally': 'end',
    'with': 'end',
    'def': 'end',
    'class': 'end',
    'end': (),
}
# Keywords that open a new block (as opposed to continuing one).
start = ('if', 'while', 'for', 'try', 'with', 'def', 'class')
97	n/a
class PythonIndenter:
    """Add, remove, or re-indent by, block-closing comments.

    An instance reads Python source from *fpi* and writes the result to
    *fpo*.  Exactly one of complete(), delete() or reformat() is meant to
    be called per instance; each consumes the input up to EOF.
    """

    def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
                 indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
        """Remember the streams and settings and compile the line patterns.

        fpi        -- input file object (needs readline())
        fpo        -- output file object (needs write())
        indentsize -- columns per nesting level used by reformat()
        tabsize    -- columns one TAB is worth
        expandtabs -- if true, TABs in the output are expanded to spaces
        """
        self.fpi = fpi
        self.fpo = fpo
        self.indentsize = indentsize
        self.tabsize = tabsize
        self.lineno = 0
        self.expandtabs = expandtabs
        self._write = fpo.write
        # Leading keyword of a (possibly backslash-continued) line, with
        # an optional identifier after it (the name after def/class).
        self.kwprog = re.compile(
                r'^(?:\s|\\\n)*(?P<kw>[a-z]+)'
                r'((?:\s|\\\n)+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # Block-closing comment '# end <kw> [<id>]'; the '#' is optional
        # so that bare "end statements" are accepted on input as well.
        self.endprog = re.compile(
                r'^(?:\s|\\\n)*#?\s*end\s+(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # Leading run of blanks and TABs (always matches, possibly empty).
        self.wsprog = re.compile(r'^[ \t]*')
    # end def __init__

    def write(self, line):
        """Write *line* to the output, expanding TABs if so configured."""
        if self.expandtabs:
            self._write(line.expandtabs(self.tabsize))
        else:
            self._write(line)
        # end if
    # end def write

    def readline(self):
        """Read one physical line, counting it in self.lineno.

        Returns '' at EOF (in which case the counter is not advanced).
        """
        line = self.fpi.readline()
        if line: self.lineno += 1
        # end if
        return line
    # end def readline

    def error(self, fmt, *args):
        """Report an error on stderr and mark it in the output.

        *fmt* is %-formatted with *args* when any are given.  The message
        is also written to the output stream as a '### ... ###' line.
        """
        if args: fmt = fmt % args
        # end if
        sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
        self.write('### %s ###\n' % fmt)
    # end def error

    def getline(self):
        """Read one logical line, joining backslash-continued lines.

        Physical lines ending in a backslash are concatenated (newlines
        kept) with the lines that follow.  Returns '' at EOF.
        """
        line = self.readline()
        while line[-2:] == '\\\n':
            line2 = self.readline()
            if not line2: break
            # end if
            line += line2
        # end while
        return line
    # end def getline

    def putline(self, line, indent):
        """Write *line* re-indented to nesting level *indent*.

        The existing leading blanks/TABs are dropped and replaced by
        indent*indentsize columns, expressed as TABs followed by spaces.
        Lines that are empty after stripping are written without indent.
        """
        tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
        i = self.wsprog.match(line).end()
        line = line[i:]
        if line[:1] not in ('\n', '\r', ''):
            line = '\t'*tabs + ' '*spaces + line
        # end if
        self.write(line)
    # end def putline

    def reformat(self):
        """Re-indent the input using its block-closing comments.

        The nesting level is derived from the block keywords and the
        'end' comments only; the input's own indentation is ignored.
        Problems are reported through error().
        """
        # Each entry is (opening keyword, most recent keyword of the block).
        stack = []
        while True:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                kw2 = m.group('kw')
                if not stack:
                    self.error('unexpected end')
                elif stack.pop()[0] != kw2:
                    self.error('unmatched end')
                # end if
                # The closing comment lines up with the block it closes.
                self.putline(line, len(stack))
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    self.putline(line, len(stack))
                    stack.append((kw, kw))
                    continue
                # end if
                if kw in next and stack:
                    # A continuation keyword (elif, else, ...) is written
                    # at the level of the statement that opened the block.
                    self.putline(line, len(stack)-1)
                    kwa, kwb = stack[-1]
                    stack[-1] = kwa, kw
                    continue
                # end if
            # end if
            self.putline(line, len(stack))
        # end while
        if stack:
            self.error('unterminated keywords')
            for kwa, kwb in stack:
                self.write('\t%s\n' % kwa)
            # end for
        # end if
    # end def reformat

    def delete(self):
        """Copy the input to the output without block-closing comments.

        Block openers and closers are counted; a warning is written to
        stderr when the two counts differ.
        """
        begin_counter = 0
        end_counter = 0
        while True:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                end_counter += 1
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    begin_counter += 1
                # end if
            # end if
            self.write(line)
        # end while
        if begin_counter - end_counter < 0:
            sys.stderr.write('Warning: input contained more end tags than expected\n')
        elif begin_counter - end_counter > 0:
            sys.stderr.write('Warning: input contained less end tags than expected\n')
        # end if
    # end def delete

    def _end_comment(self, kw, ident):
        """Return the closing comment line for keyword *kw* and name *ident*."""
        if ident:
            return '# end %s %s\n' % (kw, ident)
        # end if
        return '# end %s\n' % kw
    # end def _end_comment

    def complete(self):
        """Copy the input to the output, adding missing closing comments.

        The block structure is taken from the input's indentation.  A
        closing comment that is already present is kept; error() is
        called when its keyword does not match the block it closes.
        """
        # Saved (currentws, firstkw, lastkw, topid) of enclosing levels.
        stack = []
        # Blank and comment-only lines held back until the next real line,
        # so that a generated closing comment is written before them.
        todo = []
        # currentws: indentation string of the current level
        # firstkw/lastkw: opening / latest keyword of the open block at
        # this level ('' if none); topid: name after def/class, if any.
        currentws = thisid = firstkw = lastkw = topid = ''
        while True:
            line = self.getline()
            i = self.wsprog.match(line).end()
            m = self.endprog.match(line)
            if m:
                thiskw = 'end'
                endkw = m.group('kw')
                thisid = m.group('id')
            else:
                m = self.kwprog.match(line)
                if m:
                    thiskw = m.group('kw')
                    if thiskw not in next:
                        thiskw = ''
                    # end if
                    if thiskw in ('def', 'class'):
                        thisid = m.group('id')
                    else:
                        thisid = ''
                    # end if
                elif line[i:i+1] in ('\n', '#'):
                    todo.append(line)
                    continue
                else:
                    # Ordinary statement, or '' at EOF (indent 0, which
                    # closes every block that is still open).
                    thiskw = ''
                # end if
            # end if
            indentws = line[:i]
            indent = len(indentws.expandtabs(self.tabsize))
            current = len(currentws.expandtabs(self.tabsize))
            # Dedent: close the open block of every level being left.
            while indent < current:
                if firstkw:
                    self.write(currentws + self._end_comment(firstkw, topid))
                    firstkw = lastkw = ''
                # end if
                currentws, firstkw, lastkw, topid = stack.pop()
                current = len(currentws.expandtabs(self.tabsize))
            # end while
            if indent == current and firstkw:
                if thiskw == 'end':
                    # The block is already closed explicitly.
                    if endkw != firstkw:
                        self.error('mismatched end')
                    # end if
                    firstkw = lastkw = ''
                elif not thiskw or thiskw in start:
                    # A new statement at the same level ends the block;
                    # continuation keywords (elif, else, ...) do not.
                    self.write(currentws + self._end_comment(firstkw, topid))
                    firstkw = lastkw = topid = ''
                # end if
            # end if
            if indent > current:
                stack.append((currentws, firstkw, lastkw, topid))
                if thiskw and thiskw not in start:
                    # error
                    thiskw = ''
                # end if
                currentws, firstkw, lastkw, topid = \
                          indentws, thiskw, thiskw, thisid
            # end if
            if thiskw:
                if thiskw in start:
                    firstkw = lastkw = thiskw
                    topid = thisid
                else:
                    lastkw = thiskw
                # end if
            # end if
            for l in todo: self.write(l)
            # end for
            todo = []
            if not line: break
            # end if
            self.write(line)
        # end while
    # end def complete
# end class PythonIndenter
325	n/a
326	n/a	# Simplified user interface
327	n/a	# - xxx_filter(input, output): read and write file objects
328	n/a	# - xxx_string(s): take and return string object
329	n/a	# - xxx_file(filename): process file in place, return true iff changed
330	n/a
def complete_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from *input*; write it to *output* with closing comments added."""
    indenter = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    indenter.complete()
# end def complete_filter
336	n/a
def delete_filter(input= sys.stdin, output = sys.stdout,
                  stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from *input*; write it to *output* without closing comments."""
    indenter = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    indenter.delete()
# end def delete_filter
342	n/a
def reformat_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a completed program from *input*; write it re-indented to *output*."""
    indenter = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    indenter.reformat()
# end def reformat_filter
348	n/a
def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return the program text *source* with closing comments added."""
    reader, writer = io.StringIO(source), io.StringIO()
    PythonIndenter(reader, writer, stepsize, tabsize, expandtabs).complete()
    return writer.getvalue()
# end def complete_string
356	n/a
def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return the program text *source* with closing comments removed."""
    reader, writer = io.StringIO(source), io.StringIO()
    PythonIndenter(reader, writer, stepsize, tabsize, expandtabs).delete()
    return writer.getvalue()
# end def delete_string
364	n/a
def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return the completed program text *source*, re-indented."""
    reader, writer = io.StringIO(source), io.StringIO()
    PythonIndenter(reader, writer, stepsize, tabsize, expandtabs).reformat()
    return writer.getvalue()
# end def reformat_string
372	n/a
def make_backup(filename):
    """Move *filename* aside to '<filename>~', replacing an older backup.

    Failures are not fatal: a message is printed on stderr and the
    function carries on.
    """
    import os

    def complain(message):
        print(message, file=sys.stderr)
    # end def complain

    backup = filename + '~'
    # Remove a stale backup first so that the rename below can succeed.
    if os.path.lexists(backup):
        try:
            os.remove(backup)
        except OSError:
            complain("Can't remove backup %r" % (backup,))
        # end try
    # end if
    try:
        os.rename(filename, backup)
    except OSError:
        complain("Can't rename %r to %r" % (filename, backup))
    # end try
# end def make_backup
389	n/a
def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Add closing comments to *filename* in place.

    Returns 1 if the file was rewritten (after make_backup() was called
    on it) and 0 if it was already complete and left untouched.
    """
    with open(filename, 'r') as f:
        original = f.read()
    # end with
    updated = complete_string(original, stepsize, tabsize, expandtabs)
    if updated != original:
        make_backup(filename)
        with open(filename, 'w') as f:
            f.write(updated)
        # end with
        return 1
    # end if
    return 0
# end def complete_file
403	n/a
def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Remove closing comments from *filename* in place.

    Returns 1 if the file was rewritten (after make_backup() was called
    on it) and 0 if there was nothing to change.
    """
    with open(filename, 'r') as f:
        original = f.read()
    # end with
    updated = delete_string(original, stepsize, tabsize, expandtabs)
    if updated != original:
        make_backup(filename)
        with open(filename, 'w') as f:
            f.write(updated)
        # end with
        return 1
    # end if
    return 0
# end def delete_file
417	n/a
def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Re-indent the completed program in *filename* in place.

    Returns 1 if the file was rewritten (after make_backup() was called
    on it) and 0 if there was nothing to change.
    """
    with open(filename, 'r') as f:
        original = f.read()
    # end with
    updated = reformat_string(original, stepsize, tabsize, expandtabs)
    if updated != original:
        make_backup(filename)
        with open(filename, 'w') as f:
            f.write(updated)
        # end with
        return 1
    # end if
    return 0
# end def reformat_file
431	n/a
432	n/a	# Test program when called as a script
433	n/a
# Help text written to stderr on command-line errors.  The defaults shown
# are filled in from the module-level STEPSIZE and TABSIZE via vars().
usage = """
usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
-c         : complete a correctly indented program (add #end directives)
-d         : delete #end directives
-r         : reformat a completed program (use #end directives)
-s stepsize: indentation step (default %(STEPSIZE)d)
-t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
-e         : expand TABs into spaces (default OFF)
[file] ... : files are changed in place, with backups in file~
If no files are specified or a single - is given,
the program acts as a filter (reads stdin, writes stdout).
""" % vars()
446	n/a
def error_both(op1, op2):
    """Complain about two conflicting action options and exit with status 2.

    op1 -- the option just seen, as typed (e.g. '-d')
    op2 -- the name of the action chosen earlier; its first letter is
           shown as the option letter
    """
    pieces = ['Error: You can not specify both ', op1, ' and -', op2[0],
              ' at the same time\n']
    sys.stderr.write(''.join(pieces))
    sys.stderr.write(usage)
    sys.exit(2)
# end def error_both
452	n/a
def test():
    """Command-line driver: parse sys.argv and run the requested action.

    Exactly one of -c, -d or -r must be given.  With no file arguments,
    or a single '-', the program filters stdin to stdout; otherwise each
    named file is processed in place.  Usage errors are reported on
    stderr and end the program with exit status 2.
    """
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
    except getopt.error as msg:
        sys.stderr.write('Error: %s\n' % msg)
        sys.stderr.write(usage)
        sys.exit(2)
    # end try
    action_names = {'-c': 'complete', '-d': 'delete', '-r': 'reformat'}
    action = None
    stepsize = STEPSIZE
    tabsize = TABSIZE
    expandtabs = EXPANDTABS
    for o, a in opts:
        if o in action_names:
            if action: error_both(o, action)
            # end if
            action = action_names[o]
        elif o in ('-s', '-t'):
            # Report a non-numeric size as a usage error rather than
            # letting int() raise an uncaught ValueError.
            try:
                value = int(a)
            except ValueError:
                sys.stderr.write(
                    'Error: option %s requires an integer argument, not %r\n'
                    % (o, a))
                sys.stderr.write(usage)
                sys.exit(2)
            # end try
            if o == '-s':
                stepsize = value
            else:
                tabsize = value
            # end if
        elif o == '-e':
            expandtabs = True
        # end if
    # end for
    if not action:
        sys.stderr.write(
            'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
        sys.stderr.write(usage)
        sys.exit(2)
    # end if
    # Explicit dispatch tables instead of eval() on a constructed name.
    filters = {'complete': complete_filter,
               'delete': delete_filter,
               'reformat': reformat_filter}
    file_actions = {'complete': complete_file,
                    'delete': delete_file,
                    'reformat': reformat_file}
    if not args or args == ['-']:
        filters[action](sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
    else:
        process = file_actions[action]
        for filename in args:
            process(filename, stepsize, tabsize, expandtabs)
        # end for
    # end if
# end def test

if __name__ == '__main__':
    test()
# end if