Python code coverage for Tools/scripts/reindent.py

#	count	content
1	n/a	#! /usr/bin/env python3
2	n/a
3	n/a	# Released to the public domain, by Tim Peters, 03 October 2000.
4	n/a
5	n/a	"""reindent [-d][-r][-v] [ path ... ]
6	n/a
7	n/a	-d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8	n/a	-r (--recurse) Recurse. Search for all .py files in subdirectories too.
9	n/a	-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10	n/a	-v (--verbose) Verbose. Print informative msgs; else no output.
11	n/a	(--newline) Newline. Specify the newline character to use (CRLF, LF).
12	n/a	Default is the same as the original file.
13	n/a	-h (--help) Help. Print this usage information and exit.
14	n/a
15	n/a	Change Python (.py) files to use 4-space indents and no hard tab characters.
16	n/a	Also trim excess spaces and tabs from ends of lines, and remove empty lines
17	n/a	at the end of files. Also ensure the last line ends with a newline.
18	n/a
19	n/a	If no paths are given on the command line, reindent operates as a filter,
20	n/a	reading a single source file from standard input and writing the transformed
21	n/a	source to standard output. In this case, the -d, -r and -v flags are
22	n/a	ignored.
23	n/a
24	n/a	You can pass one or more file and/or directory paths. When a directory
25	n/a	path, all .py files within the directory will be examined, and, if the -r
26	n/a	option is given, likewise recursively for subdirectories.
27	n/a
28	n/a	If output is not to standard output, reindent overwrites files in place,
29	n/a	renaming the originals with a .bak extension. If it finds nothing to
30	n/a	change, the file is left alone. If reindent does change a file, the changed
31	n/a	file is a fixed-point for future runs (i.e., running reindent on the
32	n/a	resulting .py file won't change it again).
33	n/a
34	n/a	The hard part of reindenting is figuring out what to do with comment
35	n/a	lines. So long as the input files get a clean bill of health from
36	n/a	tabnanny.py, reindent should do a good job.
37	n/a
38	n/a	The backup file is a copy of the one that is being reindented. The ".bak"
39	n/a	file is generated with shutil.copy(), but some corner cases regarding
40	n/a	user/group and permissions could leave the backup file more readable than
41	n/a	you'd prefer. You can always use the --nobackup option to prevent this.
42	n/a	"""
43	n/a
44	n/a	__version__ = "1"
45	n/a
46	n/a	import tokenize
47	n/a	import os
48	n/a	import shutil
49	n/a	import sys
50	n/a
# Module-level option flags.  main() rebinds them from the command line
# and check() reads them.
verbose = False     # set by -v / --verbose
recurse = False     # set by -r / --recurse
dryrun = False      # set by -d / --dryrun
makebackup = True   # cleared by -n / --nobackup
# A specified newline to be used in the output (set by --newline option)
spec_newline = None
57	n/a
58	n/a
def usage(msg=None):
    """Print *msg* to standard error.

    When *msg* is None the module docstring is printed instead.
    """
    text = __doc__ if msg is None else msg
    print(text, file=sys.stderr)
63	n/a
64	n/a
def errprint(*args):
    """Write the arguments to standard error as one line.

    Each argument is converted with str(); the pieces are joined with
    single spaces and followed by a newline.
    """
    sys.stderr.write(" ".join(map(str, args)) + "\n")
68	n/a
def main():
    """Command-line entry point.

    Parses sys.argv, stores the selected options in the module-level
    flags, and then either filters standard input to standard output (no
    path arguments) or calls check() on every path argument.  A getopt
    error, an unsupported --newline value, or -h/--help prints usage
    text and returns without processing anything.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup, spec_newline

    long_options = ["dryrun", "recurse", "nobackup", "verbose",
                    "newline=", "help"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh", long_options)
    except getopt.error as msg:
        usage(msg)
        return

    newline_by_name = {'CRLF': '\r\n', 'LF': '\n'}
    for option, value in opts:
        if option in ('-h', '--help'):
            usage()
            return
        if option in ('-d', '--dryrun'):
            dryrun = True
        elif option in ('-r', '--recurse'):
            recurse = True
        elif option in ('-n', '--nobackup'):
            makebackup = False
        elif option in ('-v', '--verbose'):
            verbose = True
        elif option == '--newline':
            name = value.upper()
            if name not in newline_by_name:
                usage()
                return
            spec_newline = newline_by_name[name]

    if args:
        for arg in args:
            check(arg)
        return

    # No paths given: act as a filter from stdin to stdout.
    r = Reindenter(sys.stdin)
    r.run()
    r.write(sys.stdout)
102	n/a
103	n/a
def check(file):
    """Reindent one file, or every candidate file in a directory.

    If *file* is a directory (and not a symlink), each entry whose name
    ends in ".py" (case-insensitively) is checked; when the module-level
    ``recurse`` flag is set, non-hidden, non-symlink subdirectories are
    descended into as well.  Nothing is returned in that case.

    Otherwise the file's source encoding is detected, the file is read
    and run through Reindenter.  Unless ``dryrun`` is set, a changed
    file is rewritten in place, after first being copied to
    ``file + ".bak"`` when ``makebackup`` is set.

    Returns True if the file needed changes, False if it did not, and
    None when a directory was processed or the file was skipped because
    of an error.  Errors reading the file, an invalid encoding
    declaration, and mixed newlines (without --newline) are reported on
    standard error instead of being raised, so remaining files are still
    processed.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        # Both opens are guarded: the first one is the one most likely to
        # fail (missing or unreadable file).
        with open(file, 'rb') as f:
            encoding, _ = tokenize.detect_encoding(f.readline)
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except SyntaxError as msg:
        # detect_encoding() raises SyntaxError for a bad coding cookie or
        # for a file that is not valid UTF-8.
        errprint("%s: SyntaxError: %s" % (file, str(msg)))
        return
    except OSError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # f.newlines is a tuple when the file mixes newline conventions.
    newline = spec_newline if spec_newline else r.newlines
    if isinstance(newline, tuple):
        errprint("%s: mixed newlines detected; cannot continue without --newline" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            with open(file, "w", encoding=encoding, newline=newline) as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
154	n/a
155	n/a
156	n/a	def _rstrip(line, JUNK='\n \t'):
157	n/a	"""Return line stripped of trailing spaces, tabs, newlines.
158	n/a
159	n/a	Note that line.rstrip() instead also strips sundry control characters,
160	n/a	but at least one known Emacs user expects to keep junk like that, not
161	n/a	mentioning Barry by name or anything <wink>.
162	n/a	"""
163	n/a
164	n/a	i = len(line)
165	n/a	while i > 0 and line[i - 1] in JUNK:
166	n/a	i -= 1
167	n/a	return line[:i]
168	n/a
169	n/a
class Reindenter:
    """Re-indent the source read from a text file object to 4-space levels.

    Typical use: construct with an open text file, call run() to compute
    the transformed program, then write() to emit it.
    """

    def __init__(self, f):
        """Read every line from *f* and prepare the tokenizer state."""
        self.level = 0      # current indent level
        self.find_stmt = 1  # next token begins a fresh stmt?

        # Raw file lines, exactly as read.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded, each ending in "\n", so an
        # all-blank line is exactly "\n".  The leading None is a dummy so
        # that tokenize's 1-based line numbers index this list directly.
        self.lines = [None]
        self.lines.extend(_rstrip(raw_line).expandtabs() + "\n"
                          for raw_line in self.raw)
        self.index = 1      # index into self.lines of next line to hand out

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, because
        # tokenize cannot say where they belong.
        self.stats = []

        # Newline convention(s) seen while reading, kept so the caller
        # can write the output without changing them.
        self.newlines = f.newlines

    def run(self):
        """Compute the re-indented program into self.after.

        Returns True when the result differs from the raw input lines.
        """
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)

        # Drop blank lines at the end of the file.
        lines = self.lines
        while lines and lines[-1] == "\n":
            del lines[-1]

        # Sentinel, so that every real entry has a successor.
        stats = self.stats
        stats.append((len(lines), 0))

        # Maps a leading-space count we saw to the count it was most
        # recently rewritten to.
        remap = {}
        # Program after transformation.
        out = self.after = []

        # Blank lines before the first recorded line are copied untouched.
        first_line = stats[0][0]
        out.extend(lines[1:first_line])

        pairs = zip(stats, stats[1:])
        for idx, ((start, level), (stop, _)) in enumerate(pairs):
            have = getlspace(lines[start])
            want = level * 4
            if want < 0:
                # A comment line.
                if not have:
                    want = 0
                else:
                    # An indented comment line.  Reuse the most recent
                    # mapping for this indentation if there is one.
                    want = remap.get(have, -1)
                    if want < 0:
                        # It probably belongs to the next real stmt; only
                        # the first one found is considered.
                        for peer_line, peer_level in stats[idx + 1:-1]:
                            if peer_level >= 0:
                                if have == getlspace(lines[peer_line]):
                                    want = peer_level * 4
                                break
                    if want < 0:
                        # Maybe it is a hanging comment: shift it by the
                        # same amount as the closest preceding real stmt.
                        for base_line, base_level in reversed(stats[:idx]):
                            if base_level >= 0:
                                moved = (getlspace(out[base_line - 1]) -
                                         getlspace(lines[base_line]))
                                want = have + moved
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
            assert want >= 0
            remap[have] = want

            shift = want - have
            chunk = lines[start:stop]
            if shift == 0 or have == 0:
                out.extend(chunk)
            elif shift > 0:
                # Indent further, but never pad blank lines.
                pad = " " * shift
                out.extend(ln if ln == "\n" else pad + ln for ln in chunk)
            else:
                # Dedent, removing no more blanks than a line actually has.
                out.extend(ln[min(getlspace(ln), -shift):] for ln in chunk)
        return self.raw != self.after

    def write(self, f):
        """Write the lines computed by run() to the file object *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned line, or "" once all lines are consumed."""
        if self.index < len(self.lines):
            line = self.lines[self.index]
            self.index += 1
            return line
        return ""

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        Appends (lineno, level) to self.stats for the first token of each
        statement and (lineno, -1) for each comment seen while a new
        statement is being looked for.
        """
        if type == NL:
            return

        if type == COMMENT:
            # Record it, but keep looking for the next stmt: find_stmt is
            # deliberately left alone.
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
            return

        if type in (NEWLINE, INDENT, DEDENT):
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1
            if type == INDENT:
                self.level += 1
            elif type == DEDENT:
                self.level -= 1
            return

        if self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
318	n/a
319	n/a
320	n/a	# Count number of leading blanks.
def getlspace(line):
    """Return the number of space characters at the start of line.

    Only " " is counted; tabs and other whitespace end the run.
    """
    return len(line) - len(line.lstrip(" "))
326	n/a
327	n/a
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()