Python code coverage for Lib/textwrap.py

#	count	content
1	n/a	"""Text wrapping and filling.
2	n/a	"""
3	n/a
4	n/a	# Copyright (C) 1999-2001 Gregory P. Ward.
5	n/a	# Copyright (C) 2002, 2003 Python Software Foundation.
6	n/a	# Written by Greg Ward <gward@python.net>
7	n/a
8	n/a	import re
9	n/a
10	n/a	__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']
11	n/a
# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
_whitespace = '\t\n\x0b\x0c\r '


class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 0 .. 'tabsize' spaces, depending on its
        position in its line.  If false, each tab is treated as a
        single character.
      tabsize (default: 8)
        Expand tabs in input text to 0 .. 'tabsize' spaces, unless
        'expand_tabs' is false.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words.  If true, wrapping will occur
        preferably on whitespaces and right after hyphens that are part
        of compound words.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
      max_lines (default: None)
        Truncate wrapped lines.
      placeholder (default: ' [...]')
        Append to the last line of truncated text.
    """

    # Translation table mapping every recognized whitespace code point
    # to a plain space; used by _munge_whitespace().
    uspace = ord(' ')
    unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), uspace)

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    word_punct = r'[\w!"\'&.,?]'
    letter = r'[^\d\W]'
    whitespace = r'[%s]' % re.escape(_whitespace)
    nowhitespace = '[^' + whitespace[1:]
    # NOTE: the alternatives below must be separated by a bare '|'.  An
    # escaped '\|' would match a literal pipe character and the pattern
    # would no longer split ordinary text at all.
    wordsep_re = re.compile(r'''
        ( # any whitespace
          %(ws)s+
        | # em-dash between words
          (?<=%(wp)s) -{2,} (?=\w)
        | # word, possibly hyphenated
          %(nws)s+? (?:
            # hyphenated word
              -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
              (?= %(lt)s -? %(lt)s)
            | # end of word
              (?=%(ws)s|\Z)
            | # em-dash
              (?<=%(wp)s) (?=-{2,}\w)
            )
        )''' % {'wp': word_punct, 'lt': letter,
                'ws': whitespace, 'nws': nowhitespace},
        re.VERBOSE)
    del word_punct, letter, nowhitespace

    # This less funky little regex just splits on recognized spaces.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
    del whitespace

    # XXX this is not locale- or charset-aware -- string.lowercase
    # is US-ASCII only (and therefore English-only)
    sentence_end_re = re.compile(r'[a-z]'             # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z')               # end of chunk

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True,
                 tabsize=8,
                 *,
                 max_lines=None,
                 placeholder=' [...]'):
        """Store the wrapping options; see the class docstring for each."""
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens
        self.tabsize = tabsize
        self.max_lines = max_lines
        self.placeholder = placeholder

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
        becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs(self.tabsize)
        if self.replace_whitespace:
            text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is True, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
        # Identity check: only the literal True selects the hyphen-aware
        # splitter; any other value falls back to whitespace splitting.
        if self.break_on_hyphens is True:
            chunks = self.wordsep_re.split(text)
        else:
            chunks = self.wordsep_simple_re.split(text)
        # re.split() with a capturing group yields empty strings between
        # adjacent separators; drop them.
        return [c for c in chunks if c]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks) - 1:
            if chunks[i + 1] == " " and patsearch(chunks[i]):
                # Widen the single separating space to two spaces.
                chunks[i + 1] = "  "
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  Both 'reversed_chunks' and
        'cur_line' are modified in place.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(reversed_chunks[-1][:space_left])
            reversed_chunks[-1] = reversed_chunks[-1][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        'chunks' is reversed and consumed in place.  Raises ValueError
        if 'self.width' is not positive, or if 'max_lines' is set and
        the placeholder cannot fit on a line next to the indent.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            # The placeholder lands on the last permitted line, which is
            # the first line only when max_lines is 1 (or less).
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if len(indent) + len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on any line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = sum(map(len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                cur_len -= len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                if (self.max_lines is None or
                    len(lines) + 1 < self.max_lines or
                    (not chunks or
                     self.drop_whitespace and
                     len(chunks) == 1 and
                     not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(indent + ''.join(cur_line))
                else:
                    # This is the last permitted line and text remains:
                    # strip trailing chunks until the placeholder fits.
                    while cur_line:
                        if (cur_line[-1].strip() and
                                cur_len + len(self.placeholder) <= width):
                            cur_line.append(self.placeholder)
                            lines.append(indent + ''.join(cur_line))
                            break
                        cur_len -= len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        # Nothing of this line survived; try to hang the
                        # placeholder on the previous line instead.
                        if lines:
                            prev_line = lines[-1].rstrip()
                            if (len(prev_line) + len(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        lines.append(indent + self.placeholder.lstrip())
                    break

        return lines

    def _split_chunks(self, text):
        """Normalize whitespace in 'text' and split it into chunks."""
        text = self._munge_whitespace(text)
        return self._split(text)

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        chunks = self._split_chunks(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))
364	n/a
365	n/a
366	n/a	# -- Convenience interface ---------------------------------------------
367	n/a
def wrap(text, width=70, **kwargs):
    """Wrap one paragraph and return its lines as a list of strings.

    'text' is treated as a single paragraph and reflowed so that no
    line is wider than 'width' columns.  Unless overridden, tabs are
    expanded with string.expandtabs() and every other whitespace
    character (newlines included) becomes a space.  Any extra keyword
    arguments are forwarded to the TextWrapper constructor; see that
    class for the options available.
    """
    return TextWrapper(width=width, **kwargs).wrap(text)
380	n/a
def fill(text, width=70, **kwargs):
    """Fill one paragraph and return it as a single string.

    'text' is treated as a single paragraph and reflowed so that no
    line is wider than 'width' columns; the resulting lines are joined
    with newlines.  Whitespace is handled as in wrap(): tabs are
    expanded and other whitespace characters become spaces.  Any extra
    keyword arguments are forwarded to the TextWrapper constructor; see
    that class for the options available.
    """
    return TextWrapper(width=width, **kwargs).fill(text)
392	n/a
def shorten(text, width, **kwargs):
    """Collapse whitespace in 'text' and truncate it to fit 'width'.

    Runs of whitespace are first collapsed to single spaces.  When the
    result already fits in 'width' it is returned unchanged; otherwise
    as many leading words as fit are kept and the placeholder is
    appended::

        >>> textwrap.shorten("Hello  world!", width=12)
        'Hello world!'
        >>> textwrap.shorten("Hello  world!", width=11)
        'Hello [...]'
    """
    words = text.strip().split()
    collapsed = ' '.join(words)
    single_line_wrapper = TextWrapper(width=width, max_lines=1, **kwargs)
    return single_line_wrapper.fill(collapsed)
407	n/a
408	n/a
409	n/a	# -- Loosely related functionality -------------------------------------
410	n/a
# Matches lines consisting solely of spaces and tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Captures the leading run of spaces/tabs of every line that has at
# least one non-whitespace character.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.

    Lines that consist solely of spaces and tabs are reduced to empty
    lines and do not take part in computing the common margin.
    """
    # Blank out whitespace-only lines so they cannot shrink the margin.
    text = _whitespace_only_re.sub('', text)

    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    for line_indent in _leading_whitespace_re.findall(text):
        if margin is None:
            margin = line_indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif line_indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(line_indent):
            margin = line_indent

        # Neither is a prefix of the other, so they must differ at some
        # position covered by zip(); keep only the part they share.
        else:
            for i, (x, y) in enumerate(zip(margin, line_indent)):
                if x != y:
                    margin = margin[:i]
                    break

    if margin:
        # margin holds only spaces and tabs, so it is safe to embed in
        # the pattern without escaping.
        text = re.sub(r'(?m)^' + margin, '', text)
    return text
465	n/a
466	n/a
def indent(text, prefix, predicate=None):
    """Return 'text' with 'prefix' prepended to selected lines.

    A line is selected when 'predicate(line)' is true.  When no
    'predicate' is given, every line that contains at least one
    non-whitespace character is selected; empty and whitespace-only
    lines are left untouched.  Line endings are preserved.
    """
    if predicate is not None:
        wants_prefix = predicate
    else:
        # Default rule: a line qualifies if anything remains after
        # stripping whitespace.
        wants_prefix = lambda line: line.strip()

    pieces = [
        prefix + line if wants_prefix(line) else line
        for line in text.splitlines(True)
    ]
    return ''.join(pieces)
483	n/a
484	n/a
if __name__ == "__main__":
    # Minimal smoke demo when the module is run as a script: print a
    # dedented two-line sample.
    print(dedent("Hello there.\n This is indented."))