» Core Development > Code coverage > Lib/textwrap.py

Python code coverage for Lib/textwrap.py

# | count | content
1n/a"""Text wrapping and filling.
2n/a"""
3n/a
4n/a# Copyright (C) 1999-2001 Gregory P. Ward.
5n/a# Copyright (C) 2002, 2003 Python Software Foundation.
6n/a# Written by Greg Ward <gward@python.net>
7n/a
8n/aimport re
9n/a
10n/a__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']
11n/a
# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
_whitespace = '\t\n\x0b\x0c\r '


class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 0 .. 'tabsize' spaces, depending on its
        position in its line.  If false, each tab is treated as a
        single character.
      tabsize (default: 8)
        Expand tabs in input text to 0 .. 'tabsize' spaces, unless
        'expand_tabs' is false.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words.  If true, wrapping will occur
        preferably on whitespaces and right after hyphens part of
        compound words.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
      max_lines (default: None)
        Truncate wrapped lines.
      placeholder (default: ' [...]')
        Append to the last line of truncated text.
    """

    # Translation table (code point -> code point) that maps every
    # recognized whitespace character to a plain space.  Built without a
    # class-level loop so no stray loop variable ends up as an attribute.
    uspace = ord(' ')
    unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), uspace)

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    word_punct = r'[\w!"\'&.,?]'
    letter = r'[^\d\W]'
    whitespace = r'[%s]' % re.escape(_whitespace)
    nowhitespace = '[^' + whitespace[1:]
    wordsep_re = re.compile(r'''
        ( # any whitespace
          %(ws)s+
        | # em-dash between words
          (?<=%(wp)s) -{2,} (?=\w)
        | # word, possibly hyphenated
          %(nws)s+? (?:
            # hyphenated word
              -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
              (?= %(lt)s -? %(lt)s)
            | # end of word
              (?=%(ws)s|\Z)
            | # em-dash
              (?<=%(wp)s) (?=-{2,}\w)
            )
        )''' % {'wp': word_punct, 'lt': letter,
                'ws': whitespace, 'nws': nowhitespace},
        re.VERBOSE)
    del word_punct, letter, nowhitespace

    # This less funky little regex just splits on recognized spaces.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
    del whitespace

    # XXX this is not locale- or charset-aware -- only the US-ASCII
    # lowercase letters a-z are recognized (and therefore English-only)
    sentence_end_re = re.compile(r'[a-z]'             # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z')               # end of chunk

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True,
                 tabsize=8,
                 *,
                 max_lines=None,
                 placeholder=' [...]'):
        """Store the wrapping options; see the class docstring for each."""
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens
        self.tabsize = tabsize
        self.max_lines = max_lines
        self.placeholder = placeholder

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
        becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs(self.tabsize)
        if self.replace_whitespace:
            text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is True, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
        # Note: this is an identity comparison, so only the bool True
        # selects the hyphen-aware regex.
        if self.break_on_hyphens is True:
            chunks = self.wordsep_re.split(text)
        else:
            chunks = self.wordsep_simple_re.split(text)
        # re.split() with a capturing group yields empty strings between
        # adjacent matches; they carry no text, so discard them.
        return [c for c in chunks if c]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks) - 1:
            if chunks[i + 1] == " " and patsearch(chunks[i]):
                chunks[i + 1] = "  "
                # Skip the whitespace chunk that was just widened.
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  'reversed_chunks' is the stack of
        remaining chunks (next chunk last) and 'cur_line' the chunks
        collected for the current line; both are modified in place.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            end = space_left
            chunk = reversed_chunks[-1]
            if self.break_on_hyphens and len(chunk) > space_left:
                # Prefer to break right after the last hyphen that fits,
                # but only if there are non-hyphens before it (so a run
                # of leading dashes is never split off on its own).
                hyphen = chunk.rfind('-', 0, space_left)
                if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
                    end = hyphen + 1
            cur_line.append(chunk[:end])
            reversed_chunks[-1] = chunk[end:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        The 'chunks' list is reversed and consumed in place.  Raises
        ValueError if 'self.width' is not positive, or if 'max_lines' is
        set and the placeholder cannot fit within 'self.width'.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            # The placeholder lands on the last permitted line; check it
            # fits next to whichever indent that line will use.
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if len(indent) + len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = sum(map(len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                cur_len -= len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                if (self.max_lines is None or
                    len(lines) + 1 < self.max_lines or
                    (not chunks or
                     self.drop_whitespace and
                     len(chunks) == 1 and
                     not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(indent + ''.join(cur_line))
                else:
                    # This is the last permitted line but text remains:
                    # trim chunks off the end until the placeholder fits
                    # after a non-whitespace chunk.
                    while cur_line:
                        if (cur_line[-1].strip() and
                                cur_len + len(self.placeholder) <= width):
                            cur_line.append(self.placeholder)
                            lines.append(indent + ''.join(cur_line))
                            break
                        cur_len -= len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        # Nothing on this line can sit before the
                        # placeholder: try to tack it onto the previous
                        # line, else emit it on a line of its own.
                        if lines:
                            prev_line = lines[-1].rstrip()
                            if (len(prev_line) + len(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        lines.append(indent + self.placeholder.lstrip())
                    break

        return lines

    def _split_chunks(self, text):
        """Normalize whitespace in 'text' and split it into chunks."""
        text = self._munge_whitespace(text)
        return self._split(text)

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        chunks = self._split_chunks(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))
364n/a
365n/a
366n/a# -- Convenience interface ---------------------------------------------
367n/a
def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    The paragraph in 'text' is reformatted so that every line is at most
    'width' columns wide, and the resulting lines are returned as a list.
    By default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  Any extra keyword arguments are passed on to the TextWrapper
    constructor; see that class for the available options.
    """
    return TextWrapper(width=width, **kwargs).wrap(text)
380n/a
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    The paragraph in 'text' is reformatted so that every line is at most
    'width' columns wide, and the wrapped paragraph is returned as one
    string.  As with wrap(), tabs are expanded and other whitespace
    characters converted to space.  Any extra keyword arguments are
    passed on to the TextWrapper constructor; see that class for the
    available options.
    """
    return TextWrapper(width=width, **kwargs).fill(text)
392n/a
def shorten(text, width, **kwargs):
    """Collapse and truncate the given text to fit in the given width.

    Whitespace in the text is collapsed first.  If the result fits in
    *width* it is returned unchanged; otherwise as many words as fit
    are kept and the placeholder is appended::

        >>> textwrap.shorten("Hello world!", width=12)
        'Hello world!'
        >>> textwrap.shorten("Hello world!", width=11)
        'Hello [...]'
    """
    # A single-line wrapper does the truncation and placeholder handling.
    one_line_wrapper = TextWrapper(width=width, max_lines=1, **kwargs)
    words = text.strip().split()
    collapsed = ' '.join(words)
    return one_line_wrapper.fill(collapsed)
407n/a
408n/a
409n/a# -- Loosely related functionality -------------------------------------
410n/a
# Lines consisting solely of spaces and tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Leading run of spaces/tabs on every line that has other content.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.  (This behaviour is
    new in Python 2.5; older versions of this module incorrectly
    expanded tabs before searching for common leading whitespace.)

    Lines that contain only spaces and tabs are reduced to empty lines
    and do not take part in computing the common margin.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    for line_indent in _leading_whitespace_re.findall(text):
        if margin is None:
            margin = line_indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif line_indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(line_indent):
            margin = line_indent

        # Neither is a prefix of the other, so the two must differ at
        # some position: keep only the whitespace they have in common.
        else:
            for i, (x, y) in enumerate(zip(margin, line_indent)):
                if x != y:
                    margin = margin[:i]
                    break

    if margin:
        # margin holds only spaces and tabs, so it is safe to use
        # directly inside the pattern.
        text = re.sub(r'(?m)^' + margin, '', text)
    return text
465n/a
466n/a
def indent(text, prefix, predicate=None):
    """Adds 'prefix' to the beginning of selected lines in 'text'.

    A line is selected when 'predicate(line)' is true.  When no
    'predicate' is given, every line that is non-empty and does not
    consist solely of whitespace characters is selected.  Line endings
    are kept as they are.
    """
    if predicate is None:
        # Default rule: a line qualifies if anything is left after
        # stripping whitespace.
        should_prefix = lambda line: line.strip()
    else:
        should_prefix = predicate

    pieces = [
        (prefix + line) if should_prefix(line) else line
        for line in text.splitlines(True)
    ]
    return ''.join(pieces)
483n/a
484n/a
if __name__ == "__main__":
    # Manual smoke check: print the result of dedenting a short sample.
    sample = "Hello there.\n This is indented."
    print(dedent(sample))