» Core Development > Code coverage > Lib/distutils/text_file.py

Python code coverage for Lib/distutils/text_file.py

# | count | content
1n/a"""text_file
2n/a
3n/aprovides the TextFile class, which gives an interface to text files
4n/athat (optionally) takes care of stripping comments, ignoring blank
5n/alines, and joining lines with backslashes."""
6n/a
7n/aimport sys, io
8n/a
9n/a
class TextFile:
    """Provides a file-like object that takes care of all the things you
    commonly want to do when processing a text file that has some
    line-by-line syntax: strip comments (as long as "#" is your
    comment character), skip blank lines, join adjacent lines by
    escaping the newline (ie. backslash at end of line), strip
    leading and/or trailing whitespace.  All of these are optional
    and independently controllable.

    Provides a 'warn()' method so you can generate warning messages that
    report physical line number, even if the logical line in question
    spans multiple physical lines.  Also provides 'unreadline()' for
    implementing line-at-a-time lookahead.

    Constructor is called as:

        TextFile (filename=None, file=None, **options)

    It bombs (RuntimeError) if both 'filename' and 'file' are None;
    'filename' should be a string, and 'file' a file object (or
    something that provides 'readline()' and 'close()' methods).  It is
    recommended that you supply at least 'filename', so that TextFile
    can include it in warning messages.  If 'file' is not supplied,
    TextFile creates its own using 'io.open()'.

    The options are all boolean (except 'errors'), and affect the value
    returned by 'readline()':
      strip_comments [default: true]
        strip from "#" to end-of-line, as well as any whitespace
        leading up to the "#" -- unless it is escaped by a backslash
      lstrip_ws [default: false]
        strip leading whitespace from each line before returning it
      rstrip_ws [default: true]
        strip trailing whitespace (including line terminator!) from
        each line before returning it
      skip_blanks [default: true]
        skip lines that are empty *after* stripping comments and
        whitespace.  (If both lstrip_ws and rstrip_ws are false,
        then some lines may consist of solely whitespace: these will
        *not* be skipped, even if 'skip_blanks' is true.)
      join_lines [default: false]
        if a backslash is the last non-newline character on a line
        after stripping comments and whitespace, join the following line
        to it to form one "logical line"; if N consecutive lines end
        with a backslash, then N+1 physical lines will be joined to
        form one logical line.
      collapse_join [default: false]
        strip leading whitespace from lines that are joined to their
        predecessor; only matters if (join_lines and not lstrip_ws)
      errors [default: 'strict']
        error handler used to decode the file content

    Note that since 'rstrip_ws' can strip the trailing newline, the
    semantics of 'readline()' must differ from those of the builtin file
    object's 'readline()' method!  In particular, 'readline()' returns
    None for end-of-file: an empty string might just be a blank line (or
    an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
    not."""

    # Option name -> default value; also the set of accepted option names.
    default_options = {
        'strip_comments': 1,
        'skip_blanks': 1,
        'lstrip_ws': 0,
        'rstrip_ws': 1,
        'join_lines': 0,
        'collapse_join': 0,
        'errors': 'strict',
    }

    def __init__(self, filename=None, file=None, **options):
        """Construct a new TextFile object.  At least one of 'filename'
        (a string) and 'file' (a file-like object) must be supplied.
        The keyword argument options are described above and affect
        the values returned by 'readline()'.

        Raises RuntimeError if neither 'filename' nor 'file' is given,
        and KeyError if an unknown option name is passed."""
        if filename is None and file is None:
            raise RuntimeError("you must supply either or both of 'filename' and 'file'")

        # sanity check client option hash
        for opt in options:
            if opt not in self.default_options:
                raise KeyError("invalid TextFile option '%s'" % opt)

        # set values for all options -- either from client option hash
        # or fallback to default_options
        for opt, default in self.default_options.items():
            setattr(self, opt, options.get(opt, default))

        if file is None:
            self.open(filename)
        else:
            self.filename = filename
            self.file = file
            self.current_line = 0       # assuming that file is at BOF!

        # 'linebuf' is a stack of lines that will be emptied before we
        # actually read from the file; it's only populated by an
        # 'unreadline()' operation
        self.linebuf = []

    def open(self, filename):
        """Open a new file named 'filename'.  This overrides both the
        'filename' and 'file' arguments to the constructor."""
        self.filename = filename
        self.file = io.open(self.filename, 'r', errors=self.errors)
        self.current_line = 0

    def close(self):
        """Close the current file and forget everything we know about it
        (filename, current line number)."""
        file = self.file
        # Forget our state first, so it is cleared even if closing fails.
        self.file = None
        self.filename = None
        self.current_line = None
        file.close()

    def gen_error(self, msg, line=None):
        """Return 'msg' prefixed with the filename and line number(s).

        'line' defaults to the current line number; it may be an integer
        or a two-element list/tuple giving a range of physical lines.
        The filename prefix is omitted when no filename is known (ie.
        the object was built from 'file' alone)."""
        outmsg = []
        if line is None:
            line = self.current_line
        if self.filename is not None:
            outmsg.append(str(self.filename) + ", ")
        if isinstance(line, (list, tuple)):
            outmsg.append("lines %d-%d: " % tuple(line))
        else:
            outmsg.append("line %d: " % line)
        outmsg.append(str(msg))
        return "".join(outmsg)

    def error(self, msg, line=None):
        """Raise ValueError with 'msg' tied to 'line' (or to the current
        line if 'line' is not supplied); see 'gen_error()'."""
        raise ValueError("error: " + self.gen_error(msg, line))

    def warn(self, msg, line=None):
        """Print (to stderr) a warning message tied to the current logical
        line in the current file.  If the current logical line in the
        file spans multiple physical lines, the warning refers to the
        whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
        the current line number; it may be a list or tuple to indicate a
        range of physical lines, or an integer for a single physical
        line."""
        sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")

    def _count_physical_line(self, joining):
        """Account for one more physical line in 'self.current_line'.

        'current_line' is an int for a single-line logical line, or a
        two-element list [first, last] for a joined one.  If 'joining'
        is true the line just read extends the current logical line;
        otherwise it starts a new one."""
        current = self.current_line
        if joining:
            if isinstance(current, list):
                current[1] = current[1] + 1
            else:
                self.current_line = [current, current + 1]
        elif isinstance(current, list):
            self.current_line = current[1] + 1
        else:
            self.current_line = current + 1

    def readline(self):
        """Read and return a single logical line from the current file (or
        from an internal buffer if lines have previously been "unread"
        with 'unreadline()').  If the 'join_lines' option is true, this
        may involve reading multiple physical lines concatenated into a
        single string.  Updates the current line number, so calling
        'warn()' after 'readline()' emits a warning about the physical
        line(s) just read.  Returns None on end-of-file, since the empty
        string can occur if 'rstrip_ws' is true but 'skip_blanks' is
        not."""
        # If any "unread" lines waiting in 'linebuf', return the top
        # one.  (We don't actually buffer read-ahead data -- lines only
        # get put in 'linebuf' if the client explicitly does an
        # 'unreadline()'.)
        if self.linebuf:
            return self.linebuf.pop()

        buildup_line = ''

        while True:
            # read the line, make it None if EOF
            line = self.file.readline()
            if line == '':
                line = None

            # Did the previous line end with a backslash?  (An empty
            # 'buildup_line' -- ie. a line holding only a backslash -- is
            # not treated as a pending continuation.)
            joining = bool(self.join_lines and buildup_line)

            if self.strip_comments and line:

                # Look for the first "#" in the line.  If none, never
                # mind.  If we find one and it's the first character, or
                # is not preceded by "\", then it starts a comment --
                # strip the comment, strip whitespace before it, and
                # carry on.  Otherwise, it's just an escaped "#", so
                # unescape it (and any other escaped "#"'s that might be
                # lurking in there) and otherwise leave the line alone.

                pos = line.find("#")
                if pos == -1:           # no "#" -- no comments
                    pass

                # It's definitely a comment -- either "#" is the first
                # character, or it's elsewhere and unescaped.
                elif pos == 0 or line[pos - 1] != "\\":
                    # Have to preserve the trailing newline, because it's
                    # the job of a later step (rstrip_ws) to remove it --
                    # and if rstrip_ws is false, we'd better preserve it!
                    # (NB. this means that if the final line is all comment
                    # and has no trailing newline, we will think that it's
                    # EOF; I think that's OK.)
                    eol = '\n' if line.endswith('\n') else ''
                    line = line[:pos] + eol

                    # If all that's left is whitespace, then skip line
                    # *now*, before we try to join it to 'buildup_line' --
                    # that way constructs like
                    #   hello \\
                    #   # comment that should be ignored
                    #   there
                    # result in "hello there".
                    if line.strip() == "":
                        # The skipped line is still a physical line: count
                        # it so later line numbers stay accurate.
                        self._count_physical_line(joining)
                        continue
                else:                   # it's an escaped "#"
                    line = line.replace("\\#", "#")

            # previous line ended with a backslash: accumulate
            if joining:
                # oops: end of file
                if line is None:
                    self.warn("continuation line immediately precedes "
                              "end-of-file")
                    return buildup_line

                if self.collapse_join:
                    line = line.lstrip()
                line = buildup_line + line
                self._count_physical_line(True)

            # just an ordinary line, read it as usual
            else:
                if line is None:        # eof
                    return None
                self._count_physical_line(False)

            # strip whitespace however the client wants (leading and
            # trailing, or one or the other, or neither)
            if self.lstrip_ws and self.rstrip_ws:
                line = line.strip()
            elif self.lstrip_ws:
                line = line.lstrip()
            elif self.rstrip_ws:
                line = line.rstrip()

            # blank line (whether we rstrip'ed or not)? skip to next line
            # if appropriate
            if (line == '' or line == '\n') and self.skip_blanks:
                continue

            if self.join_lines:
                # 'endswith' rather than indexing: 'line' may be empty
                # here when blank lines are not being skipped.
                if line.endswith('\\'):
                    buildup_line = line[:-1]
                    continue

                if line.endswith('\\\n'):
                    buildup_line = line[:-2] + '\n'
                    continue

            # well, I guess there's some actual content there: return it
            return line

    def readlines(self):
        """Read and return the list of all logical lines remaining in the
        current file."""
        lines = []
        while True:
            line = self.readline()
            if line is None:
                return lines
            lines.append(line)

    def unreadline(self, line):
        """Push 'line' (a string) onto an internal buffer that will be
        checked by future 'readline()' calls.  Handy for implementing
        a parser with line-at-a-time lookahead."""
        self.linebuf.append(line)