
Python code coverage for Lib/shlex.py. Per-line execution counts are not available (n/a) for this file, so only the source listing follows.

1n/a"""A lexical analyzer class for simple shell-like syntaxes."""
2n/a
3n/a# Module and documentation by Eric S. Raymond, 21 Dec 1998
4n/a# Input stacking and error message cleanup added by ESR, March 2000
5n/a# push_source() and pop_source() made explicit by ESR, January 2001.
6n/a# Posix compliance, split(), string arguments, and
7n/a# iterator interface by Gustavo Niemeyer, April 2003.
8n/a# changes to tokenize more like Posix shells by Vinay Sajip, July 2016.
9n/a
10n/aimport os
11n/aimport re
12n/aimport sys
13n/afrom collections import deque
14n/a
15n/afrom io import StringIO
16n/a
17n/a__all__ = ["shlex", "split", "quote"]
18n/a
19n/aclass shlex:
20n/a "A lexical analyzer class for simple shell-like syntaxes."
21n/a def __init__(self, instream=None, infile=None, posix=False,
22n/a punctuation_chars=False):
23n/a if isinstance(instream, str):
24n/a instream = StringIO(instream)
25n/a if instream is not None:
26n/a self.instream = instream
27n/a self.infile = infile
28n/a else:
29n/a self.instream = sys.stdin
30n/a self.infile = None
31n/a self.posix = posix
32n/a if posix:
33n/a self.eof = None
34n/a else:
35n/a self.eof = ''
36n/a self.commenters = '#'
37n/a self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
38n/a 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
39n/a if self.posix:
40n/a self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
41n/a 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
42n/a self.whitespace = ' \t\r\n'
43n/a self.whitespace_split = False
44n/a self.quotes = '\'"'
45n/a self.escape = '\\'
46n/a self.escapedquotes = '"'
47n/a self.state = ' '
48n/a self.pushback = deque()
49n/a self.lineno = 1
50n/a self.debug = 0
51n/a self.token = ''
52n/a self.filestack = deque()
53n/a self.source = None
54n/a if not punctuation_chars:
55n/a punctuation_chars = ''
56n/a elif punctuation_chars is True:
57n/a punctuation_chars = '();<>|&'
58n/a self.punctuation_chars = punctuation_chars
59n/a if punctuation_chars:
60n/a # _pushback_chars is a push back queue used by lookahead logic
61n/a self._pushback_chars = deque()
62n/a # these chars added because allowed in file names, args, wildcards
63n/a self.wordchars += '~-./*?='
64n/a #remove any punctuation chars from wordchars
65n/a t = self.wordchars.maketrans(dict.fromkeys(punctuation_chars))
66n/a self.wordchars = self.wordchars.translate(t)
67n/a
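    # Descriptive note (not in the original module): instances are normally
    # customised after construction by assigning to the attributes set up
    # above.  For example, split() below sets whitespace_split = True and
    # clears commenters, and assigning a keyword string (e.g. 'source') to
    # self.source enables file inclusion via sourcehook() in get_token().
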
    def push_token(self, tok):
        "Push a token onto the stack popped by the get_token method"
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        "Push an input source onto the lexer's input source stack."
        if isinstance(newstream, str):
            newstream = StringIO(newstream)
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        "Pop the input source stack."
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d' \
                  % (self.instream, self.lineno))
        self.state = ' '

    def get_token(self):
        "Get a token from the input stream (or from stack if it's nonempty)"
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback. Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                spec = self.sourcehook(self.read_token())
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            else:
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

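    # Descriptive note (not in the original module): read_token() below is a
    # hand-written state machine.  self.state takes one of these values while
    # scanning:
    #   ' '                  - between tokens, skipping whitespace
    #   'a'                  - accumulating an ordinary word token
    #   'c'                  - accumulating a run of punctuation_chars (e.g. '&&')
    #   a quote character    - inside a quoted string opened by that character
    #   an escape character  - the previous character was an escape (posix mode)
    #   None                 - end of input reached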
    def read_token(self):
        quoted = False
        escapedstate = ' '
        while True:
            if self.punctuation_chars and self._pushback_chars:
                nextchar = self._pushback_chars.pop()
            else:
                nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno += 1
            if self.debug >= 3:
                print("shlex: in state %r I see character: %r" % (self.state,
                                                                  nextchar))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno += 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.punctuation_chars:
                    self.token = nextchar
                    self.state = 'c'
                elif nextchar in self.quotes:
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                else:
                    self.token = nextchar
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
            elif self.state in self.quotes:
                quoted = True
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    if not self.posix:
                        self.token += nextchar
                        self.state = ' '
                        break
                    else:
                        self.state = 'a'
                elif (self.posix and nextchar in self.escape and self.state
                      in self.escapedquotes):
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token += nextchar
            elif self.state in self.escape:
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if (escapedstate in self.quotes and
                        nextchar != self.state and nextchar != escapedstate):
                    self.token += self.state
                self.token += nextchar
                self.state = escapedstate
            elif self.state in ('a', 'c'):
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno += 1
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break   # emit current token
                        else:
                            continue
                elif self.state == 'c':
                    if nextchar in self.punctuation_chars:
                        self.token += nextchar
                    else:
                        if nextchar not in self.whitespace:
                            self._pushback_chars.append(nextchar)
                        self.state = ' '
                        break
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif (nextchar in self.wordchars or nextchar in self.quotes
                      or self.whitespace_split):
                    self.token += nextchar
                else:
                    if self.punctuation_chars:
                        self._pushback_chars.append(nextchar)
                    else:
                        self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        return result

    def sourcehook(self, newfile):
        "Hook called on a filename to be sourced."
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        if isinstance(self.infile, str) and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        "Emit a C-compiler-like, Emacs-friendly error-message leader."
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        return self

    def __next__(self):
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token

def split(s, comments=False, posix=True):
    lex = shlex(s, posix=posix)
    lex.whitespace_split = True
    if not comments:
        lex.commenters = ''
    return list(lex)

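# Illustrative examples of split() (not part of the original module; the
# results follow from the state machine above):
#   split('a "b c" d')                          -> ['a', 'b c', 'd']
#   split('echo hi # greeting', comments=True)  -> ['echo', 'hi']
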

_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search

def quote(s):
    """Return a shell-escaped version of the string *s*."""
    if not s:
        return "''"
    if _find_unsafe(s) is None:
        return s

    # use single quotes, and put single quotes into double quotes
    # the string $'b is then quoted as '$'"'"'b'
    return "'" + s.replace("'", "'\"'\"'") + "'"
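# Illustrative example (not part of the original module):
#   quote("it's here")  ->  'it'"'"'s here'   (safe to paste into a sh command line)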


def _print_tokens(lexer):
    while 1:
        tt = lexer.get_token()
        if not tt:
            break
        print("Token: " + repr(tt))

if __name__ == '__main__':
    if len(sys.argv) == 1:
        _print_tokens(shlex())
    else:
        fn = sys.argv[1]
        with open(fn) as f:
            _print_tokens(shlex(f, fn))
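
Below is a short, self-contained sketch (separate from shlex.py itself) showing how the punctuation_chars option added in Python 3.6 changes tokenization when driving the class directly through its iterator interface. The expected outputs in the comments follow from the state machine above and match the documented examples.

import shlex

# Default mode: runs of shell operators are split character by character.
plain = shlex.shlex('a && b || c')
print(list(plain))    # ['a', '&', '&', 'b', '|', '|', 'c']

# With punctuation_chars=True, runs of ();<>|& are kept together as tokens.
punct = shlex.shlex('a && b || c', punctuation_chars=True)
print(list(punct))    # ['a', '&&', 'b', '||', 'c']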