» Core Development > Code coverage > Lib/http/server.py

Python code coverage for Lib/http/server.py

#countcontent
1n/a"""HTTP server classes.
2n/a
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (GET, HEAD and POST).
6n/a
7n/aIt does, however, optionally implement HTTP/1.1 persistent connections,
8n/aas of version 0.3.
9n/a
10n/aNotes on CGIHTTPRequestHandler
11n/a------------------------------
12n/a
13n/aThis class implements GET and POST requests to cgi-bin scripts.
14n/a
15n/aIf the os.fork() function is not present (e.g. on Windows),
16n/asubprocess.Popen() is used as a fallback, with slightly altered semantics.
17n/a
18n/aIn all cases, the implementation is intentionally naive -- all
19n/arequests are executed synchronously.
20n/a
21n/aSECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22n/a-- it may execute arbitrary Python code or external programs.
23n/a
24n/aNote that status code 200 is sent prior to execution of a CGI script, so
25n/ascripts cannot send other status codes such as 302 (redirect).
26n/a
27n/aXXX To do:
28n/a
29n/a- log requests even later (to capture byte count)
30n/a- log user-agent header and other interesting goodies
31n/a- send error log to separate file
32n/a"""
33n/a
34n/a
35n/a# See also:
36n/a#
37n/a# HTTP Working Group T. Berners-Lee
38n/a# INTERNET-DRAFT R. T. Fielding
39n/a# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40n/a# Expires September 8, 1995 March 8, 1995
41n/a#
42n/a# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43n/a#
44n/a# and
45n/a#
46n/a# Network Working Group R. Fielding
47n/a# Request for Comments: 2616 et al
48n/a# Obsoletes: 2068 June 1999
49n/a# Category: Standards Track
50n/a#
51n/a# URL: http://www.faqs.org/rfcs/rfc2616.html
52n/a
53n/a# Log files
54n/a# ---------
55n/a#
56n/a# Here's a quote from the NCSA httpd docs about log file format.
57n/a#
58n/a# | The logfile format is as follows. Each line consists of:
59n/a# |
60n/a# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61n/a# |
62n/a# | host: Either the DNS name or the IP number of the remote client
63n/a# | rfc931: Any information returned by identd for this person,
64n/a# | - otherwise.
65n/a# | authuser: If user sent a userid for authentication, the user name,
66n/a# | - otherwise.
67n/a# | DD: Day
68n/a# | Mon: Month (calendar name)
69n/a# | YYYY: Year
70n/a# | hh: hour (24-hour format, the machine's timezone)
71n/a# | mm: minutes
72n/a# | ss: seconds
73n/a# | request: The first line of the HTTP request as sent by the client.
74n/a# | ddd: the status code returned by the server, - if not available.
75n/a# | bbbb: the total number of bytes sent,
76n/a# | *not including the HTTP/1.0 header*, - if not available
77n/a# |
78n/a# | You can determine the name of the file accessed through request.
79n/a#
80n/a# (Actually, the latter is only true if you know the server configuration
81n/a# at the time the request was made!)
82n/a
# Version of this module; the request handler classes append it to their
# server_version strings ("BaseHTTP/<version>", "SimpleHTTP/<version>").
__version__ = "0.6"

# Names exported by "from <this module> import *".
__all__ = [
 "HTTPServer", "BaseHTTPRequestHandler",
 "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]
89n/a
90n/aimport email.utils
91n/aimport html
92n/aimport http.client
93n/aimport io
94n/aimport mimetypes
95n/aimport os
96n/aimport posixpath
97n/aimport select
98n/aimport shutil
99n/aimport socket # For gethostbyaddr()
100n/aimport socketserver
101n/aimport sys
102n/aimport time
103n/aimport urllib.parse
104n/aimport copy
105n/aimport argparse
106n/a
107n/afrom http import HTTPStatus
108n/a
109n/a
# Default error message template.
# BaseHTTPRequestHandler.send_error() fills it in with %-formatting from a
# mapping that supplies the keys "code", "message" and "explain"; the
# message and explain values are HTML-escaped before substitution.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
 "http://www.w3.org/TR/html4/strict.dtd">
<html>
 <head>
 <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
 <title>Error response</title>
 </head>
 <body>
 <h1>Error response</h1>
 <p>Error code: %(code)d</p>
 <p>Message: %(message)s.</p>
 <p>Error code explanation: %(code)s - %(explain)s.</p>
 </body>
</html>
"""

# Content-Type header value sent together with the error page above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
129n/a
class HTTPServer(socketserver.TCPServer):
    """TCP server that also records its own host name and port."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then remember the server name and port.

        After the regular TCPServer bind, the fully qualified name of
        the bound host is stored in ``server_name`` and the bound port
        in ``server_port``.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_port = bound_port
        self.server_name = socket.getfqdn(bound_host)
140n/a
141n/a
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, any relevant
        error response has already been sent back.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = True
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if not words:
            return False

        if len(words) >= 3:  # Enough to determine protocol version
            version = words[-1]
            try:
                if not version.startswith('HTTP/'):
                    raise ValueError
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                # int() alone is too lenient: it accepts signs and
                # underscores ("+1", "1_0"), which are not valid in an
                # HTTP version, so insist on plain digits first.
                if any(not component.isdigit()
                       for component in version_number):
                    raise ValueError("non digit in http version")
                # Bound the length so a huge digit string cannot be used
                # to make int() do an unreasonable amount of work.
                if any(len(component) > 10 for component in version_number):
                    raise ValueError("unreasonable length http version")
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = False
            if version_number >= (2, 0):
                self.send_error(
                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                    "Invalid HTTP version (%s)" % base_version_number)
                return False
            self.request_version = version

        if not 2 <= len(words) <= 3:
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request syntax (%r)" % requestline)
            return False
        command, path = words[:2]
        if len(words) == 2:
            # No version given: an HTTP/0.9 request, which is GET-only.
            self.close_connection = True
            if command != 'GET':
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad HTTP/0.9 request type (%r)" % command)
                return False
        self.command, self.path = command, path

        # HTTP clients treat "//host/path" as a scheme-less absolute URI
        # rather than a path, so a handler that echoes such a path into a
        # Location header would redirect to an attacker-chosen host.
        # Reduce any run of leading slashes to a single one.
        if self.path.startswith('//'):
            self.path = '/' + self.path.lstrip('/')

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Line too long",
                str(err))
            return False
        except http.client.HTTPException as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Too many headers",
                str(err)
            )
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = True
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = False
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body.  The default is to always respond
        with a 100 Continue.  You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(HTTPStatus.CONTINUE)
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
                return
            if not self.raw_requestline:
                self.close_connection = True
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            # actually send the response if not already done.
            self.wfile.flush()
        except socket.timeout as e:
            # a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = True

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None, explain=None):
        """Send and log an error reply.

        Arguments are
        * code:    an HTTP error code
                   3 digits
        * message: a simple optional 1 line reason phrase.
                   *( HTAB / SP / VCHAR / %x80-FF )
                   defaults to short entry matching the response code
        * explain: a detailed message defaults to the long entry
                   matching the response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        if explain is None:
            explain = longmsg
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Connection', 'close')

        # Message body is omitted for cases described in:
        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
        #  - RFC7231: 6.3.6. 205(Reset Content)
        body = None
        if (code >= 200 and
                code not in (HTTPStatus.NO_CONTENT,
                             HTTPStatus.RESET_CONTENT,
                             HTTPStatus.NOT_MODIFIED)):
            # HTML encode to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format % {
                'code': code,
                'message': html.escape(message, quote=False),
                'explain': html.escape(explain, quote=False)
            })
            body = content.encode('UTF-8', 'replace')
            self.send_header("Content-Type", self.error_content_type)
            self.send_header('Content-Length', str(len(body)))
        self.end_headers()

        if self.command != 'HEAD' and body:
            self.wfile.write(body)

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only."""
        if self.request_version != 'HTTP/0.9':
            if message is None:
                if code in self.responses:
                    message = self.responses[code][0]
                else:
                    message = ''
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header to the headers buffer."""
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        # A Connection header decides whether the connection is kept open,
        # even for HTTP/0.9 requests where nothing is buffered.
        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = True
            elif value.lower() == 'keep-alive':
                self.close_connection = False

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # Create the buffer on demand, as the send_* methods do, so a
            # response without any buffered header does not raise
            # AttributeError.
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write the buffered header bytes to wfile and empty the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        if isinstance(code, HTTPStatus):
            code = code.value
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    # Translation table used by log_message(): C0/C1 control characters
    # (and the backslash itself) are written as visible escape sequences.
    _control_char_table = str.maketrans(
        {c: '\\x%02x' % c
         for c in list(range(0x20)) + list(range(0x7f, 0xa0))})
    _control_char_table[ord('\\')] = r'\\'

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message.

        Control characters in the formatted message are replaced by
        hex escapes, so text taken from the request (such as the
        request line) cannot forge extra log lines or send terminal
        escape sequences to whoever reads the log.

        """

        message = format % args
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          message.translate(self._control_char_table)))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header."""
        if timestamp is None:
            timestamp = time.time()
        return email.utils.formatdate(timestamp, usegmt=True)

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Index 0 is a placeholder so that month numbers 1-12 index directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""

        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
620n/a
621n/a
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            parts = urllib.parse.urlsplit(self.path)
            if not parts.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
                new_parts = (parts[0], parts[1], parts[2] + '/',
                             parts[3], parts[4])
                new_url = urllib.parse.urlunsplit(new_parts)
                self.send_header("Location", new_url)
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                # Only a regular file can be served as the index page; a
                # directory that happens to be called index.html must not
                # suppress the directory listing.
                if os.path.isfile(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except OSError:
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None
        try:
            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except BaseException:
            # The caller never receives the file object on failure, so it
            # has to be closed here before the error propagates.
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        try:
            displaypath = urllib.parse.unquote(self.path,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            # Fall back to a lenient decode of the *request* path.  The
            # translated filesystem path must not be shown here: it would
            # reveal where the served directory lives on the server.
            displaypath = urllib.parse.unquote(self.path)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a></li>'
                     % (urllib.parse.quote(linkname,
                                           errors='surrogatepass'),
                        html.escape(displayname, quote=False)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        # Don't forget explicit trailing slash when normalizing. Issue17324
        trailing_slash = path.rstrip().endswith('/')
        try:
            path = urllib.parse.unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            path = urllib.parse.unquote(path)
        path = posixpath.normpath(path)
        words = path.split('/')
        words = filter(None, words)
        path = os.getcwd()
        for word in words:
            if os.path.dirname(word) or word in (os.curdir, os.pardir):
                # Ignore components that are not a simple file/directory name
                continue
            path = os.path.join(path, word)
        if trailing_slash:
            path += '/'
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """

        base, ext = posixpath.splitext(path)
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        # Retry case-insensitively before falling back to the default.
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init()  # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream',  # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
838n/a
839n/a
840n/a# Utilities for CGIHTTPRequestHandler
841n/a
842n/adef _url_collapse_path(path):
843n/a """
844n/a Given a URL path, remove extra '/'s and '.' path elements and collapse
845n/a any '..' references and returns a collapsed path.
846n/a
847n/a Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
848n/a The utility of this function is limited to is_cgi method and helps
849n/a preventing some security attacks.
850n/a
851n/a Returns: The reconstituted URL, which will always start with a '/'.
852n/a
853n/a Raises: IndexError if too many '..' occur within the path.
854n/a
855n/a """
856n/a # Query component should not be involved.
857n/a path, _, query = path.partition('?')
858n/a path = urllib.parse.unquote(path)
859n/a
860n/a # Similar to os.path.split(os.path.normpath(path)) but specific to URL
861n/a # path semantics rather than local operating system semantics.
862n/a path_parts = path.split('/')
863n/a head_parts = []
864n/a for part in path_parts[:-1]:
865n/a if part == '..':
866n/a head_parts.pop() # IndexError if more '..' than prior parts
867n/a elif part and part != '.':
868n/a head_parts.append( part )
869n/a if path_parts:
870n/a tail_part = path_parts.pop()
871n/a if tail_part:
872n/a if tail_part == '..':
873n/a head_parts.pop()
874n/a tail_part = ''
875n/a elif tail_part == '.':
876n/a tail_part = ''
877n/a else:
878n/a tail_part = ''
879n/a
880n/a if query:
881n/a tail_part = '?'.join((tail_part, query))
882n/a
883n/a splitpath = ('/' + '/'.join(head_parts), tail_part)
884n/a collapsed_path = "/".join(splitpath)
885n/a
886n/a return collapsed_path
887n/a
888n/a
889n/a
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    The value is looked up once and then cached in the module-level
    ``nobody`` variable.  Returns -1 when the pwd module cannot be
    imported.  When there is no 'nobody' account, one more than the
    highest uid in the password database is used instead.
    """
    global nobody
    if nobody:
        return nobody
    try:
        import pwd
    except ImportError:
        return -1
    try:
        uid = pwd.getpwnam('nobody').pw_uid
    except KeyError:
        uid = max(entry.pw_uid for entry in pwd.getpwall()) + 1
    nobody = uid
    return nobody
906n/a
907n/a
def executable(path):
    """Return whether os.access() grants execute permission on *path*."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
911n/a
912n/a
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    A request is treated as a CGI request when is_cgi() accepts its path;
    the script is then executed by run_cgi(), either in a forked child
    (when os.fork exists) or through subprocess.Popen.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target gets a
        501 (Not Implemented) error response.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI paths are handed to run_cgi() (whose return value is passed
        through); everything else is delegated to
        SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the second '/': head is the first path element (with its
        # leading slash), tail is everything after it.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False


    # URL path prefixes that is_cgi() treats as CGI directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script.

        The decision is based only on the file extension (".py" or ".pyw",
        compared case-insensitively).
        """
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script, replies
        with 404/403 if it is missing, not a plain file or not executable,
        builds the CGI environment from the request, sends the 200 status
        line and then runs the script:

        * with os.fork(): the child tries to switch to the 'nobody' uid,
          connects the request socket to stdin/stdout and exec's the script;
          the parent waits for it and logs a non-zero exit status.
        * otherwise: the script is run through subprocess.Popen, POST data
          is fed to its stdin and its stdout is copied to the client.
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Extend the script directory through as many leading path
        # elements as actually are directories on disk.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Python scripts on non-fork platforms are run through the
        # interpreter, so they need no execute permission themselves.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(authorization).\
                                        decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # Values of every Accept header, comma-joined.  get_all() is the
        # same accessor used for the Cookie headers below.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the script
        # cannot choose a different status code.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed on the command line.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort: keep the current uid if it cannot be
                    # changed.
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child must never
                # return into the server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: no body to forward.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1176n/a
1177n/a
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=HTTPServer, protocol="HTTP/1.0", port=8000, bind=""):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument), bound to
    the *bind* address, and serves requests until interrupted.

    HandlerClass.protocol_version is set to *protocol* before the server is
    created.  On KeyboardInterrupt a message is printed and the process
    exits with status 0.

    """
    server_address = (bind, port)

    HandlerClass.protocol_version = protocol
    with ServerClass(server_address, HandlerClass) as httpd:
        # Only host and port are needed; IPv6 sockets report extra fields.
        host, bound_port = httpd.socket.getsockname()[:2]
        # An IPv6 literal must be enclosed in brackets inside a URL.
        url_host = '[{}]'.format(host) if ':' in host else host
        serve_message = ("Serving HTTP on {host} port {port} "
                         "(http://{url_host}:{port}/) ...")
        print(serve_message.format(host=host, port=bound_port,
                                   url_host=url_host))
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1197n/a
if __name__ == '__main__':
    # Command-line entry point: parse the options, pick the handler class
    # and hand over to test(), which runs the server.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--cgi', action='store_true', help='Run as CGI Server')
    parser.add_argument(
        '--bind', '-b', default='', metavar='ADDRESS',
        help='Specify alternate bind address [default: all interfaces]')
    parser.add_argument(
        'port', action='store', nargs='?', type=int, default=8000,
        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    handler_class = (CGIHTTPRequestHandler if args.cgi
                     else SimpleHTTPRequestHandler)
    test(HandlerClass=handler_class, port=args.port, bind=args.bind)