ยปCore Development>Code coverage>Lib/httplib.py

Python code coverage for Lib/httplib.py

#countcontent
1n/a"""HTTP/1.1 client library
2n/a
3n/a<intro stuff goes here>
4n/a<other stuff, too>
5n/a
6n/aHTTPConnection goes through a number of "states", which define when a client
7n/amay legally make another request or fetch the response for a particular
8n/arequest. This diagram details these state transitions:
9n/a
10n/a (null)
11n/a |
12n/a | HTTPConnection()
13n/a v
14n/a Idle
15n/a |
16n/a | putrequest()
17n/a v
18n/a Request-started
19n/a |
20n/a | ( putheader() )* endheaders()
21n/a v
22n/a Request-sent
23n/a |
24n/a | response = getresponse()
25n/a v
26n/a Unread-response [Response-headers-read]
27n/a |\____________________
28n/a | |
29n/a | response.read() | putrequest()
30n/a v v
31n/a Idle Req-started-unread-response
32n/a ______/|
33n/a / |
34n/a response.read() | | ( putheader() )* endheaders()
35n/a v v
36n/a Request-started Req-sent-unread-response
37n/a |
38n/a | response.read()
39n/a v
40n/a Request-sent
41n/a
42n/aThis diagram presents the following rules:
43n/a -- a second request may not be started until {response-headers-read}
44n/a -- a response [object] cannot be retrieved until {request-sent}
45n/a -- there is no differentiation between an unread response body and a
46n/a partially read response body
47n/a
48n/aNote: this enforcement is applied by the HTTPConnection class. The
49n/a HTTPResponse class does not enforce this state machine, which
50n/a implies sophisticated clients may accelerate the request/response
51n/a pipeline. Caution should be taken, though: accelerating the states
52n/a beyond the above pattern may imply knowledge of the server's
53n/a connection-close behavior for certain requests. For example, it
54n/a is impossible to tell whether the server will close the connection
55n/a UNTIL the response headers have been read; this means that further
56n/a requests cannot be placed into the pipeline until it is known that
57n/a the server will NOT be closing the connection.
58n/a
59n/aLogical State __state __response
60n/a------------- ------- ----------
61n/aIdle _CS_IDLE None
62n/aRequest-started _CS_REQ_STARTED None
63n/aRequest-sent _CS_REQ_SENT None
64n/aUnread-response _CS_IDLE <response_class>
65n/aReq-started-unread-response _CS_REQ_STARTED <response_class>
66n/aReq-sent-unread-response _CS_REQ_SENT <response_class>
671"""
68n/a
691from array import array
701import socket
711from sys import py3kwarning
721from urlparse import urlsplit
731import warnings
741with warnings.catch_warnings():
751 if py3kwarning:
760 warnings.filterwarnings("ignore", ".*mimetools has been removed",
770 DeprecationWarning)
781 import mimetools
79n/a
801try:
811 from cStringIO import StringIO
820except ImportError:
830 from StringIO import StringIO
84n/a
851__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
861 "HTTPException", "NotConnected", "UnknownProtocol",
871 "UnknownTransferEncoding", "UnimplementedFileMode",
881 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
891 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
901 "BadStatusLine", "error", "responses"]
91n/a
921HTTP_PORT = 80
931HTTPS_PORT = 443
94n/a
951_UNKNOWN = 'UNKNOWN'
96n/a
97n/a# connection states
981_CS_IDLE = 'Idle'
991_CS_REQ_STARTED = 'Request-started'
1001_CS_REQ_SENT = 'Request-sent'
101n/a
102n/a# status codes
103n/a# informational
1041CONTINUE = 100
1051SWITCHING_PROTOCOLS = 101
1061PROCESSING = 102
107n/a
108n/a# successful
1091OK = 200
1101CREATED = 201
1111ACCEPTED = 202
1121NON_AUTHORITATIVE_INFORMATION = 203
1131NO_CONTENT = 204
1141RESET_CONTENT = 205
1151PARTIAL_CONTENT = 206
1161MULTI_STATUS = 207
1171IM_USED = 226
118n/a
119n/a# redirection
1201MULTIPLE_CHOICES = 300
1211MOVED_PERMANENTLY = 301
1221FOUND = 302
1231SEE_OTHER = 303
1241NOT_MODIFIED = 304
1251USE_PROXY = 305
1261TEMPORARY_REDIRECT = 307
127n/a
128n/a# client error
1291BAD_REQUEST = 400
1301UNAUTHORIZED = 401
1311PAYMENT_REQUIRED = 402
1321FORBIDDEN = 403
1331NOT_FOUND = 404
1341METHOD_NOT_ALLOWED = 405
1351NOT_ACCEPTABLE = 406
1361PROXY_AUTHENTICATION_REQUIRED = 407
1371REQUEST_TIMEOUT = 408
1381CONFLICT = 409
1391GONE = 410
1401LENGTH_REQUIRED = 411
1411PRECONDITION_FAILED = 412
1421REQUEST_ENTITY_TOO_LARGE = 413
1431REQUEST_URI_TOO_LONG = 414
1441UNSUPPORTED_MEDIA_TYPE = 415
1451REQUESTED_RANGE_NOT_SATISFIABLE = 416
1461EXPECTATION_FAILED = 417
1471UNPROCESSABLE_ENTITY = 422
1481LOCKED = 423
1491FAILED_DEPENDENCY = 424
1501UPGRADE_REQUIRED = 426
151n/a
152n/a# server error
1531INTERNAL_SERVER_ERROR = 500
1541NOT_IMPLEMENTED = 501
1551BAD_GATEWAY = 502
1561SERVICE_UNAVAILABLE = 503
1571GATEWAY_TIMEOUT = 504
1581HTTP_VERSION_NOT_SUPPORTED = 505
1591INSUFFICIENT_STORAGE = 507
1601NOT_EXTENDED = 510
161n/a
162n/a# Mapping status codes to official W3C names
1631responses = {
1641 100: 'Continue',
1651 101: 'Switching Protocols',
166n/a
1671 200: 'OK',
1681 201: 'Created',
1691 202: 'Accepted',
1701 203: 'Non-Authoritative Information',
1711 204: 'No Content',
1721 205: 'Reset Content',
1731 206: 'Partial Content',
174n/a
1751 300: 'Multiple Choices',
1761 301: 'Moved Permanently',
1771 302: 'Found',
1781 303: 'See Other',
1791 304: 'Not Modified',
1801 305: 'Use Proxy',
1811 306: '(Unused)',
1821 307: 'Temporary Redirect',
183n/a
1841 400: 'Bad Request',
1851 401: 'Unauthorized',
1861 402: 'Payment Required',
1871 403: 'Forbidden',
1881 404: 'Not Found',
1891 405: 'Method Not Allowed',
1901 406: 'Not Acceptable',
1911 407: 'Proxy Authentication Required',
1921 408: 'Request Timeout',
1931 409: 'Conflict',
1941 410: 'Gone',
1951 411: 'Length Required',
1961 412: 'Precondition Failed',
1971 413: 'Request Entity Too Large',
1981 414: 'Request-URI Too Long',
1991 415: 'Unsupported Media Type',
2001 416: 'Requested Range Not Satisfiable',
2011 417: 'Expectation Failed',
202n/a
2031 500: 'Internal Server Error',
2041 501: 'Not Implemented',
2051 502: 'Bad Gateway',
2061 503: 'Service Unavailable',
2071 504: 'Gateway Timeout',
2081 505: 'HTTP Version Not Supported',
209n/a}
210n/a
211n/a# maximal amount of data to read at one time in _safe_read
2121MAXAMOUNT = 1048576
213n/a
2142class HTTPMessage(mimetools.Message):
215n/a
2161 def addheader(self, key, value):
217n/a """Add header for field key handling repeats."""
218624 prev = self.dict.get(key)
219624 if prev is None:
220623 self.dict[key] = value
221n/a else:
2221 combined = ", ".join((prev, value))
2231 self.dict[key] = combined
224n/a
2251 def addcontinue(self, key, more):
226n/a """Add more field data from a continuation line."""
2272 prev = self.dict[key]
2282 self.dict[key] = prev + "\n " + more
229n/a
2301 def readheaders(self):
231n/a """Read header lines.
232n/a
233n/a Read header lines up to the entirely blank line that terminates them.
234n/a The (normally blank) line that ends the headers is skipped, but not
235n/a included in the returned list. If a non-header line ends the headers,
236n/a (which is an error), an attempt is made to backspace over it; it is
237n/a never included in the returned list.
238n/a
239n/a The variable self.status is set to the empty string if all went well,
240n/a otherwise it is an error message. The variable self.headers is a
241n/a completely uninterpreted list of lines contained in the header (so
242n/a printing them will reproduce the header exactly as it appears in the
243n/a file).
244n/a
245n/a If multiple header fields with the same name occur, they are combined
246n/a according to the rules in RFC 2616 sec 4.2:
247n/a
248n/a Appending each subsequent field-value to the first, each separated
249n/a by a comma. The order in which header fields with the same field-name
250n/a are received is significant to the interpretation of the combined
251n/a field value.
252n/a """
253n/a # XXX The implementation overrides the readheaders() method of
254n/a # rfc822.Message. The base class design isn't amenable to
255n/a # customized behavior here so the method here is a copy of the
256n/a # base class code with a few small changes.
257n/a
258134 self.dict = {}
259134 self.unixfrom = ''
260134 self.headers = hlist = []
261134 self.status = ''
262134 headerseen = ""
263134 firstline = 1
264134 startofline = unread = tell = None
265134 if hasattr(self.fp, 'unread'):
2660 unread = self.fp.unread
267134 elif self.seekable:
2681 tell = self.fp.tell
269760 while True:
270760 if tell:
2711 try:
2721 startofline = tell()
2730 except IOError:
2740 startofline = tell = None
2750 self.seekable = 0
276760 line = self.fp.readline()
277760 if not line:
2782 self.status = 'EOF in headers'
2792 break
280n/a # Skip unix From name time lines
281758 if firstline and line.startswith('From '):
2820 self.unixfrom = self.unixfrom + line
2830 continue
284758 firstline = 0
285758 if headerseen and line[0] in ' \t':
286n/a # XXX Not sure if continuation lines are handled properly
287n/a # for http and/or for repeating headers
288n/a # It's a continuation line.
2892 hlist.append(line)
2902 self.addcontinue(headerseen, line.strip())
2912 continue
292756 elif self.iscomment(line):
293n/a # It's a comment. Ignore it.
2940 continue
295756 elif self.islast(line):
296n/a # Note! No pushback here! The delimiter line gets eaten.
297132 break
298624 headerseen = self.isheader(line)
299624 if headerseen:
300n/a # It's a legal header line, save it.
301624 hlist.append(line)
302624 self.addheader(headerseen, line[len(headerseen)+1:].strip())
303624 continue
304n/a else:
305n/a # It's not a header line; throw it back and stop here.
3060 if not self.dict:
3070 self.status = 'No headers'
308n/a else:
3090 self.status = 'Non-header line where header expected'
310n/a # Try to undo the read.
3110 if unread:
3120 unread(line)
3130 elif tell:
3140 self.fp.seek(startofline)
315n/a else:
3160 self.status = self.status + '; bad seek'
3170 break
318n/a
3192class HTTPResponse:
320n/a
321n/a # strict: If true, raise BadStatusLine if the status line can't be
322n/a # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
323n/a # false because it prevents clients from talking to HTTP/0.9
324n/a # servers. Note that a response with a sufficiently corrupted
325n/a # status line will look like an HTTP/0.9 response.
326n/a
327n/a # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
328n/a
3291 def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
330140 if buffering:
331n/a # The caller won't be using any sock.recv() calls, so buffering
332n/a # is fine and recommended for performance.
33376 self.fp = sock.makefile('rb')
334n/a else:
335n/a # The buffer size is specified as zero, because the headers of
336n/a # the response are read with readline(). If the reads were
337n/a # buffered the readline() calls could consume some of the
338n/a # response, which make be read via a recv() on the underlying
339n/a # socket.
34064 self.fp = sock.makefile('rb', 0)
341136 self.debuglevel = debuglevel
342136 self.strict = strict
343136 self._method = method
344n/a
345136 self.msg = None
346n/a
347n/a # from the Status-Line of the response
348136 self.version = _UNKNOWN # HTTP-Version
349136 self.status = _UNKNOWN # Status-Code
350136 self.reason = _UNKNOWN # Reason-Phrase
351n/a
352136 self.chunked = _UNKNOWN # is "chunked" being used?
353136 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
354136 self.length = _UNKNOWN # number of bytes left in response
355136 self.will_close = _UNKNOWN # conn will close at end of response
356n/a
3571 def _read_status(self):
358n/a # Initialize with Simple-Response defaults
359136 line = self.fp.readline()
360136 if self.debuglevel > 0:
3610 print "reply:", repr(line)
362136 if not line:
363n/a # Presumably, the server closed the connection before
364n/a # sending a valid response.
3651 raise BadStatusLine(line)
366135 try:
367135 [version, status, reason] = line.split(None, 2)
3682 except ValueError:
3692 try:
3702 [version, status] = line.split(None, 1)
3711 reason = ""
3721 except ValueError:
373n/a # empty version will cause next test to fail and status
374n/a # will be treated as 0.9 response.
3751 version = ""
376135 if not version.startswith('HTTP/'):
3771 if self.strict:
3780 self.close()
3790 raise BadStatusLine(line)
380n/a else:
381n/a # assume it's a Simple-Response from an 0.9 server
3821 self.fp = LineAndFileWrapper(line, self.fp)
3831 return "HTTP/0.9", 200, ""
384n/a
385n/a # The status code is a three-digit number
386134 try:
387134 status = int(status)
388133 if status < 100 or status > 999:
3890 raise BadStatusLine(line)
3901 except ValueError:
3911 raise BadStatusLine(line)
392133 return version, status, reason
393n/a
3941 def begin(self):
395136 if self.msg is not None:
396n/a # we've already started reading the response
3970 return
398n/a
399n/a # read until we get a non-100 response
400136 while True:
401136 version, status, reason = self._read_status()
402134 if status != CONTINUE:
403134 break
404n/a # skip the header from the 100 response
4050 while True:
4060 skip = self.fp.readline().strip()
4070 if not skip:
4080 break
4090 if self.debuglevel > 0:
4100 print "header:", skip
411n/a
412134 self.status = status
413134 self.reason = reason.strip()
414134 if version == 'HTTP/1.0':
41559 self.version = 10
41675 elif version.startswith('HTTP/1.'):
41774 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
4181 elif version == 'HTTP/0.9':
4191 self.version = 9
420n/a else:
4210 raise UnknownProtocol(version)
422n/a
423134 if self.version == 9:
4241 self.length = None
4251 self.chunked = 0
4261 self.will_close = 1
4271 self.msg = HTTPMessage(StringIO())
4281 return
429n/a
430133 self.msg = HTTPMessage(self.fp, 0)
431133 if self.debuglevel > 0:
4320 for hdr in self.msg.headers:
4330 print "header:", hdr,
434n/a
435n/a # don't let the msg keep an fp
436133 self.msg.fp = None
437n/a
438n/a # are we using the chunked-style of transfer encoding?
439133 tr_enc = self.msg.getheader('transfer-encoding')
440133 if tr_enc and tr_enc.lower() == "chunked":
4414 self.chunked = 1
4424 self.chunk_left = None
443n/a else:
444129 self.chunked = 0
445n/a
446n/a # will the connection close at the end of the response?
447133 self.will_close = self._check_close()
448n/a
449n/a # do we have a Content-Length?
450n/a # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
451133 length = self.msg.getheader('content-length')
452133 if length and not self.chunked:
45379 try:
45479 self.length = int(length)
4550 except ValueError:
4560 self.length = None
457n/a else:
45879 if self.length < 0: # ignore nonsensical negative lengths
4591 self.length = None
460n/a else:
46154 self.length = None
462n/a
463n/a # does the body have a fixed length? (of zero)
464133 if (status == NO_CONTENT or status == NOT_MODIFIED or
465131 100 <= status < 200 or # 1xx codes
466131 self._method == 'HEAD'):
4675 self.length = 0
468n/a
469n/a # if the connection remains open, and we aren't using chunked, and
470n/a # a content-length was not provided, then assume that the connection
471n/a # WILL close.
472133 if not self.will_close and \
47327 not self.chunked and \
47423 self.length is None:
4753 self.will_close = 1
476n/a
4771 def _check_close(self):
478133 conn = self.msg.getheader('connection')
479133 if self.version == 11:
480n/a # An HTTP/1.1 proxy is assumed to stay open unless
481n/a # explicitly closed.
48274 conn = self.msg.getheader('connection')
48374 if conn and "close" in conn.lower():
48447 return True
48527 return False
486n/a
487n/a # Some HTTP/1.0 implementations have support for persistent
488n/a # connections, using rules different than HTTP/1.1.
489n/a
490n/a # For older HTTP, Keep-Alive indicates persistent connection.
49159 if self.msg.getheader('keep-alive'):
4920 return False
493n/a
494n/a # At least Akamai returns a "Connection: Keep-Alive" header,
495n/a # which was supposed to be sent by the client.
49659 if conn and "keep-alive" in conn.lower():
4970 return False
498n/a
499n/a # Proxy-Connection is a netscape hack.
50059 pconn = self.msg.getheader('proxy-connection')
50159 if pconn and "keep-alive" in pconn.lower():
5020 return False
503n/a
504n/a # otherwise, assume it will close
50559 return True
506n/a
5071 def close(self):
508122 if self.fp:
509101 self.fp.close()
510101 self.fp = None
511n/a
5121 def isclosed(self):
513n/a # NOTE: it is possible that we will not ever call self.close(). This
514n/a # case occurs when will_close is TRUE, length is None, and we
515n/a # read up to the last byte, but NOT past it.
516n/a #
517n/a # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
518n/a # called, meaning self.isclosed() is meaningful.
51913 return self.fp is None
520n/a
521n/a # XXX It would be nice to have readline and __iter__ for this, too.
522n/a
5231 def read(self, amt=None):
5241073 if self.fp is None:
52558 return ''
526n/a
5271015 if self._method == 'HEAD':
5283 self.close()
5293 return ''
530n/a
5311012 if self.chunked:
5323 return self._read_chunked(amt)
533n/a
5341009 if amt is None:
535n/a # unbounded read
53627 if self.length is None:
53714 s = self.fp.read()
538n/a else:
53913 s = self._safe_read(self.length)
54012 self.length = 0
54126 self.close() # we read everything
54226 return s
543n/a
544982 if self.length is not None:
545968 if amt > self.length:
546n/a # clip the read to the "end of response"
54743 amt = self.length
548n/a
549n/a # we do not use _safe_read() here because this may be a .will_close
550n/a # connection, and the user is reading more bytes than will be provided
551n/a # (for example, reading in 1k chunks)
552982 s = self.fp.read(amt)
553982 if self.length is not None:
554968 self.length -= len(s)
555968 if not self.length:
55644 self.close()
557982 return s
558n/a
5591 def _read_chunked(self, amt):
5603 assert self.chunked != _UNKNOWN
5613 chunk_left = self.chunk_left
5623 value = []
5639 while True:
5649 if chunk_left is None:
5659 line = self.fp.readline()
5669 i = line.find(';')
5679 if i >= 0:
5680 line = line[:i] # strip chunk-extensions
5699 try:
5709 chunk_left = int(line, 16)
5712 except ValueError:
572n/a # close the connection as protocol synchronisation is
573n/a # probably lost
5742 self.close()
5752 raise IncompleteRead(''.join(value))
5767 if chunk_left == 0:
5771 break
5786 if amt is None:
5796 value.append(self._safe_read(chunk_left))
5800 elif amt < chunk_left:
5810 value.append(self._safe_read(amt))
5820 self.chunk_left = chunk_left - amt
5830 return ''.join(value)
5840 elif amt == chunk_left:
5850 value.append(self._safe_read(amt))
5860 self._safe_read(2) # toss the CRLF at the end of the chunk
5870 self.chunk_left = None
5880 return ''.join(value)
589n/a else:
5900 value.append(self._safe_read(chunk_left))
5910 amt -= chunk_left
592n/a
593n/a # we read the whole chunk, get another
5946 self._safe_read(2) # toss the CRLF at the end of the chunk
5956 chunk_left = None
596n/a
597n/a # read and discard trailer up to the CRLF terminator
598n/a ### note: we shouldn't have any trailers!
5991 while True:
6001 line = self.fp.readline()
6011 if not line:
602n/a # a vanishingly small number of sites EOF without
603n/a # sending the trailer
6041 break
6050 if line == '\r\n':
6060 break
607n/a
608n/a # we read everything; close the "file"
6091 self.close()
610n/a
6111 return ''.join(value)
612n/a
6131 def _safe_read(self, amt):
614n/a """Read the number of bytes requested, compensating for partial reads.
615n/a
616n/a Normally, we have a blocking socket, but a read() can be interrupted
617n/a by a signal (resulting in a partial read).
618n/a
619n/a Note that we cannot distinguish between EOF and an interrupt when zero
620n/a bytes have been read. IncompleteRead() will be raised in this
621n/a situation.
622n/a
623n/a This function should be used when <amt> bytes "should" be present for
624n/a reading. If the bytes are truly not available (due to EOF), then the
625n/a IncompleteRead exception can be used to detect the problem.
626n/a """
627n/a # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
628n/a # return less than x bytes unless EOF is encountered. It now handles
629n/a # signal interruptions (socket.error EINTR) internally. This code
630n/a # never caught that exception anyways. It seems largely pointless.
631n/a # self.fp.read(amt) will work fine.
63225 s = []
63347 while amt > 0:
63423 chunk = self.fp.read(min(amt, MAXAMOUNT))
63523 if not chunk:
6361 raise IncompleteRead(''.join(s), amt)
63722 s.append(chunk)
63822 amt -= len(chunk)
63924 return ''.join(s)
640n/a
6411 def getheader(self, name, default=None):
64245 if self.msg is None:
6430 raise ResponseNotReady()
64445 return self.msg.getheader(name, default)
645n/a
6461 def getheaders(self):
647n/a """Return list of (header, value) tuples."""
6480 if self.msg is None:
6490 raise ResponseNotReady()
6500 return self.msg.items()
651n/a
652n/a
6532class HTTPConnection:
654n/a
6551 _http_vsn = 11
6561 _http_vsn_str = 'HTTP/1.1'
657n/a
6581 response_class = HTTPResponse
6591 default_port = HTTP_PORT
6601 auto_open = 1
6611 debuglevel = 0
6621 strict = 0
663n/a
6641 def __init__(self, host, port=None, strict=None,
6651 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
666141 self.timeout = timeout
667141 self.source_address = source_address
668141 self.sock = None
669141 self._buffer = []
670141 self.__response = None
671141 self.__state = _CS_IDLE
672141 self._method = None
673141 self._tunnel_host = None
674141 self._tunnel_port = None
675141 self._tunnel_headers = {}
676n/a
677141 self._set_hostport(host, port)
678139 if strict is not None:
6790 self.strict = strict
680n/a
6811 def set_tunnel(self, host, port=None, headers=None):
682n/a """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
683n/a
684n/a The headers argument should be a mapping of extra HTTP headers
685n/a to send with the CONNECT request.
686n/a """
6870 self._tunnel_host = host
6880 self._tunnel_port = port
6890 if headers:
6900 self._tunnel_headers = headers
691n/a else:
6920 self._tunnel_headers.clear()
693n/a
6941 def _set_hostport(self, host, port):
695141 if port is None:
696105 i = host.rfind(':')
697105 j = host.rfind(']') # ipv6 addresses have [...]
698105 if i > j:
69953 try:
70053 port = int(host[i+1:])
7012 except ValueError:
7022 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
70351 host = host[:i]
704n/a else:
70552 port = self.default_port
706103 if host and host[0] == '[' and host[-1] == ']':
7072 host = host[1:-1]
708139 self.host = host
709139 self.port = port
710n/a
7111 def set_debuglevel(self, level):
71241 self.debuglevel = level
713n/a
7141 def _tunnel(self):
7150 self._set_hostport(self._tunnel_host, self._tunnel_port)
7160 self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
7170 for header, value in self._tunnel_headers.iteritems():
7180 self.send("%s: %s\r\n" % (header, value))
7190 self.send("\r\n")
7200 response = self.response_class(self.sock, strict = self.strict,
7210 method = self._method)
7220 (version, code, message) = response._read_status()
723n/a
7240 if code != 200:
7250 self.close()
7260 raise socket.error("Tunnel connection failed: %d %s" % (code,
7270 message.strip()))
7280 while True:
7290 line = response.fp.readline()
7300 if line == '\r\n': break
731n/a
732n/a
7331 def connect(self):
734n/a """Connect to the host and port specified in __init__."""
735119 self.sock = socket.create_connection((self.host,self.port),
736119 self.timeout, self.source_address)
737n/a
738117 if self._tunnel_host:
7390 self._tunnel()
740n/a
7411 def close(self):
742n/a """Close the connection to the HTTP server."""
743125 if self.sock:
744118 self.sock.close() # close it manually... there may be other refs
745118 self.sock = None
746125 if self.__response:
7472 self.__response.close()
7482 self.__response = None
749125 self.__state = _CS_IDLE
750n/a
7511 def send(self, str):
752n/a """Send `str' to the server."""
753143 if self.sock is None:
754105 if self.auto_open:
755105 self.connect()
756n/a else:
7570 raise NotConnected()
758n/a
759n/a # send the data to the server. if we get a broken pipe, then close
760n/a # the socket. we want to reconnect when somebody tries to send again.
761n/a #
762n/a # NOTE: we DO propagate the error, though, because we cannot simply
763n/a # ignore the error... the caller will know if they can retry.
764141 if self.debuglevel > 0:
7650 print "send:", repr(str)
766141 try:
767141 blocksize=8192
768141 if hasattr(str,'read') and not isinstance(str, array):
7692 if self.debuglevel > 0: print "sendIng a read()able"
7702 data=str.read(blocksize)
7715 while data:
7723 self.sock.sendall(data)
7733 data=str.read(blocksize)
774n/a else:
775139 self.sock.sendall(str)
7760 except socket.error, v:
7770 if v.args[0] == 32: # Broken pipe
7780 self.close()
7790 raise
780n/a
7811 def _output(self, s):
782n/a """Add a line of output to the current request buffer.
783n/a
784n/a Assumes that the line does *not* end with \\r\\n.
785n/a """
786633 self._buffer.append(s)
787n/a
7881 def _send_output(self, message_body=None):
789n/a """Send the currently buffered request and clear the buffer.
790n/a
791n/a Appends an extra \\r\\n to the buffer.
792n/a A message_body may be specified, to be appended to the request.
793n/a """
794139 self._buffer.extend(("", ""))
795139 msg = "\r\n".join(self._buffer)
796139 del self._buffer[:]
797n/a # If msg and message_body are sent in a single send() call,
798n/a # it will avoid performance problems caused by the interaction
799n/a # between delayed ack and the Nagle algorithim.
800139 if isinstance(message_body, str):
80144 msg += message_body
80244 message_body = None
803139 self.send(msg)
804137 if message_body is not None:
805n/a #message_body was not a string (i.e. it is a file) and
806n/a #we must run the risk of Nagle
8071 self.send(message_body)
808n/a
8091 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
810n/a """Send a request to the server.
811n/a
812n/a `method' specifies an HTTP request method, e.g. 'GET'.
813n/a `url' specifies the object being requested, e.g. '/index.html'.
814n/a `skip_host' if True does not add automatically a 'Host:' header
815n/a `skip_accept_encoding' if True does not add automatically an
816n/a 'Accept-Encoding:' header
817n/a """
818n/a
819n/a # if a prior response has been completed, then forget about it.
820139 if self.__response and self.__response.isclosed():
8218 self.__response = None
822n/a
823n/a
824n/a # in certain cases, we cannot issue another request on this connection.
825n/a # this occurs when:
826n/a # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
827n/a # 2) a response to a previous request has signalled that it is going
828n/a # to close the connection upon completion.
829n/a # 3) the headers for the previous response have not been read, thus
830n/a # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
831n/a #
832n/a # if there is no prior response, then we can request at will.
833n/a #
834n/a # if point (2) is true, then we will have passed the socket to the
835n/a # response (effectively meaning, "there is no prior response"), and
836n/a # will open a new one when a new request is made.
837n/a #
838n/a # Note: if a prior response exists, then we *can* start a new request.
839n/a # We are not allowed to begin fetching the response to this new
840n/a # request, however, until that prior response is complete.
841n/a #
842139 if self.__state == _CS_IDLE:
843139 self.__state = _CS_REQ_STARTED
844n/a else:
8450 raise CannotSendRequest()
846n/a
847n/a # Save the method we use, we need it later in the response phase
848139 self._method = method
849139 if not url:
85010 url = '/'
851139 str = '%s %s %s' % (method, url, self._http_vsn_str)
852n/a
853139 self._output(str)
854n/a
855139 if self._http_vsn == 11:
856n/a # Issue some standard headers for better HTTP/1.1 compliance
857n/a
858120 if not skip_host:
859n/a # this header is issued *only* for HTTP/1.1
860n/a # connections. more specifically, this means it is
861n/a # only issued when the client uses the new
862n/a # HTTPConnection() class. backwards-compat clients
863n/a # will be using HTTP/1.0 and those clients may be
864n/a # issuing this header themselves. we should NOT issue
865n/a # it twice; some web servers (such as Apache) barf
866n/a # when they see two Host: headers
867n/a
868n/a # If we need a non-standard port,include it in the
869n/a # header. If the request is going through a proxy,
870n/a # but the host of the actual URL, not the host of the
871n/a # proxy.
872n/a
87378 netloc = ''
87478 if url.startswith('http'):
8750 nil, netloc, nil, nil, nil = urlsplit(url)
876n/a
87778 if netloc:
8780 try:
8790 netloc_enc = netloc.encode("ascii")
8800 except UnicodeEncodeError:
8810 netloc_enc = netloc.encode("idna")
8820 self.putheader('Host', netloc_enc)
883n/a else:
88478 try:
88578 host_enc = self.host.encode("ascii")
8860 except UnicodeEncodeError:
8870 host_enc = self.host.encode("idna")
88878 if self.port == self.default_port:
88910 self.putheader('Host', host_enc)
890n/a else:
89168 self.putheader('Host', "%s:%s" % (host_enc, self.port))
892n/a
893n/a # note: we are assuming that clients will not attempt to set these
894n/a # headers since *this* library must deal with the
895n/a # consequences. this also means that when the supporting
896n/a # libraries are updated to recognize other forms, then this
897n/a # code should be changed (removed or updated).
898n/a
899n/a # we only want a Content-Encoding of "identity" since we don't
900n/a # support encodings such as x-gzip or x-deflate.
901120 if not skip_accept_encoding:
90283 self.putheader('Accept-Encoding', 'identity')
903n/a
904n/a # we can accept "chunked" Transfer-Encodings, but no others
905n/a # NOTE: no TE header implies *only* "chunked"
906n/a #self.putheader('TE', 'chunked')
907n/a
908n/a # if TE is supplied in the header, then it must appear in a
909n/a # Connection header.
910n/a #self.putheader('Connection', 'TE')
911n/a
912n/a else:
913n/a # For HTTP/1.0, the server will assume "not chunked"
914139 pass
915n/a
9161 def putheader(self, header, *values):
917n/a """Send a request header line to the server.
918n/a
919n/a For example: h.putheader('Accept', 'text/html')
920n/a """
921494 if self.__state != _CS_REQ_STARTED:
9220 raise CannotSendHeader()
923n/a
924494 str = '%s: %s' % (header, '\r\n\t'.join(values))
925494 self._output(str)
926n/a
9271 def endheaders(self, message_body=None):
928n/a """Indicate that the last header line has been sent to the server.
929n/a
930n/a This method sends the request to the server. The optional
931n/a message_body argument can be used to pass message body
932n/a associated with the request. The message body will be sent in
933n/a the same packet as the message headers if possible. The
934n/a message_body should be a string.
935n/a """
936139 if self.__state == _CS_REQ_STARTED:
937139 self.__state = _CS_REQ_SENT
938n/a else:
9390 raise CannotSendHeader()
940139 self._send_output(message_body)
941n/a
9421 def request(self, method, url, body=None, headers={}):
943n/a """Send a complete request to the server."""
944n/a
94576 try:
94676 self._send_request(method, url, body, headers)
9471 except socket.error, v:
948n/a # trap 'Broken pipe' if we're allowed to automatically reconnect
9491 if v.args[0] != 32 or not self.auto_open:
9501 raise
951n/a # try one more time
9520 self._send_request(method, url, body, headers)
953n/a
9541 def _set_content_length(self, body):
955n/a # Set the content-length based on the body.
9567 thelen = None
9577 try:
9587 thelen = str(len(body))
9591 except TypeError, te:
960n/a # If this is a file-like object, try to
961n/a # fstat its file descriptor
9621 import os
9631 try:
9641 thelen = str(os.fstat(body.fileno()).st_size)
9650 except (AttributeError, OSError):
966n/a # Don't send a length if this failed
9670 if self.debuglevel > 0: print "Cannot stat!!"
968n/a
9697 if thelen is not None:
9707 self.putheader('Content-Length', thelen)
971n/a
9721 def _send_request(self, method, url, body, headers):
973n/a # honour explicitly requested Host: and Accept-Encoding headers
974214 header_names = dict.fromkeys([k.lower() for k in headers])
97576 skips = {}
97676 if 'host' in header_names:
97742 skips['skip_host'] = 1
97876 if 'accept-encoding' in header_names:
9791 skips['skip_accept_encoding'] = 1
980n/a
98176 self.putrequest(method, url, **skips)
982n/a
98376 if body and ('content-length' not in header_names):
9847 self._set_content_length(body)
985214 for hdr, value in headers.iteritems():
986138 self.putheader(hdr, value)
98776 self.endheaders(body)
988n/a
9891 def getresponse(self, buffering=False):
990n/a "Get the response from the server."
991n/a
992n/a # if a prior response has been completed, then forget about it.
993129 if self.__response and self.__response.isclosed():
9940 self.__response = None
995n/a
996n/a #
997n/a # if a prior response exists, then it must be completed (otherwise, we
998n/a # cannot read this response's header to determine the connection-close
999n/a # behavior)
1000n/a #
1001n/a # note: if a prior response existed, but was connection-close, then the
1002n/a # socket and response were made independent of this HTTPConnection
1003n/a # object since a new request requires that we open a whole new
1004n/a # connection
1005n/a #
1006n/a # this means the prior response had one of two states:
1007n/a # 1) will_close: this connection was reset and the prior socket and
1008n/a # response operate independently
1009n/a # 2) persistent: the response was retained and we await its
1010n/a # isclosed() status to become true.
1011n/a #
1012129 if self.__state != _CS_REQ_SENT or self.__response:
10130 raise ResponseNotReady()
1014n/a
1015129 args = (self.sock,)
1016129 kwds = {"strict":self.strict, "method":self._method}
1017129 if self.debuglevel > 0:
10180 args += (self.debuglevel,)
1019129 if buffering:
1020n/a #only add this keyword if non-default, for compatibility with
1021n/a #other response_classes.
102276 kwds["buffering"] = True;
1023129 response = self.response_class(*args, **kwds)
1024n/a
1025125 response.begin()
1026124 assert response.will_close != _UNKNOWN
1027124 self.__state = _CS_IDLE
1028n/a
1029124 if response.will_close:
1030n/a # this effectively passes the connection to the response
1031107 self.close()
1032n/a else:
1033n/a # remember this, so we can tell when it is complete
103417 self.__response = response
1035n/a
1036124 return response
1037n/a
1038n/a
10392class HTTP:
10401 "Compatibility class with httplib.py from 1.5."
1041n/a
10421 _http_vsn = 10
10431 _http_vsn_str = 'HTTP/1.0'
1044n/a
10451 debuglevel = 0
1046n/a
10471 _connection_class = HTTPConnection
1048n/a
10491 def __init__(self, host='', port=None, strict=None):
1050n/a "Provide a default host, since the superclass requires one."
1051n/a
1052n/a # some joker passed 0 explicitly, meaning default port
105323 if port == 0:
10540 port = None
1055n/a
1056n/a # Note that we may pass an empty string as the host; this will throw
1057n/a # an error when we attempt to connect. Presumably, the client code
1058n/a # will call connect before then, with a proper host.
105923 self._setup(self._connection_class(host, port, strict))
1060n/a
10611 def _setup(self, conn):
106222 self._conn = conn
1063n/a
1064n/a # set up delegation to flesh out interface
106522 self.send = conn.send
106622 self.putrequest = conn.putrequest
106722 self.putheader = conn.putheader
106822 self.endheaders = conn.endheaders
106922 self.set_debuglevel = conn.set_debuglevel
1070n/a
107122 conn._http_vsn = self._http_vsn
107222 conn._http_vsn_str = self._http_vsn_str
1073n/a
107422 self.file = None
1075n/a
10761 def connect(self, host=None, port=None):
1077n/a "Accept arguments to set the host/port, since the superclass doesn't."
1078n/a
10790 if host is not None:
10800 self._conn._set_hostport(host, port)
10810 self._conn.connect()
1082n/a
10831 def getfile(self):
1084n/a "Provide a getfile, since the superclass' does not use this concept."
108517 return self.file
1086n/a
10871 def getreply(self, buffering=False):
1088n/a """Compat definition since superclass does not define it.
1089n/a
1090n/a Returns a tuple consisting of:
1091n/a - server status code (e.g. '200' if all goes well)
1092n/a - server "reason" corresponding to status code
1093n/a - any RFC822 headers in the response from the server
1094n/a """
109517 try:
109617 if not buffering:
109717 response = self._conn.getresponse()
1098n/a else:
1099n/a #only add this keyword if non-default for compatibility
1100n/a #with other connection classes
11010 response = self._conn.getresponse(buffering)
11021 except BadStatusLine, e:
1103n/a ### hmm. if getresponse() ever closes the socket on a bad request,
1104n/a ### then we are going to have problems with self.sock
1105n/a
1106n/a ### should we keep this behavior? do people use it?
1107n/a # keep the socket open (as a file), and return it
11081 self.file = self._conn.sock.makefile('rb', 0)
1109n/a
1110n/a # close our socket -- we want to restart after any protocol error
11111 self.close()
1112n/a
11131 self.headers = None
11141 return -1, e.line, None
1115n/a
111616 self.headers = response.msg
111716 self.file = response.fp
111816 return response.status, response.reason, response.msg
1119n/a
11201 def close(self):
11211 self._conn.close()
1122n/a
1123n/a # note that self.file == response.fp, which gets closed by the
1124n/a # superclass. just clear the object ref here.
1125n/a ### hmm. messy. if status==-1, then self.file is owned by us.
1126n/a ### well... we aren't explicitly closing, but losing this ref will
1127n/a ### do it
11281 self.file = None
1129n/a
11301try:
11311 import ssl
11320except ImportError:
11330 pass
1134n/aelse:
11352 class HTTPSConnection(HTTPConnection):
11361 "This class allows communication via SSL."
1137n/a
11381 default_port = HTTPS_PORT
1139n/a
11401 def __init__(self, host, port=None, key_file=None, cert_file=None,
11411 strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
11421 source_address=None):
11433 HTTPConnection.__init__(self, host, port, strict, timeout,
11443 source_address)
11453 self.key_file = key_file
11463 self.cert_file = cert_file
1147n/a
11481 def connect(self):
1149n/a "Connect to a host on a given (SSL) port."
1150n/a
11511 sock = socket.create_connection((self.host, self.port),
11521 self.timeout, self.source_address)
11531 if self._tunnel_host:
11540 self.sock = sock
11550 self._tunnel()
11561 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
1157n/a
11581 __all__.append("HTTPSConnection")
1159n/a
11602 class HTTPS(HTTP):
1161n/a """Compatibility with 1.5 httplib interface
1162n/a
1163n/a Python 1.5.2 did not have an HTTPS class, but it defined an
1164n/a interface for sending http requests that is also useful for
1165n/a https.
11661 """
1167n/a
11681 _connection_class = HTTPSConnection
1169n/a
11701 def __init__(self, host='', port=None, key_file=None, cert_file=None,
11711 strict=None):
1172n/a # provide a default host, pass the X509 cert info
1173n/a
1174n/a # urf. compensate for bad input.
11751 if port == 0:
11761 port = None
11771 self._setup(self._connection_class(host, port, key_file,
11781 cert_file, strict))
1179n/a
1180n/a # we never actually use these for anything, but we keep them
1181n/a # here for compatibility with post-1.5.2 CVS.
11821 self.key_file = key_file
11831 self.cert_file = cert_file
1184n/a
1185n/a
11861 def FakeSocket (sock, sslobj):
11870 warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " +
11880 "Use the result of ssl.wrap_socket() directly instead.",
11890 DeprecationWarning, stacklevel=2)
11900 return sslobj
1191n/a
1192n/a
11932class HTTPException(Exception):
1194n/a # Subclasses that define an __init__ must call Exception.__init__
1195n/a # or define self.args. Otherwise, str() will fail.
11961 pass
1197n/a
11982class NotConnected(HTTPException):
11991 pass
1200n/a
12012class InvalidURL(HTTPException):
12021 pass
1203n/a
12042class UnknownProtocol(HTTPException):
12051 def __init__(self, version):
12060 self.args = version,
12070 self.version = version
1208n/a
12092class UnknownTransferEncoding(HTTPException):
12101 pass
1211n/a
12122class UnimplementedFileMode(HTTPException):
12131 pass
1214n/a
12152class IncompleteRead(HTTPException):
12161 def __init__(self, partial, expected=None):
12173 self.args = partial,
12183 self.partial = partial
12193 self.expected = expected
12201 def __repr__(self):
12216 if self.expected is not None:
12222 e = ', %i more expected' % self.expected
1223n/a else:
12244 e = ''
12256 return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e)
12261 def __str__(self):
12273 return repr(self)
1228n/a
12292class ImproperConnectionState(HTTPException):
12301 pass
1231n/a
12322class CannotSendRequest(ImproperConnectionState):
12331 pass
1234n/a
12352class CannotSendHeader(ImproperConnectionState):
12361 pass
1237n/a
12382class ResponseNotReady(ImproperConnectionState):
12391 pass
1240n/a
12412class BadStatusLine(HTTPException):
12421 def __init__(self, line):
12433 if not line:
12442 line = repr(line)
12453 self.args = line,
12463 self.line = line
1247n/a
1248n/a# for backwards compatibility
12491error = HTTPException
1250n/a
12512class LineAndFileWrapper:
12521 """A limited file-like object for HTTP/0.9 responses."""
1253n/a
1254n/a # The status-line parsing code calls readline(), which normally
1255n/a # get the HTTP status line. For a 0.9 response, however, this is
1256n/a # actually the first line of the body! Clients need to get a
1257n/a # readable file object that contains that line.
1258n/a
12591 def __init__(self, line, file):
12601 self._line = line
12611 self._file = file
12621 self._line_consumed = 0
12631 self._line_offset = 0
12641 self._line_left = len(line)
1265n/a
12661 def __getattr__(self, attr):
12675 return getattr(self._file, attr)
1268n/a
12691 def _done(self):
1270n/a # called when the last byte is read from the line. After the
1271n/a # call, all read methods are delegated to the underlying file
1272n/a # object.
12731 self._line_consumed = 1
12741 self.read = self._file.read
12751 self.readline = self._file.readline
12761 self.readlines = self._file.readlines
1277n/a
12781 def read(self, amt=None):
12790 if self._line_consumed:
12800 return self._file.read(amt)
12810 assert self._line_left
12820 if amt is None or amt > self._line_left:
12830 s = self._line[self._line_offset:]
12840 self._done()
12850 if amt is None:
12860 return s + self._file.read()
1287n/a else:
12880 return s + self._file.read(amt - len(s))
1289n/a else:
12900 assert amt <= self._line_left
12910 i = self._line_offset
12920 j = i + amt
12930 s = self._line[i:j]
12940 self._line_offset = j
12950 self._line_left -= amt
12960 if self._line_left == 0:
12970 self._done()
12980 return s
1299n/a
13001 def readline(self):
13012 if self._line_consumed:
13021 return self._file.readline()
13031 assert self._line_left
13041 s = self._line[self._line_offset:]
13051 self._done()
13061 return s
1307n/a
13081 def readlines(self, size=None):
13090 if self._line_consumed:
13100 return self._file.readlines(size)
13110 assert self._line_left
13120 L = [self._line[self._line_offset:]]
13130 self._done()
13140 if size is None:
13150 return L + self._file.readlines()
1316n/a else:
13170 return L + self._file.readlines(size)
1318n/a
13191def test():
1320n/a """Test this module.
1321n/a
1322n/a A hodge podge of tests collected here, because they have too many
1323n/a external dependencies for the regular test suite.
1324n/a """
1325n/a
13260 import sys
13270 import getopt
13280 opts, args = getopt.getopt(sys.argv[1:], 'd')
13290 dl = 0
13300 for o, a in opts:
13310 if o == '-d': dl = dl + 1
13320 host = 'www.python.org'
13330 selector = '/'
13340 if args[0:]: host = args[0]
13350 if args[1:]: selector = args[1]
13360 h = HTTP()
13370 h.set_debuglevel(dl)
13380 h.connect(host)
13390 h.putrequest('GET', selector)
13400 h.endheaders()
13410 status, reason, headers = h.getreply()
13420 print 'status =', status
13430 print 'reason =', reason
13440 print "read", len(h.getfile().read())
13450 print
13460 if headers:
13470 for header in headers.headers: print header.strip()
13480 print
1349n/a
1350n/a # minimal test that code to extract host from url works
13510 class HTTP11(HTTP):
13520 _http_vsn = 11
13530 _http_vsn_str = 'HTTP/1.1'
1354n/a
13550 h = HTTP11('www.python.org')
13560 h.putrequest('GET', 'http://www.python.org/~jeremy/')
13570 h.endheaders()
13580 h.getreply()
13590 h.close()
1360n/a
13610 try:
13620 import ssl
13630 except ImportError:
13640 pass
1365n/a else:
1366n/a
13670 for host, selector in (('sourceforge.net', '/projects/python'),
1368n/a ):
13690 print "https://%s%s" % (host, selector)
13700 hs = HTTPS()
13710 hs.set_debuglevel(dl)
13720 hs.connect(host)
13730 hs.putrequest('GET', selector)
13740 hs.endheaders()
13750 status, reason, headers = hs.getreply()
13760 print 'status =', status
13770 print 'reason =', reason
13780 print "read", len(hs.getfile().read())
13790 print
13800 if headers:
13810 for header in headers.headers: print header.strip()
13820 print
1383n/a
13841if __name__ == '__main__':
13850 test()