| 1 | n/a | r"""HTTP/1.1 client library |
|---|
| 2 | n/a | |
|---|
| 3 | n/a | <intro stuff goes here> |
|---|
| 4 | n/a | <other stuff, too> |
|---|
| 5 | n/a | |
|---|
| 6 | n/a | HTTPConnection goes through a number of "states", which define when a client |
|---|
| 7 | n/a | may legally make another request or fetch the response for a particular |
|---|
| 8 | n/a | request. This diagram details these state transitions: |
|---|
| 9 | n/a | |
|---|
    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |\_____________________________
      |                              | getresponse() raises
      | response = getresponse()     | ConnectionError
      v                              v
    Unread-response                Idle
    [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
|---|
| 43 | n/a | |
|---|
| 44 | n/a | This diagram presents the following rules: |
|---|
| 45 | n/a | -- a second request may not be started until {response-headers-read} |
|---|
| 46 | n/a | -- a response [object] cannot be retrieved until {request-sent} |
|---|
| 47 | n/a | -- there is no differentiation between an unread response body and a |
|---|
| 48 | n/a | partially read response body |
|---|
| 49 | n/a | |
|---|
| 50 | n/a | Note: this enforcement is applied by the HTTPConnection class. The |
|---|
| 51 | n/a | HTTPResponse class does not enforce this state machine, which |
|---|
| 52 | n/a | implies sophisticated clients may accelerate the request/response |
|---|
| 53 | n/a | pipeline. Caution should be taken, though: accelerating the states |
|---|
| 54 | n/a | beyond the above pattern may imply knowledge of the server's |
|---|
| 55 | n/a | connection-close behavior for certain requests. For example, it |
|---|
| 56 | n/a | is impossible to tell whether the server will close the connection |
|---|
| 57 | n/a | UNTIL the response headers have been read; this means that further |
|---|
| 58 | n/a | requests cannot be placed into the pipeline until it is known that |
|---|
| 59 | n/a | the server will NOT be closing the connection. |
|---|
| 60 | n/a | |
|---|
    Logical State                  __state            __response
    -------------                  -------            ----------
    Idle                           _CS_IDLE           None
    Request-started                _CS_REQ_STARTED    None
    Request-sent                   _CS_REQ_SENT       None
    Unread-response                _CS_IDLE           <response_class>
    Req-started-unread-response    _CS_REQ_STARTED    <response_class>
    Req-sent-unread-response       _CS_REQ_SENT       <response_class>
|---|
| 69 | n/a | """ |
|---|
| 70 | n/a | |
|---|
| 71 | n/a | import email.parser |
|---|
| 72 | n/a | import email.message |
|---|
| 73 | n/a | import http |
|---|
| 74 | n/a | import io |
|---|
| 75 | n/a | import re |
|---|
| 76 | n/a | import socket |
|---|
| 77 | n/a | import collections |
|---|
| 78 | n/a | from urllib.parse import urlsplit |
|---|
| 79 | n/a | |
|---|
# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

# Port number constants for plain HTTP and for HTTPS.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes by HTTPResponse.__init__.
_UNKNOWN = 'UNKNOWN'

# connection states (see the "__state" column of the table in the module
# docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'


# hack to maintain backwards compatibility
# Copies every http.HTTPStatus member into this module's namespace, so names
# such as CONTINUE, NO_CONTENT and NOT_MODIFIED resolve as module globals.
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
# (each key is an http.HTTPStatus member, each value is its .phrase)
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576

# maximal line length when calling readline().
_MAXLINE = 65536
# maximal number of lines parse_headers() collects before raising
_MAXHEADERS = 100

# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR          = %x21-7E
# obs-text       = %x80-FF
# header-field   = field-name ":" OWS field-value OWS
# field-name     = token
# field-value    = *( field-content / obs-fold )
# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar    = VCHAR / obs-text
#
# obs-fold       = CRLF 1*( SP / HTAB )
#                ; obsolete line folding
#                ; see Section 3.2.4

# token          = 1*tchar
#
# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                / DIGIT / ALPHA
#                ; any VCHAR, except delimiters
#
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1

# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
# Name: the whole bytes value must start with a byte that is neither ':' nor
# whitespace, followed by any bytes other than ':', CR or LF.
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
# Value: finds an LF not followed by SP/HTAB, or a CR not followed by
# SP/HTAB/LF, i.e. a line break that does not start a folded continuation.
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
|---|
| 146 | n/a | |
|---|
| 147 | n/a | |
|---|
| 148 | n/a | def _encode(data, name='data'): |
|---|
| 149 | n/a | """Call data.encode("latin-1") but show a better error message.""" |
|---|
| 150 | n/a | try: |
|---|
| 151 | n/a | return data.encode("latin-1") |
|---|
| 152 | n/a | except UnicodeEncodeError as err: |
|---|
| 153 | n/a | raise UnicodeEncodeError( |
|---|
| 154 | n/a | err.encoding, |
|---|
| 155 | n/a | err.object, |
|---|
| 156 | n/a | err.start, |
|---|
| 157 | n/a | err.end, |
|---|
| 158 | n/a | "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') " |
|---|
| 159 | n/a | "if you want to send it encoded in UTF-8." % |
|---|
| 160 | n/a | (name.title(), data[err.start:err.end], name)) from None |
|---|
| 161 | n/a | |
|---|
| 162 | n/a | |
|---|
class HTTPMessage(email.message.Message):
    """email.message.Message subclass used to hold HTTP headers."""

    # XXX The only usage of this method is in
    # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
    # that it doesn't need to be part of the public API.  The API has
    # never been defined so this could cause backwards compatibility
    # issues.

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the headers and collect every line belonging to a
        header called *name* (and its continuation lines).  Each entry is
        a "Name: value" string without a line terminator; continuation
        lines of a folded header follow their header line as separate
        entries.  If the header does not occur, an empty list is returned.
        If the header occurs multiple times, all occurrences are returned
        in message order.  Case is not important in the header name.

        """
        # The message stores (name, value) pairs rather than raw header
        # lines, so iterating over self.keys() and looking for a "name:"
        # prefix can never match.  Rebuild the lines from the pairs instead.
        wanted = name.lower()
        lines = []
        for key, value in self.items():
            if key.lower() == wanted:
                # If the stored value still contains line breaks (a folded
                # header), splitlines() yields the continuation lines too.
                lines.extend(("%s: %s" % (key, value)).splitlines())
        return lines
|---|
| 192 | n/a | |
|---|
def parse_headers(fp, _class=HTTPMessage):
    """Parse only the RFC 2822 header section read from file object *fp*.

    The email parser works on strings, but wrapping *fp* in a text layer
    would buffer bytes that must later be read as bytes.  So exactly the
    header lines are read here, as bytes, up to and including the blank
    line (or EOF), decoded as ISO-8859-1 and handed to the email parser.

    Raises LineTooLong if a line exceeds _MAXLINE bytes and HTTPException
    if more than _MAXHEADERS lines are read.  Returns a *_class* instance.
    """
    end_markers = (b'\r\n', b'\n', b'')
    raw_lines = []
    while True:
        raw = fp.readline(_MAXLINE + 1)
        if len(raw) > _MAXLINE:
            raise LineTooLong("header line")
        raw_lines.append(raw)
        # The terminating blank line counts towards the limit as well.
        if len(raw_lines) > _MAXHEADERS:
            raise HTTPException("got more than %d headers" % _MAXHEADERS)
        if raw in end_markers:
            break
    text = b''.join(raw_lines).decode('iso-8859-1')
    parser = email.parser.Parser(_class=_class)
    return parser.parsestr(text)
|---|
| 215 | n/a | |
|---|
| 216 | n/a | |
|---|
| 217 | n/a | class HTTPResponse(io.BufferedIOBase): |
|---|
| 218 | n/a | |
|---|
| 219 | n/a | # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. |
|---|
| 220 | n/a | |
|---|
| 221 | n/a | # The bytes from the socket object are iso-8859-1 strings. |
|---|
| 222 | n/a | # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded |
|---|
| 223 | n/a | # text following RFC 2047. The basic status line parsing only |
|---|
| 224 | n/a | # accepts iso-8859-1. |
|---|
| 225 | n/a | |
|---|
def __init__(self, sock, debuglevel=0, method=None, url=None):
    """Wrap *sock* for reading and reset all per-response state.

    *debuglevel* above zero makes the parsing methods print what they
    read, *method* is the request method (a "HEAD" request never has a
    body to read).  *url* is accepted but not used here.
    """
    # A response with a content-length header must not be read beyond
    # that many bytes: reading further blocks until the server times out
    # and closes the connection.  That is what an unsized self.fp.read()
    # does, buffered or not, so clients should not call self.fp.read()
    # unless they know what they are doing.
    self.fp = sock.makefile("rb")
    self.debuglevel = debuglevel
    self._method = method

    # HTTPResponse objects are handed out through urllib as well.  urllib
    # clients expect "headers"; "msg" is kept as a backwards compatible
    # alias for http clients.
    self.headers = None
    self.msg = None

    # Status-Line fields: HTTP-Version, Status-Code, Reason-Phrase.
    self.version = self.status = self.reason = _UNKNOWN

    # Body framing state, all filled in by begin():
    self.chunked = _UNKNOWN      # is "chunked" being used?
    self.chunk_left = _UNKNOWN   # bytes left to read in current chunk
    self.length = _UNKNOWN       # number of bytes left in response
    self.will_close = _UNKNOWN   # conn will close at end of response
|---|
| 255 | n/a | |
|---|
def _read_status(self):
    """Read and parse the status line.

    Returns a (version, status, reason) tuple where *status* is an int.
    Raises LineTooLong for an over-long line, RemoteDisconnected when
    nothing at all could be read, and BadStatusLine when the line does
    not look like "HTTP/x.y NNN reason".
    """
    raw = self.fp.readline(_MAXLINE + 1)
    line = str(raw, "iso-8859-1")
    if len(line) > _MAXLINE:
        raise LineTooLong("status line")
    if self.debuglevel > 0:
        print("reply:", repr(line))
    if not line:
        # Presumably, the server closed the connection before
        # sending a valid response.
        raise RemoteDisconnected("Remote end closed connection without"
                                 " response")

    fields = line.split(None, 2)
    if len(fields) == 3:
        version, status, reason = fields
    elif len(fields) == 2:
        # No reason phrase was sent.
        version, status = fields
        reason = ""
    else:
        # An empty version makes the check below fail.
        version = ""
    if not version.startswith("HTTP/"):
        self._close_conn()
        raise BadStatusLine(line)

    # The status code is a three-digit number
    try:
        status = int(status)
    except ValueError:
        raise BadStatusLine(line)
    if not 100 <= status <= 999:
        raise BadStatusLine(line)
    return version, status, reason
|---|
| 288 | n/a | |
|---|
def begin(self):
    """Read the status line and headers and set up body framing state.

    Does nothing if the headers were already read.  Any "100 Continue"
    responses are skipped.  Afterwards status/code, reason, version,
    headers/msg, chunked, chunk_left, will_close and length are set.

    Raises UnknownProtocol for an unsupported HTTP version, LineTooLong
    for an over-long header line in a skipped 100 response, plus whatever
    _read_status() and parse_headers() raise.
    """
    if self.headers is not None:
        # we've already started reading the response
        return

    # read until we get a non-100 response
    while True:
        version, status, reason = self._read_status()
        if status != CONTINUE:
            break
        # skip the header from the 100 response
        while True:
            skip = self.fp.readline(_MAXLINE + 1)
            if len(skip) > _MAXLINE:
                raise LineTooLong("header line")
            skip = skip.strip()
            if not skip:
                break
            if self.debuglevel > 0:
                print("header:", skip)

    self.code = self.status = status
    self.reason = reason.strip()
    if version in ("HTTP/1.0", "HTTP/0.9"):
        # Some servers might still return "0.9", treat it as 1.0 anyway
        self.version = 10
    elif version.startswith("HTTP/1."):
        self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
    else:
        raise UnknownProtocol(version)

    self.headers = self.msg = parse_headers(self.fp)

    if self.debuglevel > 0:
        # Print one "name: value" line per header; iterating the message
        # directly would only yield the header names.
        for hdr, val in self.headers.items():
            print("header:", hdr + ":", val)

    # are we using the chunked-style of transfer encoding?
    tr_enc = self.headers.get("transfer-encoding")
    if tr_enc and tr_enc.lower() == "chunked":
        self.chunked = True
        self.chunk_left = None
    else:
        self.chunked = False

    # will the connection close at the end of the response?
    self.will_close = self._check_close()

    # do we have a Content-Length?
    # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
    self.length = None
    length = self.headers.get("content-length")
    if length and not self.chunked:
        try:
            self.length = int(length)
        except ValueError:
            self.length = None
        else:
            if self.length < 0:  # ignore nonsensical negative lengths
                self.length = None

    # does the body have a fixed length? (of zero)
    if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
        self.length = 0

    # if the connection remains open, and we aren't using chunked, and
    # a content-length was not provided, then assume that the connection
    # WILL close.
    if (not self.will_close and
            not self.chunked and
            self.length is None):
        self.will_close = True
|---|
| 368 | n/a | |
|---|
| 369 | n/a | def _check_close(self): |
|---|
| 370 | n/a | conn = self.headers.get("connection") |
|---|
| 371 | n/a | if self.version == 11: |
|---|
| 372 | n/a | # An HTTP/1.1 proxy is assumed to stay open unless |
|---|
| 373 | n/a | # explicitly closed. |
|---|
| 374 | n/a | conn = self.headers.get("connection") |
|---|
| 375 | n/a | if conn and "close" in conn.lower(): |
|---|
| 376 | n/a | return True |
|---|
| 377 | n/a | return False |
|---|
| 378 | n/a | |
|---|
| 379 | n/a | # Some HTTP/1.0 implementations have support for persistent |
|---|
| 380 | n/a | # connections, using rules different than HTTP/1.1. |
|---|
| 381 | n/a | |
|---|
| 382 | n/a | # For older HTTP, Keep-Alive indicates persistent connection. |
|---|
| 383 | n/a | if self.headers.get("keep-alive"): |
|---|
| 384 | n/a | return False |
|---|
| 385 | n/a | |
|---|
| 386 | n/a | # At least Akamai returns a "Connection: Keep-Alive" header, |
|---|
| 387 | n/a | # which was supposed to be sent by the client. |
|---|
| 388 | n/a | if conn and "keep-alive" in conn.lower(): |
|---|
| 389 | n/a | return False |
|---|
| 390 | n/a | |
|---|
| 391 | n/a | # Proxy-Connection is a netscape hack. |
|---|
| 392 | n/a | pconn = self.headers.get("proxy-connection") |
|---|
| 393 | n/a | if pconn and "keep-alive" in pconn.lower(): |
|---|
| 394 | n/a | return False |
|---|
| 395 | n/a | |
|---|
| 396 | n/a | # otherwise, assume it will close |
|---|
| 397 | n/a | return True |
|---|
| 398 | n/a | |
|---|
| 399 | n/a | def _close_conn(self): |
|---|
| 400 | n/a | fp = self.fp |
|---|
| 401 | n/a | self.fp = None |
|---|
| 402 | n/a | fp.close() |
|---|
| 403 | n/a | |
|---|
def close(self):
    """Mark this object closed and close the underlying file if still open."""
    try:
        super().close()  # set "closed" flag
    finally:
        # Runs even if the base class close() raised.
        still_open = self.fp
        if still_open:
            self._close_conn()
|---|
| 410 | n/a | |
|---|
| 411 | n/a | # These implementations are for the benefit of io.BufferedReader. |
|---|
| 412 | n/a | |
|---|
| 413 | n/a | # XXX This class should probably be revised to act more like |
|---|
| 414 | n/a | # the "raw stream" that BufferedReader expects. |
|---|
| 415 | n/a | |
|---|
def flush(self):
    """Flush this object, then the underlying file if there still is one."""
    super().flush()
    stream = self.fp
    if stream:
        stream.flush()
|---|
| 420 | n/a | |
|---|
def readable(self):
    """Report that this stream supports reading; the result is always True."""
    return True
|---|
| 424 | n/a | |
|---|
| 425 | n/a | # End of "raw stream" methods |
|---|
| 426 | n/a | |
|---|
def isclosed(self):
    """True if the connection is closed."""
    # NOTE: self.close() may never be called.  That happens when
    #       will_close is TRUE, length is None, and the caller reads up to
    #       the last byte, but NOT past it.
    #
    # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
    #          called, meaning self.isclosed() is meaningful.
    stream = self.fp
    return stream is None
|---|
| 436 | n/a | |
|---|
def read(self, amt=None):
    """Read and return the response body, or up to *amt* bytes of it.

    With *amt* None or negative, the whole remaining body is read and the
    underlying file is closed.  Returns b"" if the response is already
    closed or answers a HEAD request.

    Raises IncompleteRead if a body of known length ends early.
    """
    if self.fp is None:
        return b""

    if self._method == "HEAD":
        self._close_conn()
        return b""

    if amt is not None and amt >= 0:
        # Amount is given, implement using readinto
        if (not self.chunked and self.length is not None
                and amt > self.length):
            # readinto() clips to the end of the response anyway, so do
            # not allocate a buffer larger than the remaining body.
            amt = self.length
        buf = bytearray(amt)
        n = self.readinto(buf)
        return memoryview(buf)[:n].tobytes()

    # Amount is not given (unbounded read) so we must check self.length
    # and self.chunked
    if self.chunked:
        return self._readall_chunked()

    if self.length is None:
        s = self.fp.read()
    else:
        try:
            s = self._safe_read(self.length)
        except IncompleteRead:
            self._close_conn()
            raise
        self.length = 0
    self._close_conn()        # we read everything
    return s
|---|
| 468 | n/a | |
|---|
def readinto(self, b):
    """Fill buffer *b* with up to len(b) body bytes; return the count read.

    Returns 0 if the response is already closed or answers a HEAD request.
    The read never goes past the known end of the response.
    """
    if self.fp is None:
        return 0

    if self._method == "HEAD":
        self._close_conn()
        return 0

    if self.chunked:
        return self._readinto_chunked(b)

    if self.length is not None and len(b) > self.length:
        # clip the read to the "end of response"
        b = memoryview(b)[0:self.length]

    # _safe_read() is deliberately not used: this may be a .will_close
    # connection on which the caller asks for more bytes than will ever
    # arrive (for example, reading in 1k chunks).
    count = self.fp.readinto(b)
    if b and not count:
        # Ideally, we would raise IncompleteRead if the content-length
        # wasn't satisfied, but it might break compatibility.
        self._close_conn()
    elif self.length is not None:
        self.length -= count
        if not self.length:
            self._close_conn()
    return count
|---|
| 502 | n/a | |
|---|
def _read_next_chunk_size(self):
    """Read one chunk-size line and return the size as an int.

    Raises LineTooLong for an over-long line.  If the size is not valid
    hexadecimal, the connection is closed and the ValueError propagates.
    """
    raw = self.fp.readline(_MAXLINE + 1)
    if len(raw) > _MAXLINE:
        raise LineTooLong("chunk size")
    # Everything from the first ";" on is a chunk-extension; drop it.
    size_field = raw.partition(b";")[0]
    try:
        return int(size_field, 16)
    except ValueError:
        # close the connection as protocol synchronisation is
        # probably lost
        self._close_conn()
        raise
|---|
| 518 | n/a | |
|---|
def _read_and_discard_trailer(self):
    """Consume trailer lines up to and including the terminating blank line.

    Raises LineTooLong if a trailer line exceeds _MAXLINE bytes.
    """
    ### note: we shouldn't have any trailers!
    # b'' is EOF: a vanishingly small number of sites EOF without
    # sending the trailer.
    stop_lines = (b'\r\n', b'\n', b'')
    while True:
        trailer = self.fp.readline(_MAXLINE + 1)
        if len(trailer) > _MAXLINE:
            raise LineTooLong("trailer line")
        if trailer in stop_lines:
            break
|---|
| 532 | n/a | |
|---|
| 533 | n/a | def _get_chunk_left(self): |
|---|
| 534 | n/a | # return self.chunk_left, reading a new chunk if necessary. |
|---|
| 535 | n/a | # chunk_left == 0: at the end of the current chunk, need to close it |
|---|
| 536 | n/a | # chunk_left == None: No current chunk, should read next. |
|---|
| 537 | n/a | # This function returns non-zero or None if the last chunk has |
|---|
| 538 | n/a | # been read. |
|---|
| 539 | n/a | chunk_left = self.chunk_left |
|---|
| 540 | n/a | if not chunk_left: # Can be 0 or None |
|---|
| 541 | n/a | if chunk_left is not None: |
|---|
| 542 | n/a | # We are at the end of chunk. dicard chunk end |
|---|
| 543 | n/a | self._safe_read(2) # toss the CRLF at the end of the chunk |
|---|
| 544 | n/a | try: |
|---|
| 545 | n/a | chunk_left = self._read_next_chunk_size() |
|---|
| 546 | n/a | except ValueError: |
|---|
| 547 | n/a | raise IncompleteRead(b'') |
|---|
| 548 | n/a | if chunk_left == 0: |
|---|
| 549 | n/a | # last chunk: 1*("0") [ chunk-extension ] CRLF |
|---|
| 550 | n/a | self._read_and_discard_trailer() |
|---|
| 551 | n/a | # we read everything; close the "file" |
|---|
| 552 | n/a | self._close_conn() |
|---|
| 553 | n/a | chunk_left = None |
|---|
| 554 | n/a | self.chunk_left = chunk_left |
|---|
| 555 | n/a | return chunk_left |
|---|
| 556 | n/a | |
|---|
def _readall_chunked(self):
    """Read every remaining chunk and return the joined body bytes.

    Raises IncompleteRead, carrying the data collected so far, if the
    stream ends early.
    """
    assert self.chunked != _UNKNOWN
    pieces = []
    try:
        chunk_left = self._get_chunk_left()
        while chunk_left is not None:
            pieces.append(self._safe_read(chunk_left))
            # Chunk fully consumed; its trailing CRLF is still pending.
            self.chunk_left = 0
            chunk_left = self._get_chunk_left()
        return b''.join(pieces)
    except IncompleteRead:
        raise IncompleteRead(b''.join(pieces))
|---|
| 570 | n/a | |
|---|
def _readinto_chunked(self, b):
    """Fill buffer *b* from the chunked body; return the number of bytes read.

    Reads across chunk boundaries until *b* is full or the last chunk has
    been consumed.  Raises IncompleteRead, carrying the bytes stored so
    far, if the stream ends early.
    """
    assert self.chunked != _UNKNOWN
    filled = 0
    view = memoryview(b)
    try:
        while True:
            chunk_left = self._get_chunk_left()
            if chunk_left is None:
                return filled

            if len(view) <= chunk_left:
                # The rest of the buffer fits inside the current chunk.
                got = self._safe_readinto(view)
                self.chunk_left = chunk_left - got
                return filled + got

            # Drain the current chunk, then continue with the next one.
            got = self._safe_readinto(view[:chunk_left])
            view = view[got:]
            filled += got
            self.chunk_left = 0

    except IncompleteRead:
        raise IncompleteRead(bytes(b[0:filled]))
|---|
| 594 | n/a | |
|---|
def _safe_read(self, amt):
    """Read exactly *amt* bytes, compensating for partial reads.

    Normally the socket is blocking, but a read() can be interrupted by a
    signal and return fewer bytes than requested, so reads are repeated
    (at most MAXAMOUNT bytes at a time) until *amt* bytes have arrived.

    EOF and an interrupt cannot be told apart when zero bytes come back;
    IncompleteRead is raised in that situation, carrying the data read so
    far and the number of bytes still missing.

    Use this when <amt> bytes "should" be present for reading.
    """
    pieces = []
    remaining = amt
    while remaining > 0:
        piece = self.fp.read(min(remaining, MAXAMOUNT))
        if not piece:
            raise IncompleteRead(b''.join(pieces), remaining)
        pieces.append(piece)
        remaining -= len(piece)
    return b"".join(pieces)
|---|
| 617 | n/a | |
|---|
def _safe_readinto(self, b):
    """Same as _safe_read, but for reading into a buffer.

    Fills *b* completely, reading at most MAXAMOUNT bytes per call, and
    returns the number of bytes stored (len(b)).

    Raises IncompleteRead if the stream ends first; the exception carries
    the bytes stored so far and the number of bytes still missing, as
    _safe_read() does.
    """
    total_bytes = 0
    whole = memoryview(b)
    mvb = whole
    while total_bytes < len(b):
        if MAXAMOUNT < len(mvb):
            temp_mvb = mvb[0:MAXAMOUNT]
            n = self.fp.readinto(temp_mvb)
        else:
            n = self.fp.readinto(mvb)
        if not n:
            # mvb has been advanced past the data already stored, so the
            # partial result must be sliced from the start of the buffer.
            raise IncompleteRead(bytes(whole[0:total_bytes]),
                                 len(b) - total_bytes)
        mvb = mvb[n:]
        total_bytes += n
    return total_bytes
|---|
| 633 | n/a | |
|---|
def read1(self, n=-1):
    """Read with at most one underlying system call.  If at least one
    byte is buffered, return that instead.

    Returns b"" if the response is closed or answers a HEAD request.  The
    read is clipped to the number of body bytes still outstanding.
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    if self.chunked:
        return self._read1_chunked(n)
    remaining = self.length
    if remaining is not None and not 0 <= n <= remaining:
        # Negative or too large: read no further than the end of the body.
        n = remaining
    data = self.fp.read1(n)
    if n and not data:
        # Bytes were requested but none arrived: the stream is at EOF.
        self._close_conn()
    elif self.length is not None:
        self.length -= len(data)
    return data
|---|
| 650 | n/a | |
|---|
def peek(self, n=-1):
    """Return buffered body bytes without consuming them.

    Returns b"" if the response is closed or answers a HEAD request.
    """
    # Having this enables IOBase.readline() to read more than one
    # byte at a time
    stream = self.fp
    if stream is None or self._method == "HEAD":
        return b""
    peeker = self._peek_chunked if self.chunked else stream.peek
    return peeker(n)
|---|
| 659 | n/a | |
|---|
def readline(self, limit=-1):
    """Read and return one line, at most *limit* bytes long.

    Returns b"" when there is no file object or the request method was
    HEAD.  For chunked responses the inherited readline() is used.
    When the remaining body length is known, *limit* is clamped to it
    and the length is reduced by the size of the returned line.  An
    empty result for a non-zero limit closes the connection via
    _close_conn().
    """
    if self.fp is None or self._method == "HEAD":
        return b""
    if self.chunked:
        # Fallback to IOBase readline which uses peek() and read()
        return super().readline(limit)

    remaining = self.length
    if remaining is not None and not (0 <= limit <= remaining):
        limit = remaining

    line = self.fp.readline(limit)
    if limit and not line:
        self._close_conn()
    elif self.length is not None:
        self.length -= len(line)
    return line
|---|
| 674 | n/a | |
|---|
def _read1_chunked(self, n):
    """Return up to *n* bytes from the current chunk of a chunked body.

    _get_chunk_left() may itself trigger more than one read; that is
    accepted because it is needed to follow the chunked protocol.
    Returns b'' when no chunk is left or *n* is 0.  A negative *n*, or
    one larger than what remains of the chunk, is clamped to the
    remainder.  Raises IncompleteRead if the stream yields nothing.
    """
    available = self._get_chunk_left()
    if available is None or n == 0:
        return b''
    if n < 0 or n > available:
        n = available
    data = self.fp.read1(n)
    self.chunk_left -= len(data)
    if data:
        return data
    raise IncompleteRead(b"")
|---|
| 688 | n/a | |
|---|
def _peek_chunked(self, n):
    """Peek at the data remaining in the current chunk.

    *n* is not used to size the request: peek is allowed to return more
    than asked for, so the whole remaining chunk is requested and the
    result is truncated to the chunk boundary.  Returns b'' at end of
    body, and also when _get_chunk_left() raises IncompleteRead (peek
    does not report protocol problems).
    """
    # Strictly speaking, _get_chunk_left() may cause more than one read,
    # but that is ok, since that is to satisfy the chunked protocol.
    try:
        available = self._get_chunk_left()
    except IncompleteRead:
        return b''
    if available is None:
        return b''  # eof
    peeked = self.fp.peek(available)
    return peeked[:available]
|---|
| 701 | n/a | |
|---|
def fileno(self):
    """Return whatever the underlying file object's fileno() returns."""
    stream = self.fp
    return stream.fileno()
|---|
| 704 | n/a | |
|---|
def getheader(self, name, default=None):
    """Return the value of the header called *name*.

    Several headers with that name are merged into one string, joined
    with ', '.  When no such header exists *default* is used instead
    (None unless given); a non-string iterable default is joined the
    same way, any other default is returned unchanged.

    Raises ResponseNotReady if the headers have not been read yet
    (self.headers is None).
    """
    if self.headers is None:
        raise ResponseNotReady()
    found = self.headers.get_all(name) or default
    if not isinstance(found, str) and hasattr(found, '__iter__'):
        return ', '.join(found)
    return found
|---|
| 724 | n/a | |
|---|
def getheaders(self):
    """Return the response headers as a list of (header, value) tuples.

    Raises ResponseNotReady if self.headers is still None.
    """
    message = self.headers
    if message is None:
        raise ResponseNotReady()
    return list(message.items())
|---|
| 730 | n/a | |
|---|
| 731 | n/a | # We override IOBase.__iter__ so that it doesn't check for closed-ness |
|---|
| 732 | n/a | |
|---|
def __iter__(self):
    """Return the response object itself as the iterator."""
    return self
|---|
| 735 | n/a | |
|---|
| 736 | n/a | # For compatibility with old-style urllib responses. |
|---|
| 737 | n/a | |
|---|
def info(self):
    """Return the headers object stored on this response.

    Provided for compatibility with old-style urllib responses; the
    result is simply the current value of self.headers, i.e. the
    meta-information (such as Content-Length and Content-Type) that
    came with the response.
    """
    return self.headers
|---|
| 760 | n/a | |
|---|
def geturl(self):
    """Return the URL recorded on this response (self.url).

    Provided for compatibility with old-style urllib responses.  After
    a redirect handled transparently by urlopen(), callers can use this
    to learn which URL was finally retrieved.
    """
    return self.url
|---|
| 772 | n/a | |
|---|
def getcode(self):
    """Return the HTTP status code of this response (self.status).

    Provided for compatibility with old-style urllib responses.
    """
    return self.status
|---|
| 779 | n/a | |
|---|
| 780 | n/a | class HTTPConnection: |
|---|
| 781 | n/a | |
|---|
| 782 | n/a | _http_vsn = 11 |
|---|
| 783 | n/a | _http_vsn_str = 'HTTP/1.1' |
|---|
| 784 | n/a | |
|---|
| 785 | n/a | response_class = HTTPResponse |
|---|
| 786 | n/a | default_port = HTTP_PORT |
|---|
| 787 | n/a | auto_open = 1 |
|---|
| 788 | n/a | debuglevel = 0 |
|---|
| 789 | n/a | |
|---|
@staticmethod
def _is_textIO(stream):
    """Return True if *stream* is a text stream, False otherwise.

    A stream counts as text when it is an instance of io.TextIOBase.
    """
    text_base = io.TextIOBase
    return isinstance(stream, text_base)
|---|
| 795 | n/a | |
|---|
@staticmethod
def _get_content_length(body, method):
    """Work out the Content-Length implied by *body*, if any.

    With no body, 0 is returned for methods that expect one (RFC 7230,
    Section 3.3.2) and None for the others.  File-like bodies (anything
    with a read attribute) have no known length and give None.  Objects
    supporting the buffer protocol report their size in bytes, str
    bodies their number of characters.  Anything else gives None.
    """
    if body is None:
        # An explicit None test keeps "no body at all" distinct from
        # "a body that happens to be empty".
        return 0 if method.upper() in _METHODS_EXPECTING_BODY else None

    if hasattr(body, 'read'):
        # file-like object.
        return None

    try:
        # does it implement the buffer protocol (bytes, bytearray, array)?
        view = memoryview(body)
    except TypeError:
        pass
    else:
        return view.nbytes

    return len(body) if isinstance(body, str) else None
|---|
| 827 | n/a | |
|---|
def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
             source_address=None):
    """Set up an unconnected HTTP connection object.

    *host* and *port* are normalised by _get_hostport(), so *host* may
    carry the port itself ("host:port") when *port* is None.  *timeout*
    and *source_address* are stored and later handed to the function
    that creates the socket in connect().  No socket is opened here.
    """
    # Request/response bookkeeping starts out empty and idle.
    self.__state = _CS_IDLE
    self.__response = None
    self._method = None
    self._buffer = []
    self.sock = None

    # Socket options supplied by the caller.
    self.timeout = timeout
    self.source_address = source_address

    # No CONNECT tunnel until set_tunnel() is called.
    self._tunnel_host = None
    self._tunnel_port = None
    self._tunnel_headers = {}

    self.host, self.port = self._get_hostport(host, port)

    # This is stored as an instance variable to allow unit
    # tests to replace it with a suitable mockup
    self._create_connection = socket.create_connection
|---|
| 846 | n/a | |
|---|
def set_tunnel(self, host, port=None, headers=None):
    """Set up host and port for HTTP CONNECT tunnelling.

    In a connection that uses HTTP CONNECT tunnelling, the host passed
    to the constructor is used as a proxy server that relays all
    communication to the endpoint passed to `set_tunnel`.  This is done
    by sending an HTTP CONNECT request to the proxy server when the
    connection is established.

    This method must be called before the HTTP connection has been
    established; otherwise RuntimeError is raised.

    The headers argument should be a mapping of extra HTTP headers to
    send with the CONNECT request.  Its items are copied, so the
    caller's mapping is never modified by this object.
    """
    if self.sock:
        raise RuntimeError("Can't set up tunnel for established connection")

    self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
    # Store a private copy: keeping the caller's object and clearing it
    # on a later call without headers would wipe the caller's data.
    self._tunnel_headers = dict(headers) if headers else {}
|---|
| 870 | n/a | |
|---|
def _get_hostport(self, host, port):
    """Split *host* into a (host, port) pair.

    When *port* is given, both values are returned untouched.
    Otherwise a trailing ":port" is parsed off *host* (an empty port,
    as in "foo.com:", means the default port), the default port is used
    when there is none, and the square brackets around an IPv6 literal
    are removed.

    Raises InvalidURL if the port part is not a number.
    """
    if port is not None:
        return (host, port)

    colon = host.rfind(':')
    # ipv6 addresses have [...]; a colon inside them is not a port marker
    if colon > host.rfind(']'):
        port_text = host[colon + 1:]
        if port_text == "":
            # http://foo.com:/ == http://foo.com/
            port = self.default_port
        else:
            try:
                port = int(port_text)
            except ValueError:
                raise InvalidURL("nonnumeric port: '%s'" % port_text)
        host = host[:colon]
    else:
        port = self.default_port

    if host[:1] == '[' and host[-1:] == ']':
        host = host[1:-1]
    return (host, port)
|---|
| 890 | n/a | |
|---|
def set_debuglevel(self, level):
    """Store *level* as this connection's debuglevel.

    Methods of this class print diagnostics when debuglevel is above 0.
    """
    self.debuglevel = level
|---|
| 893 | n/a | |
|---|
def _tunnel(self):
    """Ask the proxy to open a CONNECT tunnel to the configured endpoint.

    Sends "CONNECT host:port HTTP/1.0" followed by the tunnel headers,
    then reads the proxy's status line and discards its header lines.
    An IPv6 tunnel host is wrapped in square brackets so its colons
    cannot be confused with the port separator.

    Raises OSError (after closing the connection) if the proxy answers
    with anything but 200, and LineTooLong if a header line exceeds
    _MAXLINE.  The temporary response object is always closed.
    """
    tunnel_host = self._tunnel_host
    if ':' in tunnel_host and not tunnel_host.startswith('['):
        tunnel_host = '[%s]' % tunnel_host
    connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (tunnel_host,
                                                   self._tunnel_port)
    self.send(connect_str.encode("ascii"))
    for header, value in self._tunnel_headers.items():
        header_str = "%s: %s\r\n" % (header, value)
        self.send(header_str.encode("latin-1"))
    self.send(b'\r\n')

    response = self.response_class(self.sock, method=self._method)
    try:
        (_version, code, message) = response._read_status()

        if code != http.HTTPStatus.OK:
            self.close()
            raise OSError("Tunnel connection failed: %d %s" % (code,
                                                               message.strip()))
        while True:
            line = response.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                # for sites which EOF without sending a trailer
                break
            if line in (b'\r\n', b'\n', b''):
                break

            if self.debuglevel > 0:
                print('header:', line.decode())
    finally:
        # The response only exists to parse the proxy's reply; release
        # its file object instead of leaving that to garbage collection.
        response.close()
|---|
| 924 | n/a | |
|---|
def connect(self):
    """Open the socket to the host and port given to __init__.

    The socket is created through self._create_connection with the
    stored timeout and source address, TCP_NODELAY is switched on, and
    a CONNECT tunnel is negotiated when a tunnel host has been set.
    """
    address = (self.host, self.port)
    self.sock = self._create_connection(address, self.timeout,
                                        self.source_address)
    self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

    if self._tunnel_host:
        self._tunnel()
|---|
| 933 | n/a | |
|---|
def close(self):
    """Close the connection to the HTTP server.

    The state goes back to idle, the socket (if any) is detached and
    closed, and a pending response (if any) is detached and closed too.
    The response is handled even when closing the socket raises.
    """
    self.__state = _CS_IDLE
    try:
        live_sock = self.sock
        if live_sock:
            self.sock = None
            live_sock.close()   # close it manually... there may be other refs
    finally:
        pending = self.__response
        if pending:
            self.__response = None
            pending.close()
|---|
| 947 | n/a | |
|---|
def send(self, data):
    """Send `data' to the server.

    ``data`` can be a string object, a bytes object, an array object, a
    file-like object that supports a .read() method, or an iterable
    object.

    A connection is opened first if there is none and auto_open is
    set; otherwise NotConnected is raised.  File-like data is sent
    block by block via _read_readable().  Other data is handed to the
    socket as is; if the socket rejects it with TypeError and it is
    iterable, each item is sent in turn.  TypeError is raised for data
    that is neither bytes-like nor iterable.
    """
    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise NotConnected()

    if self.debuglevel > 0:
        print("send:", repr(data))

    if hasattr(data, "read"):
        # Same blocking, encoding and debug output as for request bodies.
        for datablock in self._read_readable(data):
            self.sock.sendall(datablock)
        return

    try:
        self.sock.sendall(data)
    except TypeError:
        # The ABC lives in collections.abc; the old alias on the
        # collections package no longer exists in current Python.
        from collections.abc import Iterable
        if isinstance(data, Iterable):
            for d in data:
                self.sock.sendall(d)
        else:
            raise TypeError("data should be a bytes-like object "
                            "or an iterable, got %r" % type(data))
|---|
| 986 | n/a | |
|---|
def _output(self, s):
    """Queue one line of output in the current request buffer.

    The line is expected *not* to end with \\r\\n.
    """
    pending = self._buffer
    pending.append(s)
|---|
| 993 | n/a | |
|---|
def _read_readable(self, readable):
    """Yield the contents of a file-like object in blocks.

    *readable* is read 8192 units at a time until read() returns
    something empty.  Blocks coming from a text stream are encoded as
    iso-8859-1 before being yielded.  Progress messages are printed
    when debuglevel is above 0.
    """
    if self.debuglevel > 0:
        print("sendIng a read()able")
    needs_encoding = self._is_textIO(readable)
    if needs_encoding and self.debuglevel > 0:
        print("encoding file using iso-8859-1")

    blocksize = 8192
    datablock = readable.read(blocksize)
    while datablock:
        if needs_encoding:
            datablock = datablock.encode("iso-8859-1")
        yield datablock
        datablock = readable.read(blocksize)
|---|
| 1008 | n/a | |
|---|
def _send_output(self, message_body=None, encode_chunked=False):
    """Send the buffered request lines, then the optional body.

    The buffer is joined with \\r\\n, terminated by an extra \\r\\n,
    sent and emptied.  *message_body* may be a file-like object, a
    bytes-like object or an iterable of bytes-like objects; empty
    pieces are skipped.  With *encode_chunked* true on an HTTP/1.1
    connection every piece is framed as a chunk and a terminating
    zero-length chunk is sent.

    Raises TypeError if the body is neither bytes-like nor iterable.
    """
    # Two empty entries make the joined text end with the blank line
    # that closes the header section.
    self._buffer.extend((b"", b""))
    head = b"\r\n".join(self._buffer)
    del self._buffer[:]
    self.send(head)

    if message_body is None:
        return

    # create a consistent interface to message_body
    if hasattr(message_body, 'read'):
        # Let file-like take precedence over byte-like.  This
        # is needed to allow the current position of mmap'ed
        # files to be taken into account.
        chunks = self._read_readable(message_body)
    else:
        try:
            # Only a probe for the buffer API; the view is not kept.
            memoryview(message_body)
        except TypeError:
            try:
                chunks = iter(message_body)
            except TypeError:
                raise TypeError("message_body should be a bytes-like "
                                "object or an iterable, got %r"
                                % type(message_body))
        else:
            # the object implements the buffer interface and
            # can be passed directly into socket methods
            chunks = (message_body,)

    for chunk in chunks:
        if not chunk:
            if self.debuglevel > 0:
                print('Zero length chunk ignored')
            continue

        if encode_chunked and self._http_vsn == 11:
            # chunked encoding: hex size line, data, CRLF
            size_line = f'{len(chunk):X}\r\n'.encode('ascii')
            chunk = size_line + chunk + b'\r\n'
        self.send(chunk)

    if encode_chunked and self._http_vsn == 11:
        # end chunked transfer
        self.send(b'0\r\n\r\n')
|---|
| 1062 | n/a | |
|---|
def putrequest(self, method, url, skip_host=False,
               skip_accept_encoding=False):
    """Send a request to the server.

    `method' specifies an HTTP request method, e.g. 'GET'.
    `url' specifies the object being requested, e.g. '/index.html'.
    `skip_host' if True does not add automatically a 'Host:' header
    `skip_accept_encoding' if True does not add automatically an
       'Accept-Encoding:' header

    The request line (and, for HTTP/1.1, the automatic headers) are
    only buffered here; they are transmitted by endheaders().

    Raises ValueError if `method' contains control characters,
    InvalidURL if `url' contains control characters or spaces, and
    CannotSendRequest if the connection is not idle.
    """
    import re

    # Both values are written verbatim into the request line, so a
    # CR/LF (or, for the URL, a space) would let a caller-controlled
    # value smuggle extra headers or requests.  Check before touching
    # any connection state so a rejected call leaves it usable.
    bad = re.search('[\x00-\x1f]', method)
    if bad:
        raise ValueError(
            f"method can't contain control characters. {method!r} "
            f"(found at least {bad.group()!r})")
    if url:
        bad = re.search('[\x00-\x20\x7f]', url)
        if bad:
            raise InvalidURL(
                f"URL can't contain control characters. {url!r} "
                f"(found at least {bad.group()!r})")

    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # in certain cases, we cannot issue another request on this connection.
    # this occurs when:
    #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
    #   2) a response to a previous request has signalled that it is going
    #      to close the connection upon completion.
    #   3) the headers for the previous response have not been read, thus
    #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
    #
    # if there is no prior response, then we can request at will.
    #
    # if point (2) is true, then we will have passed the socket to the
    # response (effectively meaning, "there is no prior response"), and
    # will open a new one when a new request is made.
    #
    # Note: if a prior response exists, then we *can* start a new request.
    #       We are not allowed to begin fetching the response to this new
    #       request, however, until that prior response is complete.
    #
    if self.__state == _CS_IDLE:
        self.__state = _CS_REQ_STARTED
    else:
        raise CannotSendRequest(self.__state)

    # Save the method we use, we need it later in the response phase
    self._method = method
    if not url:
        url = '/'
    request = '%s %s %s' % (method, url, self._http_vsn_str)

    # Non-ASCII characters should have been eliminated earlier
    self._output(request.encode('ascii'))

    if self._http_vsn == 11:
        # Issue some standard headers for better HTTP/1.1 compliance

        if not skip_host:
            # this header is issued *only* for HTTP/1.1
            # connections. more specifically, this means it is
            # only issued when the client uses the new
            # HTTPConnection() class. backwards-compat clients
            # will be using HTTP/1.0 and those clients may be
            # issuing this header themselves. we should NOT issue
            # it twice; some web servers (such as Apache) barf
            # when they see two Host: headers

            # If we need a non-standard port, include it in the
            # header.  If the request is going through a proxy,
            # use the host of the actual URL, not the host of the
            # proxy.

            netloc = ''
            if url.startswith('http'):
                netloc = urlsplit(url).netloc

            if netloc:
                try:
                    netloc_enc = netloc.encode("ascii")
                except UnicodeEncodeError:
                    netloc_enc = netloc.encode("idna")
                self.putheader('Host', netloc_enc)
            else:
                if self._tunnel_host:
                    host = self._tunnel_host
                    port = self._tunnel_port
                else:
                    host = self.host
                    port = self.port

                try:
                    host_enc = host.encode("ascii")
                except UnicodeEncodeError:
                    host_enc = host.encode("idna")

                # As per RFC 2732, IPv6 address should be wrapped with []
                # when used as Host header

                if host.find(':') >= 0:
                    host_enc = b'[' + host_enc + b']'

                if port == self.default_port:
                    self.putheader('Host', host_enc)
                else:
                    host_enc = host_enc.decode("ascii")
                    self.putheader('Host', "%s:%s" % (host_enc, port))

        # note: we are assuming that clients will not attempt to set these
        #       headers since *this* library must deal with the
        #       consequences. this also means that when the supporting
        #       libraries are updated to recognize other forms, then this
        #       code should be changed (removed or updated).

        # we only want a Content-Encoding of "identity" since we don't
        # support encodings such as x-gzip or x-deflate.
        if not skip_accept_encoding:
            self.putheader('Accept-Encoding', 'identity')

        # we can accept "chunked" Transfer-Encodings, but no others
        # NOTE: no TE header implies *only* "chunked"
        #self.putheader('TE', 'chunked')

        # if TE is supplied in the header, then it must appear in a
        # Connection header.
        #self.putheader('Connection', 'TE')

    else:
        # For HTTP/1.0, the server will assume "not chunked"
        pass
|---|
| 1186 | n/a | |
|---|
def putheader(self, header, *values):
    """Buffer one request header line.

    For example: h.putheader('Accept', 'text/html')

    Text header names are encoded as ASCII, text values as latin-1 and
    int values via their decimal string.  Several values are joined
    with CRLF + TAB into one header line.

    Raises CannotSendHeader unless a request has been started, and
    ValueError for an illegal header name or value.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()

    if hasattr(header, 'encode'):
        header = header.encode('ascii')

    if not _is_legal_header_name(header):
        raise ValueError('Invalid header name %r' % (header,))

    encoded_values = []
    for raw in values:
        if hasattr(raw, 'encode'):
            item = raw.encode('latin-1')
        elif isinstance(raw, int):
            item = str(raw).encode('ascii')
        else:
            item = raw

        if _is_illegal_header_value(item):
            raise ValueError('Invalid header value %r' % (item,))
        encoded_values.append(item)

    self._output(header + b': ' + b'\r\n\t'.join(encoded_values))
|---|
| 1214 | n/a | |
|---|
def endheaders(self, message_body=None, *, encode_chunked=False):
    """Finish the header section and send the request to the server.

    The optional message_body argument can be used to pass a message
    body associated with the request; it is forwarded, together with
    encode_chunked, to _send_output().

    Raises CannotSendHeader unless a request has been started.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()
    self.__state = _CS_REQ_SENT
    self._send_output(message_body, encode_chunked=encode_chunked)
|---|
| 1227 | n/a | |
|---|
def request(self, method, url, body=None, headers={}, *,
            encode_chunked=False):
    """Send a complete request (request line, headers, body) to the server.

    All arguments are passed on to _send_request(), which only reads
    from *headers*.
    """
    dispatch = self._send_request
    dispatch(method, url, body, headers, encode_chunked)
|---|
| 1232 | n/a | |
|---|
def _send_request(self, method, url, body, headers, encode_chunked):
    """Emit the request line, all headers and the body for one request.

    Automatic Host and Accept-Encoding headers are suppressed when the
    caller supplies its own.  Unless the caller set Content-Length or
    Transfer-Encoding, a Content-Length header is added when the body
    size can be determined; otherwise a body of unknown size is sent
    with Transfer-Encoding: chunked.  A str body is encoded before it
    is sent.
    """
    # Honor explicitly requested Host: and Accept-Encoding: headers.
    header_names = frozenset(name.lower() for name in headers)
    skips = {
        keyword: 1
        for lowered, keyword in (('host', 'skip_host'),
                                 ('accept-encoding', 'skip_accept_encoding'))
        if lowered in header_names
    }
    self.putrequest(method, url, **skips)

    # chunked encoding will happen if HTTP/1.1 is used and either
    # the caller passes encode_chunked=True or the following
    # conditions hold:
    # 1. content-length has not been explicitly set
    # 2. the body is a file or iterable, but not a str or bytes-like
    # 3. Transfer-Encoding has NOT been explicitly set by the caller

    if 'content-length' in header_names:
        encode_chunked = False
    elif 'transfer-encoding' not in header_names:
        # Chunk the body ourselves only when the caller did not set
        # Transfer-Encoding; if they did, we assume client code already
        # handles the chunking (backwards compatibility).
        encode_chunked = False
        content_length = self._get_content_length(body, method)
        if content_length is not None:
            self.putheader('Content-Length', str(content_length))
        elif body is not None:
            # Size unknown: fall back to chunked encoding.
            if self.debuglevel > 0:
                print('Unable to determine size of %r' % body)
            encode_chunked = True
            self.putheader('Transfer-Encoding', 'chunked')

    for hdr, value in headers.items():
        self.putheader(hdr, value)
    if isinstance(body, str):
        # RFC 2616 Section 3.7.1 says that text default has a
        # default charset of iso-8859-1.
        body = _encode(body, 'body')
    self.endheaders(body, encode_chunked=encode_chunked)
|---|
| 1278 | n/a | |
|---|
def getresponse(self):
    """Get the response from the server.

    Builds an instance of self.response_class on top of self.sock, calls
    its begin() method and returns it.

    ResponseNotReady is raised if no request has been sent or if a
    previous response has not been handled yet.  If the HTTP response
    indicates that the connection should be closed, the connection is
    closed before the response is returned; closing the connection
    closes the underlying socket.  A ConnectionError raised while the
    response is being started also closes the connection before it
    propagates.
    """

    # A prior response that has been completed can simply be forgotten.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # A prior response that still exists must be completed first
    # (otherwise we cannot read this response's header to determine the
    # connection-close behavior).
    #
    # Note: a prior response that was connection-close has already been
    # made independent of this HTTPConnection object, since a new request
    # requires opening a whole new connection.
    #
    # So the prior response had one of two states:
    #   1) will_close: this connection was reset and the prior socket and
    #                  response operate independently
    #   2) persistent: the response was retained and we await its
    #                  isclosed() status to become true.
    if self.__state != _CS_REQ_SENT or self.__response:
        raise ResponseNotReady(self.__state)

    # The debug level is only forwarded (positionally) when debugging is on.
    debug_args = (self.debuglevel,) if self.debuglevel > 0 else ()
    response = self.response_class(self.sock, *debug_args,
                                   method=self._method)

    try:
        try:
            response.begin()
        except ConnectionError:
            # The connection is unusable: drop it before re-raising.
            self.close()
            raise
        assert response.will_close != _UNKNOWN
        self.__state = _CS_IDLE

        if response.will_close:
            # this effectively passes the connection to the response
            self.close()
        else:
            # remember this, so we can tell when it is complete
            self.__response = response

        return response
    except BaseException:
        # Whatever went wrong, do not leave the half-built response open.
        response.close()
        raise
|---|
| 1341 | n/a | |
|---|
try:
    import ssl
except ImportError:
    # Without the ssl module, HTTPSConnection is simply not defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """HTTPConnection variant whose socket is wrapped by an SSL context.

        The context is either supplied by the caller or created with
        ssl._create_default_https_context().
        """

        default_port = HTTPS_PORT

        # XXX Should key_file and cert_file be deprecated in favour of context?

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None, *, context=None,
                     check_hostname=None):
            """Set up the connection and the SSL context it will use.

            key_file, cert_file and check_hostname trigger a
            DeprecationWarning when given.  ValueError is raised when
            hostname checking is requested but the context's verify_mode
            is ssl.CERT_NONE.
            """
            super().__init__(host, port, timeout, source_address)

            deprecated_args = (key_file, cert_file, check_hostname)
            if any(arg is not None for arg in deprecated_args):
                import warnings
                warnings.warn("key_file, cert_file and check_hostname are "
                              "deprecated, use a custom context instead.",
                              DeprecationWarning, 2)
            self.key_file = key_file
            self.cert_file = cert_file

            if context is None:
                context = ssl._create_default_https_context()
            verifies_certs = context.verify_mode != ssl.CERT_NONE

            # When the caller did not decide, follow the context's setting.
            if check_hostname is None:
                check_hostname = context.check_hostname
            if check_hostname and not verifies_certs:
                raise ValueError("check_hostname needs a SSL context with "
                                 "either CERT_OPTIONAL or CERT_REQUIRED")

            if key_file or cert_file:
                context.load_cert_chain(cert_file, key_file)

            self._context = context
            self._check_hostname = check_hostname

        def connect(self):
            "Connect to a host on a given (SSL) port."

            super().connect()

            # When tunnelling, the certificate belongs to the tunnel target.
            server_hostname = self._tunnel_host or self.host

            self.sock = self._context.wrap_socket(
                self.sock, server_hostname=server_hostname)

            # Check the hostname here only if it was requested and the
            # context is not already configured to check it.
            if self._context.check_hostname or not self._check_hostname:
                return
            try:
                ssl.match_hostname(self.sock.getpeercert(), server_hostname)
            except Exception:
                self.sock.shutdown(socket.SHUT_RDWR)
                self.sock.close()
                raise

    __all__.append("HTTPSConnection")
|---|
| 1402 | n/a | |
|---|
class HTTPException(Exception):
    """Base class of the HTTP-specific exceptions defined below."""

    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
|---|
| 1407 | n/a | |
|---|
class NotConnected(HTTPException):
    """HTTPException subclass for use of a connection that is not open.

    NOTE(review): meaning inferred from the class name; the raise sites
    are outside this section -- confirm there.
    """
|---|
| 1410 | n/a | |
|---|
class InvalidURL(HTTPException):
    """HTTPException subclass for a URL that cannot be used.

    NOTE(review): meaning inferred from the class name; the raise sites
    are outside this section -- confirm there.
    """
|---|
| 1413 | n/a | |
|---|
class UnknownProtocol(HTTPException):
    """HTTPException subclass that records a protocol version.

    The value passed in is available as the `version` attribute and is
    also the sole element of `args`.
    NOTE(review): presumably raised for an unrecognised HTTP version in
    a response; the raise sites are outside this section -- confirm.
    """

    def __init__(self, version):
        self.version = version
        # Setting args explicitly stands in for calling Exception.__init__.
        self.args = (version,)
|---|
| 1418 | n/a | |
|---|
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass for an unsupported transfer encoding.

    NOTE(review): meaning inferred from the class name; the raise sites
    are outside this section -- confirm there.
    """
|---|
| 1421 | n/a | |
|---|
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass for a file mode that is not implemented.

    NOTE(review): meaning inferred from the class name; the raise sites
    are outside this section -- confirm there.
    """
|---|
| 1424 | n/a | |
|---|
class IncompleteRead(HTTPException):
    """HTTPException subclass describing a read that stopped short.

    Attributes:
        partial:  the data obtained so far (its len() is reported).
        expected: number of further bytes that were expected, or None
                  when no count was given.
    """

    def __init__(self, partial, expected=None):
        self.partial = partial
        self.expected = expected
        # Only the partial data is exposed through args.
        self.args = (partial,)

    def __repr__(self):
        if self.expected is None:
            remainder = ''
        else:
            remainder = ', %i more expected' % self.expected
        summary = (self.__class__.__name__, len(self.partial), remainder)
        return '%s(%i bytes read%s)' % summary

    def __str__(self):
        # str() and repr() deliberately give the same text.
        return repr(self)
|---|
| 1439 | n/a | |
|---|
class ImproperConnectionState(HTTPException):
    """Base class of the errors raised when the connection is in the wrong
    state for the requested operation (CannotSendRequest,
    CannotSendHeader, ResponseNotReady)."""
|---|
| 1442 | n/a | |
|---|
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass: a request cannot be sent now.

    NOTE(review): meaning inferred from the class name; the raise sites
    are outside this section -- confirm there.
    """
|---|
| 1445 | n/a | |
|---|
class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass: a header cannot be sent now.

    NOTE(review): meaning inferred from the class name; the raise sites
    are outside this section -- confirm there.
    """
|---|
| 1448 | n/a | |
|---|
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been sent or an earlier
    response is still pending; the current connection state is passed as
    the exception argument."""
|---|
| 1451 | n/a | |
|---|
class BadStatusLine(HTTPException):
    """HTTPException subclass that records a status line.

    The stored value is available as the `line` attribute and as the
    sole element of `args`.  A falsy line (e.g. an empty string) is
    stored as its repr() instead -- presumably so that the message is
    never blank.
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        self.args = (shown,)
        self.line = shown
|---|
| 1458 | n/a | |
|---|
class LineTooLong(HTTPException):
    """HTTPException subclass reporting that more than _MAXLINE bytes were
    received while reading the kind of line named by `line_type`."""

    def __init__(self, line_type):
        message = ("got more than %d bytes when reading %s"
                   % (_MAXLINE, line_type))
        HTTPException.__init__(self, message)
|---|
| 1463 | n/a | |
|---|
class RemoteDisconnected(ConnectionResetError, BadStatusLine):
    # Exception that is both a ConnectionResetError and a BadStatusLine, so
    # handlers written for either type will catch it.
    # NOTE(review): presumably raised when the server closes the connection
    # without sending a response; raise sites are outside this section.
    def __init__(self, *pos, **kw):
        # Initialise the BadStatusLine part first, with an empty status
        # line, then hand the caller's arguments to ConnectionResetError.
        # The two base initialisers are called explicitly and in this order.
        BadStatusLine.__init__(self, "")
        ConnectionResetError.__init__(self, *pos, **kw)
|---|
| 1468 | n/a | |
|---|
# for backwards compatibility: `error` is a module-level alias of
# HTTPException, so both names refer to the same class.
error = HTTPException
|---|