'''
dugong.py - Python HTTP Client Module
Copyright (C) Nikolaus Rath <Nikolaus@rath.org>
This module may be distributed under the terms of the Python Software Foundation
License Version 2.
The CaseInsensitiveDict implementation is copyright 2013 Kenneth Reitz and
licensed under the Apache License, Version 2.0
(http://www.apache.org/licenses/LICENSE-2.0)
'''
import socket
import logging
import errno
import ssl
import hashlib
from inspect import getdoc
import textwrap
from base64 import b64encode
from collections import deque
from collections.abc import MutableMapping, Mapping
import email
import email.policy
from http.client import (HTTPS_PORT, HTTP_PORT, NO_CONTENT, NOT_MODIFIED)
from select import select, EPOLLIN, EPOLLOUT
try:
import asyncio
except ImportError:
asyncio = None
__version__ = '1.0'

# Module-level logger; consumers configure handlers/levels themselves.
log = logging.getLogger(__name__)

#: Internal buffer size
BUFFER_SIZE = 64*1024

#: Maximal length of HTTP status line. If the server sends a line longer than
#: this value, `InvalidResponse` will be raised.
MAX_LINE_SIZE = BUFFER_SIZE-1

#: Maximal length of a response header (i.e., for all header
#: lines together). If the server sends a header segment longer than
#: this value, `InvalidResponse` will be raised.
MAX_HEADER_SIZE = BUFFER_SIZE-1

# Sentinel strings recorded in HTTPConnection._encoding to describe the
# transfer encoding of the active response (compared with `is`).
CHUNKED_ENCODING = 'chunked_encoding'
IDENTITY_ENCODING = 'identity_encoding'

#: Marker object for request body size when we're waiting
#: for a 100-continue response from the server
WAITING_FOR_100c = object()
class PollNeeded(tuple):
    '''
    This class encapsulates the requirements for an IO operation to continue.
    `PollNeeded` instances are typically yielded by coroutines.
    '''

    __slots__ = ()

    def __new__(cls, fd, mask):
        # Fix: the first parameter of __new__ is the class, conventionally
        # named *cls* (it was misleadingly named *self*).
        return tuple.__new__(cls, (fd, mask))

    @property
    def fd(self):
        '''File descriptor that the IO operation depends on'''
        return self[0]

    @property
    def mask(self):
        '''Event mask specifying the type of required IO

        This attribute defines what type of IO the provider of the `PollNeeded`
        instance needs to perform on *fd*. It is expected that, when *fd* is
        ready for IO of the specified type, operation will continue without
        blocking.

        The type of IO is specified as a :ref:`epoll <epoll-objects>` compatible
        event mask, i.e. a bitwise combination of `!select.EPOLLIN` and
        `!select.EPOLLOUT`.
        '''
        return self[1]

    def poll(self, timeout=None):
        '''Wait until fd is ready for requested IO

        This is a convenience function that uses `~select.select` to wait until
        `.fd` is ready for the requested type of IO.

        If *timeout* is specified, return `False` if the timeout is exceeded
        without the file descriptor becoming ready.
        '''
        read_fds = (self.fd,) if self.mask & EPOLLIN else ()
        write_fds = (self.fd,) if self.mask & EPOLLOUT else ()
        log.debug('calling select with %s, %s', read_fds, write_fds)
        if timeout is None:
            (read_fds, write_fds, _) = select(read_fds, write_fds, ())
        else:
            (read_fds, write_fds, _) = select(read_fds, write_fds, (), timeout)
        return bool(read_fds) or bool(write_fds)
class HTTPResponse:
    '''
    Encapsulates information about an HTTP response.

    Instances of this class are returned by the
    `HTTPConnection.read_response` method and give access to the response
    status, reason phrase, and headers. Response body data has to be read
    directly from the `HTTPConnection` instance.
    '''

    def __init__(self, method, path, status, reason, headers, length=None):
        #: HTTP method of the request this response is associated with
        self.method = method
        #: Path of the request this response is associated with
        self.path = path
        #: HTTP status code returned by the server
        self.status = status
        #: HTTP reason phrase returned by the server
        self.reason = reason
        #: HTTP response headers, a `email.message.Message` instance
        self.headers = headers
        #: Length of the response body, or `None` if not known
        self.length = length
class BodyFollowing:
    '''
    Sentinel class for the *body* parameter of the
    `~HTTPConnection.send_request` method.

    Passing an instance declares that body data is going to be provided in
    separate method calls. If no length is specified in the constructor, the
    body data will be send using chunked encoding.
    '''

    __slots__ = ('length',)

    def __init__(self, length=None):
        #: the length of the body data that is going to be send, or `None`
        #: to use chunked encoding.
        self.length = length
class _ChunkTooLong(Exception):
'''
Raised by `_co_readstr_until` if the requested end pattern
cannot be found within the specified byte limit.
'''
pass
class _GeneralError(Exception):
msg = 'General HTTP Error'
def __init__(self, msg=None):
if msg:
self.msg = msg
def __str__(self):
return self.msg
class StateError(_GeneralError):
    '''
    Raised when attempting an operation that does not make sense
    in the current connection state.
    '''

    msg = 'Operation invalid in current connection state'
class ExcessBodyData(_GeneralError):
    '''
    Raised when trying to send more data to the server than announced.
    '''

    msg = 'Cannot send larger request body than announced'
class InvalidResponse(_GeneralError):
    '''
    Raised if the server produced an invalid response (i.e., something
    that is not proper HTTP 1.0 or 1.1).
    '''

    msg = 'Server sent invalid response'
class UnsupportedResponse(_GeneralError):
    '''
    Raised if the server produced a response that is not supported.

    This should not happen for servers that are HTTP 1.1 compatible. If an
    `UnsupportedResponse` exception has been raised, this typically means
    that synchronization with the server will be lost (i.e., dugong cannot
    determine where the current response ends and the next response starts),
    so the connection needs to be reset by calling the
    :meth:`~HTTPConnection.disconnect` method.
    '''

    msg = 'Server sent unsupported response'
class ConnectionClosed(_GeneralError):
    '''
    Raised if the server unexpectedly closed the connection.
    '''

    msg = 'connection closed unexpectedly'
class _Buffer:
'''
This class represents a buffer with a fixed size, but varying
fill level.
'''
__slots__ = ('d', 'b', 'e')
def __init__(self, size):
#: Holds the actual data
self.d = bytearray(size)
#: Position of the first buffered byte that has not yet
#: been consumed ("*b*eginning")
self.b = 0
#: Fill-level of the buffer ("*e*nd")
self.e = 0
def __len__(self):
'''Return amount of data ready for consumption'''
return self.e - self.b
def clear(self):
'''Forget all buffered data'''
self.b = 0
self.e = 0
def compact(self):
'''Ensure that buffer can be filled up to its maximum size
If part of the buffer data has been consumed, the unconsumed part is
copied to the beginning of the buffer to maximize the available space.
'''
if self.b == 0:
return
log.debug('compacting buffer')
buf = memoryview(self.d)[self.b:self.e]
len_ = len(buf)
self.d = bytearray(len(self.d))
self.d[:len_] = buf
self.b = 0
self.e = len_
def exhaust(self):
'''Return (and consume) all available data'''
if self.b == 0:
log.debug('exhausting buffer (truncating)')
# Return existing buffer after truncating it
buf = self.d
self.d = bytearray(len(self.d))
buf[self.e:] = b''
else:
log.debug('exhausting buffer (copying)')
buf = self.d[self.b:self.e]
self.b = 0
self.e = 0
return buf
[docs]class HTTPConnection:
'''
This class encapsulates a HTTP connection. Methods whose name begin with
``co_`` return coroutines. Instead of blocking, a coroutines will yield
a `PollNeeded` instance that encapsulates information about the IO operation
that would block. The coroutine should be resumed once the operation can be
performed without blocking.
'''
def __init__(self, hostname, port=None, ssl_context=None, proxy=None):
if port is None:
if ssl_context is None:
self.port = HTTP_PORT
else:
self.port = HTTPS_PORT
else:
self.port = port
self.ssl_context = ssl_context
self.hostname = hostname
#: Socket object connecting to the server
self._sock = None
#: Read-buffer
self._rbuf = _Buffer(BUFFER_SIZE)
#: a tuple ``(hostname, port)`` of the proxy server to use or `None`.
#: Note that currently only CONNECT-style proxying is supported.
self.proxy = proxy
#: a deque of ``(method, path, body_len)`` tuples corresponding to
#: requests whose response has not yet been read completely. Requests
#: with Expect: 100-continue will be added twice to this queue, once
#: after the request header has been sent, and once after the request
#: body data has been sent. *body_len* is `None`, or the size of the
#: **request** body that still has to be sent when using 100-continue.
self._pending_requests = deque()
#: This attribute is `None` when a request has been sent completely. If
#: request headers have been sent, but request body data is still
#: pending, it is set to a ``(method, path, body_len)`` tuple. *body_len*
#: is the number of bytes that that still need to send, or
#: WAITING_FOR_100c if we are waiting for a 100 response from the server.
self._out_remaining = None
#: Number of remaining bytes of the current response body (or current
#: chunk), or `None` if the response header has not yet been read.
self._in_remaining = None
#: Transfer encoding of the active response (if any).
self._encoding = None
    # Implement bare-bones `io.BaseIO` interface, so that instances
    # can be wrapped in `io.TextIOWrapper` if desired.
    def writable(self):
        '''Return `True` (request body data can be written), per `io.IOBase`.'''
        return True
    def readable(self):
        '''Return `True` (response body data can be read), per `io.IOBase`.'''
        return True
    def seekable(self):
        '''Return `False` — an HTTP connection is not seekable.'''
        return False
    # We consider the stream closed if there is no active response
    # from which body data could be read.
    @property
    def closed(self):
        '''`True` if there is no active response with pending body data.'''
        return self._in_remaining is None
[docs] def connect(self):
"""Connect to the remote server
This method generally does not need to be called manually.
"""
log.debug('start')
if self.proxy:
log.debug('connecting to %s', self.proxy)
self._sock = socket.create_connection(self.proxy)
eval_coroutine(self._co_tunnel())
else:
log.debug('connecting to %s', (self.hostname, self.port))
self._sock = socket.create_connection((self.hostname, self.port))
if self.ssl_context:
log.debug('establishing ssl layer')
server_hostname = self.hostname if ssl.HAS_SNI else None
self._sock = self.ssl_context.wrap_socket(self._sock, server_hostname=server_hostname)
try:
ssl.match_hostname(self._sock.getpeercert(), self.hostname)
except:
self.close()
raise
self._sock.setblocking(False)
self._rbuf.clear()
self._out_remaining = None
self._in_remaining = None
self._pending_requests = deque()
log.debug('done')
def _co_tunnel(self):
'''Set up CONNECT tunnel to destination server'''
log.debug('start connecting to %s:%d', self.hostname, self.port)
yield from self._co_send(("CONNECT %s:%d HTTP/1.0\r\n\r\n"
% (self.hostname, self.port)).encode('latin1'))
(status, reason) = yield from self._co_read_status()
log.debug('got %03d %s', status, reason)
yield from self._co_read_header()
if status != 200:
self.disconnect()
raise ConnectionError("Tunnel connection failed: %d %s" % (status, reason))
[docs] def get_ssl_peercert(self, binary_form=False):
'''Get peer SSL certificate
If plain HTTP is used, return `None`. Otherwise, the call is delegated
to the underlying SSL sockets `~ssl.SSLSocket.getpeercert` method.
'''
if not self.ssl_context:
return None
else:
if not self._sock:
self.connect()
return self._sock.getpeercert()
[docs] def get_ssl_cipher(self):
'''Get active SSL cipher
If plain HTTP is used, return `None`. Otherwise, the call is delegated
to the underlying SSL sockets `~ssl.SSLSocket.cipher` method.
'''
if not self.ssl_context:
return None
else:
if not self._sock:
self.connect()
return self._sock.cipher()
[docs] def send_request(self, method, path, headers=None, body=None, expect100=False):
'''placeholder, will be replaced dynamically'''
eval_coroutine(self.co_send_request(method, path, headers=headers,
body=body, expect100=expect100))
    def co_send_request(self, method, path, headers=None, body=None, expect100=False):
        '''Send a new HTTP request to the server

        The message body may be passed in the *body* argument or be sent
        separately. In the former case, *body* must be a :term:`bytes-like
        object`. In the latter case, *body* must be a `BodyFollowing`
        instance specifying the length of the data that will be sent. If no
        length is specified, the data will be sent using chunked encoding.

        *headers* should be a mapping containing the HTTP headers to be sent
        with the request. Multiple header lines with the same key are not
        supported. It is recommended to pass a `CaseInsensitiveDict` instance,
        other mappings will be converted to `CaseInsensitiveDict` automatically.

        If *body* is provided as a :term:`bytes-like object`, a
        ``Content-MD5`` header is generated automatically unless it has been
        provided in *headers* already.

        Raises `ValueError` if *expect100* is used without `BodyFollowing`,
        and `StateError` if a previous request body is still unsent.
        '''
        log.debug('start')
        if expect100 and not isinstance(body, BodyFollowing):
            raise ValueError('expect100 only allowed for separate body')
        if self._sock is None:
            self.connect()
        if self._out_remaining:
            raise StateError('body data has not been sent completely yet')
        if headers is None:
            headers = CaseInsensitiveDict()
        elif not isinstance(headers, CaseInsensitiveDict):
            headers = CaseInsensitiveDict(headers)

        pending_body_size = None
        if body is None:
            headers['Content-Length'] = '0'
        elif isinstance(body, BodyFollowing):
            if body.length is None:
                raise ValueError('Chunked encoding not yet supported.')
            log.debug('preparing to send %d bytes of body data', body.length)
            if expect100:
                headers['Expect'] = '100-continue'
                # Do not set _out_remaining, we must only send data once we've
                # read the response. Instead, save body size in
                # _pending_requests so that it can be restored by
                # read_response().
                pending_body_size = body.length
                self._out_remaining = (method, path, WAITING_FOR_100c)
            else:
                self._out_remaining = (method, path, body.length)
            headers['Content-Length'] = str(body.length)
            body = None
        elif isinstance(body, (bytes, bytearray, memoryview)):
            headers['Content-Length'] = str(len(body))
            if 'Content-MD5' not in headers:
                log.debug('computing content-md5')
                headers['Content-MD5'] = b64encode(hashlib.md5(body).digest()).decode('ascii')
        else:
            raise TypeError('*body* must be None, bytes-like or BodyFollowing')

        # Generate host header (IPv6 literals must be bracketed)
        host = self.hostname
        if host.find(':') >= 0:
            host = '[{}]'.format(host)
        default_port = HTTPS_PORT if self.ssl_context else HTTP_PORT
        if self.port == default_port:
            headers['Host'] = host
        else:
            headers['Host'] = '{}:{}'.format(host, self.port)

        # Assemble request
        headers['Accept-Encoding'] = 'identity'
        if 'Connection' not in headers:
            headers['Connection'] = 'keep-alive'
        request = [ '{} {} HTTP/1.1'.format(method, path).encode('latin1') ]
        for key, val in headers.items():
            request.append('{}: {}'.format(key, val).encode('latin1'))
        # Empty element terminates the header section with a blank line ...
        request.append(b'')
        if body is not None:
            request.append(body)
        else:
            # ... and a second empty element makes the join end in CRLF.
            request.append(b'')
        buf = b'\r\n'.join(request)

        log.debug('sending %s %s', method, path)
        yield from self._co_send(buf)

        # Requests that are complete (or waiting for 100-continue) are
        # queued so that read_response() can match responses to them.
        if not self._out_remaining or expect100:
            self._pending_requests.append((method, path, pending_body_size))
def _co_send(self, buf):
'''Send *buf* to server'''
log.debug('trying to send %d bytes', len(buf))
if not isinstance(buf, memoryview):
buf = memoryview(buf)
fd = self._sock.fileno()
while True:
if not select((), (fd,), (), 0)[1]:
log.debug('yielding')
yield PollNeeded(fd, EPOLLOUT)
continue
try:
len_ = self._sock.send(buf)
except BrokenPipeError:
raise ConnectionClosed('found closed when trying to write')
except OSError as exc:
if exc.errno == errno.EINVAL:
# Blackhole routing, according to ip(7)
raise ConnectionClosed('ip route goes into black hole')
else:
raise
except InterruptedError:
# According to send(2), this means that no data has been sent
# at all before the interruption, so we just try again.
pass
log.debug('sent %d bytes', len_)
buf = buf[len_:]
if len(buf) == 0:
log.debug('done')
return
[docs] def write(self, buf):
'''placeholder, will be replaced dynamically'''
eval_coroutine(self.co_write(buf))
[docs] def co_write(self, buf):
'''Write request body data
`ExcessBodyData` will be raised when attempting to send more data than
required to complete the request body of the active request.
'''
log.debug('start (len=%d)', len(buf))
if not self._out_remaining:
raise StateError('No active request with pending body data')
(method, path, remaining) = self._out_remaining
if remaining is WAITING_FOR_100c:
raise StateError("can't write when waiting for 100-continue")
if len(buf) > remaining:
raise ExcessBodyData('trying to write %d bytes, but only %d bytes pending'
% (len(buf), remaining))
yield from self._co_send(buf)
len_ = len(buf)
if len_ == remaining:
log.debug('body sent fully')
self._out_remaining = None
self._pending_requests.append((method, path, None))
else:
self._out_remaining = (method, path, remaining - len_)
log.debug('done')
[docs] def response_pending(self):
'''Return `True` if there are still outstanding responses
This includes responses that have been partially read.
'''
return len(self._pending_requests) > 0
[docs] def read_response(self):
'''placeholder, will be replaced dynamically'''
return eval_coroutine(self.co_read_response())
    def co_read_response(self):
        '''Read response status line and headers

        Return a `HTTPResponse` instance containing information about response
        status, reason, and headers. The response body data must be retrieved
        separately (e.g. using `.read` or `.readall`).

        Even for a response with empty body, one of the body reading methods
        must be called once before the next response can be processed.
        '''
        log.debug('start')
        if len(self._pending_requests) == 0:
            raise StateError('No pending requests')
        if self._in_remaining is not None:
            raise StateError('Previous response not read completely')
        (method, path, body_size) = self._pending_requests[0]

        # Need to loop to handle any 1xx responses
        while True:
            (status, reason) = yield from self._co_read_status()
            log.debug('got %03d %s', status, reason)
            hstring = yield from self._co_read_header()
            header = email.message_from_string(hstring, policy=email.policy.HTTP)
            # Any final (non-1xx) status terminates the loop.
            if status < 100 or status > 199:
                break
            # We are waiting for 100-continue
            if body_size is not None and status == 100:
                break

        # Handle (expected) 100-continue
        if status == 100:
            assert self._out_remaining == (method, path, WAITING_FOR_100c)
            # We're ready to sent request body now
            self._out_remaining = self._pending_requests.popleft()
            self._in_remaining = None
            # Return early, because we don't have to prepare
            # for reading the response body at this time
            return HTTPResponse(method, path, status, reason, header, length=0)

        # Handle non-100 status when waiting for 100-continue
        elif body_size is not None:
            assert self._out_remaining == (method, path, WAITING_FOR_100c)
            # RFC 2616 actually states that the server MAY continue to read
            # the request body after it has sent a final status code
            # (http://tools.ietf.org/html/rfc2616#section-8.2.3). However,
            # that totally defeats the purpose of 100-continue, so we hope
            # that the server behaves sanely and does not attempt to read
            # the body of a request it has already handled. (As a side note,
            # this ambuigity in the RFC also totally breaks HTTP pipelining,
            # as we can never be sure if the server is going to expect the
            # request or some request body data).
            self._out_remaining = None

        #
        # Prepare to read body
        #
        body_length = None

        tc = header['Transfer-Encoding']
        if tc:
            tc = tc.lower()
        if tc and tc == 'chunked':
            log.debug('Chunked encoding detected')
            self._encoding = CHUNKED_ENCODING
            self._in_remaining = 0
        elif tc and tc != 'identity':
            # Server must not sent anything other than identity or chunked, so
            # we raise InvalidResponse rather than UnsupportedResponse. We defer
            # raising the exception to read(), so that we can still return the
            # headers and status (and don't fail if the response body is empty).
            log.warning('Server uses invalid response encoding "%s"', tc)
            self._encoding = InvalidResponse('Cannot handle %s encoding' % tc)
        else:
            log.debug('identity encoding detected')
            self._encoding = IDENTITY_ENCODING

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or method == 'HEAD'):
            log.debug('no content by RFC')
            body_length = 0
            self._in_remaining = 0
            # for these cases, there isn't even a zero chunk we could read
            self._encoding = IDENTITY_ENCODING

        # Chunked doesn't require content-length
        elif self._encoding is CHUNKED_ENCODING:
            pass

        # Otherwise we require a content-length. We defer raising the exception
        # to read(), so that we can still return the headers and status.
        elif ('Content-Length' not in header
              and not isinstance(self._encoding, InvalidResponse)):
            log.debug('no content length and no chunkend encoding, will raise on read')
            self._encoding = UnsupportedResponse('No content-length and no chunked encoding')
            self._in_remaining = 0

        else:
            self._in_remaining = int(header['Content-Length'])
            body_length = self._in_remaining

        log.debug('done (in_remaining=%d)', self._in_remaining)
        return HTTPResponse(method, path, status, reason, header, body_length)
    def _co_read_status(self):
        '''Read response status line

        Returns a ``(status, reason)`` tuple; *status* is an `int`.
        '''
        log.debug('start')
        # read status
        try:
            line = yield from self._co_readstr_until(b'\r\n', MAX_LINE_SIZE)
        except _ChunkTooLong:
            raise InvalidResponse('server send ridicously long status line')
        try:
            version, status, reason = line.split(None, 2)
        except ValueError:
            # Status line without a reason phrase
            try:
                version, status = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail.
                version = ""
        if not version.startswith("HTTP/1"):
            raise UnsupportedResponse('%s not supported' % version)
        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise InvalidResponse('%d is not a valid status' % status)
        except ValueError:
            raise InvalidResponse('%s is not a valid status' % status)
        log.debug('done')
        return (status, reason.strip())
    def _co_read_header(self):
        '''Read response header

        Returns the raw header section decoded to latin1 (empty string for
        an empty header).
        '''
        log.debug('start')
        # Peek into buffer. If the first characters are \r\n, then the header
        # is empty (so our search for \r\n\r\n would fail)
        rbuf = self._rbuf
        if len(rbuf) < 2:
            yield from self._co_fill_buffer(2)
        if rbuf.d[rbuf.b:rbuf.b+2] == b'\r\n':
            log.debug('done (empty header)')
            # Consume the bare CRLF.
            rbuf.b += 2
            return ''
        try:
            hstring = yield from self._co_readstr_until(b'\r\n\r\n', MAX_HEADER_SIZE)
        except _ChunkTooLong:
            raise InvalidResponse('server sent ridicously long header')
        log.debug('done (%d characters)', len(hstring))
        return hstring
[docs] def read(self, len_=None):
'''placeholder, will be replaced dynamically'''
if len_ is None:
return self.readall()
return eval_coroutine(self.co_read(len_))
[docs] def co_read(self, len_=None):
'''Read up to *len_* bytes of response body data
This method may return less than *len_* bytes, but will return ``b''`` only
if the response body has been read completely. Further attempts to read
more data after ``b''`` has been returned will result in `StateError` being
raised.
If *len_* is `None`, this method returns the entire response body. Further
calls will not return ``b''`` but directly raise `StateError`.
'''
log.debug('start (len=%d)', len_)
if len_ is None:
return (yield from self.co_readall())
if len_ == 0:
return b''
if self._in_remaining is None:
raise StateError('No active response with body')
if self._encoding is IDENTITY_ENCODING:
return (yield from self._co_read_id(len_))
elif self._encoding is CHUNKED_ENCODING:
return (yield from self._co_read_chunked(len_=len_))
elif isinstance(self._encoding, Exception):
raise self._encoding
else:
raise RuntimeError('ooops, this should not be possible')
[docs] def readinto(self, buf):
'''placeholder, will be replaced dynamically'''
return eval_coroutine(self.co_readinto(buf))
[docs] def co_readinto(self, buf):
'''Read response body data into *buf*
Return the number of bytes written or zero if the response body has been
read completely. Further attempts to read more data after zero has been
returned will result in `StateError` being raised.
*buf* must implement the memoryview protocol.
'''
log.debug('start (buflen=%d)', len(buf))
if len(buf) == 0:
return 0
if self._in_remaining is None:
raise StateError('No active response with body')
if self._encoding is IDENTITY_ENCODING:
return (yield from self._co_readinto_id(buf))
elif self._encoding is CHUNKED_ENCODING:
return (yield from self._co_read_chunked(buf=buf))
elif isinstance(self._encoding, Exception):
raise self._encoding
else:
raise RuntimeError('ooops, this should not be possible')
    def _co_read_id(self, len_):
        '''Read up to *len_* bytes of response body assuming identity encoding

        Returns ``b''`` (and pops the pending request) once the body has
        been consumed completely.
        '''
        log.debug('start (len=%d)', len_)
        assert self._in_remaining is not None
        if not self._in_remaining:
            # Body retrieved completely, clean up
            self._in_remaining = None
            self._pending_requests.popleft()
            return b''
        sock_fd = self._sock.fileno()
        rbuf = self._rbuf
        # Never read past the end of the current response body.
        len_ = min(len_, self._in_remaining)
        log.debug('updated len_=%d', len_)

        # Buffer might be empty, but have no capacity. This is handled
        # in _try_fill_buffer(), but we have to take this into account
        # in the while condition already.
        if rbuf.b == rbuf.e:
            rbuf.b = 0
            rbuf.e = 0

        # Loop while we could return more data than we have buffered
        # and buffer is not full
        while len(rbuf) < len_ and rbuf.e < len(rbuf.d):
            got_data = self._try_fill_buffer()
            if not got_data and not rbuf:
                log.debug('buffer empty and nothing to read, yielding..')
                yield PollNeeded(sock_fd, EPOLLIN)
            elif not got_data:
                log.debug('nothing more to read')
                break

        # Return at most what actually ended up in the buffer.
        len_ = min(len_, len(rbuf))
        self._in_remaining -= len_

        if len_ < len(rbuf):
            # Partial consumption: copy the requested slice out.
            buf = rbuf.d[rbuf.b:rbuf.b+len_]
            rbuf.b += len_
        else:
            # Everything buffered is consumed; hand the buffer over wholesale.
            buf = rbuf.exhaust()
        log.debug('done (%d bytes)', len(buf))
        return buf
    def _co_readinto_id(self, buf):
        '''Read response body into *buf* assuming identity encoding

        Returns the number of bytes written (0 once the body is complete).
        '''
        log.debug('start (buflen=%d)', len(buf))
        assert self._in_remaining is not None
        if not self._in_remaining:
            # Body retrieved completely, clean up
            self._in_remaining = None
            self._pending_requests.popleft()
            return 0
        sock_fd = self._sock.fileno()
        rbuf = self._rbuf
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        # Never read past the end of the current response body.
        len_ = min(len(buf), self._in_remaining)
        log.debug('updated len_=%d', len_)

        # First use read buffer contents
        pos = min(len(rbuf), len_)
        if pos:
            log.debug('using buffered data')
            buf[:pos] = rbuf.d[rbuf.b:rbuf.b+pos]
            rbuf.b += pos
            self._in_remaining -= pos
            # If we've read enough, return immediately
            if pos == len_:
                log.debug('done (got all we need, %d bytes)', pos)
                return pos
            # Otherwise, prepare to read more from socket
            log.debug('got %d bytes from buffer', pos)
        assert not len(rbuf)
        while True:
            log.debug('trying to read from socket')
            try:
                # Read directly into the caller's buffer (no extra copy).
                read = self._sock.recv_into(buf[pos:len_])
            except (socket.timeout, ssl.SSLWantReadError, BlockingIOError):
                if pos:
                    # Return what we have rather than blocking for more.
                    log.debug('done (nothing more to read, got %d bytes)', pos)
                    return pos
                else:
                    log.debug('no data yet and nothing to read, yielding..')
                    yield PollNeeded(sock_fd, EPOLLIN)
                    continue
            if not read:
                raise ConnectionClosed('connection closed unexpectedly')
            log.debug('got %d bytes', read)
            self._in_remaining -= read
            pos += read
            if pos == len_:
                log.debug('done (got all we need, %d bytes)', pos)
                return pos
    def _co_read_chunked(self, len_=None, buf=None):
        '''Read response body assuming chunked encoding

        If *len_* is not `None`, reads up to *len_* bytes of data and returns
        a `bytes-like object`. If *buf* is not `None`, reads data into *buf*.
        '''
        # TODO: In readinto mode, we always need an extra sock.recv()
        # to get the chunk trailer.. is there some way to avoid that? And
        # maybe also put the beginning of the next chunk into the read buffer right away?
        log.debug('start (%s mode)', 'readinto' if buf else 'read')
        # Exactly one of *len_* / *buf* must be given, and it must be non-empty.
        assert (len_ is None) != (buf is None)
        assert bool(len_) or bool(buf)
        assert self._in_remaining is not None

        if self._in_remaining == 0:
            # Current chunk is exhausted: parse the next chunk-size line.
            log.debug('starting next chunk')
            try:
                line = yield from self._co_readstr_until(b'\r\n', MAX_LINE_SIZE)
            except _ChunkTooLong:
                raise InvalidResponse('could not find next chunk marker')
            i = line.find(";")
            if i >= 0:
                log.debug('stripping chunk extensions: %s', line[i:])
                line = line[:i] # strip chunk-extensions
            try:
                self._in_remaining = int(line, 16)
            except ValueError:
                raise InvalidResponse('Cannot read chunk size %r' % line[:20])
            log.debug('chunk size is %d', self._in_remaining)
            if self._in_remaining == 0:
                # Zero-sized chunk terminates the response body.
                self._in_remaining = None
                self._pending_requests.popleft()

        if self._in_remaining is None:
            res = 0 if buf else b''
        elif buf:
            res = yield from self._co_readinto_id(buf)
        else:
            res = yield from self._co_read_id(len_)

        if not self._in_remaining:
            log.debug('chunk complete')
            # Consume the CRLF terminating the chunk (and, after the last
            # chunk, any trailer) — _co_read_header handles both.
            yield from self._co_read_header()
        log.debug('done')
        return res
    def _co_readstr_until(self, substr, maxsize):
        '''Read from server until *substr*, and decode to latin1

        If *substr* cannot be found in the next *maxsize* bytes,
        raises `_ChunkTooLong`. The returned string includes *substr*.
        '''
        if not isinstance(substr, (bytes, bytearray, memoryview)):
            raise TypeError('*substr* must be bytes-like')
        log.debug('reading until %s', substr)
        sock_fd = self._sock.fileno()
        rbuf = self._rbuf
        sub_len = len(substr)
        # Make sure that substr cannot be split over more than one part
        assert len(rbuf.d) > sub_len
        parts = []
        while True:
            # substr may be split between last part and current buffer
            # This isn't very performant, but it should be pretty rare
            if parts and sub_len > 1:
                buf = _join((parts[-1][-sub_len:],
                             rbuf.d[rbuf.b:min(rbuf.e, rbuf.b+sub_len-1)]))
                idx = buf.find(substr)
                if idx >= 0:
                    # NOTE(review): *idx* is offset by -sub_len here so that
                    # the `idx += len(substr)` after the loop restores it to
                    # an offset into rbuf.d — verify the split-boundary case.
                    idx -= sub_len
                    break
            #log.debug('rbuf is: %s', rbuf.d[rbuf.b:min(rbuf.e, rbuf.b+512)])
            stop = min(rbuf.e, rbuf.b + maxsize)
            idx = rbuf.d.find(substr, rbuf.b, stop)
            if idx >= 0: # found
                break
            if stop != rbuf.e:
                raise _ChunkTooLong()
            # If buffer is full, store away the part that we need for sure
            if rbuf.e == len(rbuf.d):
                log.debug('buffer is full, storing part')
                buf = rbuf.exhaust()
                parts.append(buf)
                maxsize -= len(buf)
            # Refill buffer
            while not self._try_fill_buffer():
                log.debug('need more data, yielding')
                yield PollNeeded(sock_fd, EPOLLIN)

        log.debug('found substr at %d', idx)
        # Consume everything up to and including *substr*.
        idx += len(substr)
        buf = rbuf.d[rbuf.b:idx]
        rbuf.b = idx
        if parts:
            parts.append(buf)
            buf = _join(parts)
        try:
            return buf.decode('latin1')
        except UnicodeDecodeError:
            raise InvalidResponse('server response cannot be decoded to latin1')
def _try_fill_buffer(self):
'''Try to fill up read buffer
Returns the number of bytes read into buffer, or `None` if no
data was available on the socket. May raise `ConnectionClosed`.
'''
log.debug('start')
rbuf = self._rbuf
# If no capacity or empty, reset
# (this clause cover *both* cases)
if rbuf.d == rbuf.e:
rbuf.e = 0
rbuf.d = 0
try:
len_ = self._sock.recv_into(memoryview(rbuf.d)[rbuf.e:])
except (socket.timeout, ssl.SSLWantReadError, BlockingIOError):
log.debug('done (nothing ready)')
return None
if not len_:
assert rbuf.e < len(rbuf.d)
raise ConnectionClosed('connection closed unexpectedly')
rbuf.e += len_
log.debug('done (got %d bytes)', len_)
return len_
    def _co_fill_buffer(self, len_):
        '''Make sure that there are at least *len_* bytes in buffer'''
        rbuf = self._rbuf
        sock_fd = self._sock.fileno()
        while len(rbuf) < len_:
            # Not enough room between *b* and the end of the buffer:
            # move the unconsumed data to the front first.
            if len(rbuf.d) - rbuf.b < len_:
                self._rbuf.compact()
            # Yield until the socket delivers more data.
            if not self._try_fill_buffer():
                yield PollNeeded(sock_fd, EPOLLIN)
[docs] def readall(self):
'''placeholder, will be replaced dynamically'''
return eval_coroutine(self.co_readall())
[docs] def co_readall(self):
'''Read and return complete response body
After this function has returned, attemps to read more body data
for the same response will raise `StateError`.
'''
log.debug('start')
parts = []
while True:
buf = yield from self.co_read(BUFFER_SIZE)
log.debug('got %d bytes', len(buf))
if not buf:
break
parts.append(buf)
buf = _join(parts)
log.debug('done (%d bytes)', len(buf))
return buf
[docs] def discard(self):
'''placeholder, will be replaced dynamically'''
return eval_coroutine(self.co_discard())
[docs] def co_discard(self):
'''Read and discard current response body
After this function has returned, attempts to read more body data
for the same response will raise `StateError`.
'''
log.debug('start')
buf = memoryview(bytearray(BUFFER_SIZE))
while True:
len_ = yield from self.co_readinto(buf)
if not len_:
break
log.debug('discarding %d bytes', len_)
log.debug('done')
[docs] def disconnect(self):
'''Close HTTP connection'''
log.debug('start')
if self._sock:
try:
self._sock.shutdown(socket.SHUT_RDWR)
except OSError:
# When called to reset after connection problems, socket
# may have shut down already.
pass
self._sock.close()
self._sock = None
self._rbuf.clear()
else:
log.debug('already closed')
def _extend_HTTPConnection_docstrings():
    # Every co_* coroutine and its blocking counterpart share one
    # docstring; generate both variants from the coroutine's docs and add
    # a cross-referencing suffix to each.
    co_suffix = '\n\n' + textwrap.fill(
        'This method returns a coroutine. `.%s` is a regular method '
        'implementing the same functionality.', width=78)
    reg_suffix = '\n\n' + textwrap.fill(
        'This method may block. `.co_%s` provides a coroutine '
        'implementing the same functionality without blocking.', width=78)
    names = ('read', 'read_response', 'readall', 'readinto', 'send_request',
             'write', 'discard')
    for name in names:
        regular = getattr(HTTPConnection, name)
        coroutine = getattr(HTTPConnection, 'co_' + name)
        doc = getdoc(coroutine)
        regular.__doc__ = doc + reg_suffix % name
        coroutine.__doc__ = doc + co_suffix % name
_extend_HTTPConnection_docstrings()
def _join(parts):
'''Join a sequence of byte-like objects
This method is necessary because `bytes.join` does not work with
memoryviews.
'''
size = 0
for part in parts:
size += len(parts)
buf = bytearray(size)
i = 0
for part in parts:
len_ = len(part)
buf[i:i+len_] = part
i += len_
return buf
def eval_coroutine(crt):
    '''Evaluate *crt* (polling as needed) and return its result

    *crt* must be a coroutine yielding `PollNeeded` instances; its return
    value (carried by `StopIteration`) is returned.
    '''
    try:
        while True:
            # Step the coroutine and wait for the requested IO. These side
            # effects must not live inside an `assert`, which would be
            # stripped entirely under ``python -O`` and leave an infinite
            # loop that never advances the coroutine.
            io_req = next(crt)
            success = io_req.poll()
            assert success
            log.debug('polling')
    except StopIteration as exc:
        return exc.value
def is_temp_network_error(exc):
    '''Return true if *exc* represents a potentially temporary network problem'''
    transient = (socket.timeout, ConnectionError, TimeoutError, InterruptedError,
                 ConnectionClosed, ssl.SSLZeroReturnError, ssl.SSLEOFError,
                 ssl.SSLSyscallError)
    if isinstance(exc, transient):
        return True
    # Name-resolution failures are formally permanent errors, but they may
    # also just mean that there is currently no network connection to the
    # DNS server.
    if isinstance(exc, (socket.gaierror, socket.herror)):
        return exc.errno in (socket.EAI_AGAIN, socket.EAI_NONAME)
    return False
class CaseInsensitiveDict(MutableMapping):
    """A case-insensitive `dict`-like object.

    Implements all methods and operations of
    :class:`collections.abc.MutableMapping` as well as `.copy`.

    All keys are expected to be strings. The structure remembers the case of
    the last key to be set, and :meth:`!iter`, :meth:`!keys` and
    :meth:`!items` will contain case-sensitive keys. However, querying and
    contains testing is case insensitive::

        cid = CaseInsensitiveDict()
        cid['Accept'] = 'application/json'
        cid['aCCEPT'] == 'application/json'  # True
        list(cid) == ['Accept']  # True

    For example, ``headers['content-encoding']`` will return the value of a
    ``'Content-Encoding'`` response header, regardless of how the header name
    was originally stored.

    If the constructor, :meth:`!update`, or equality comparison operations
    are given multiple keys that have equal lower-case representations, the
    behavior is undefined.
    """

    def __init__(self, data=None, **kwargs):
        # Internal mapping: lowercased key -> (original key, value).
        self._store = dict()
        self.update({} if data is None else data, **kwargs)

    def __setitem__(self, key, value):
        # Index by the lowercased key, but remember the original spelling
        # so that iteration reproduces it.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        cased_key, value = self._store[key.lower()]
        return value

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (cased_key for (cased_key, value) in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like :meth:`!items`, but with all lowercase keys."""
        return ((lower_key, pair[1])
                for (lower_key, pair) in self._store.items())

    def __eq__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        # Normalize and compare case-insensitively.
        other = CaseInsensitiveDict(other)
        return dict(self.lower_items()) == dict(other.lower_items())

    def copy(self):
        # The stored values are (cased key, value) pairs, which `update`
        # (via the constructor) accepts as an iterable of key/value tuples.
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
if asyncio:
    class AioFuture(asyncio.Future):
        '''
        This class wraps a coroutine that yields `PollNeeded` instances
        into an `asyncio` compatible `~asyncio.Future`.

        This is done by registering a callback with the event loop that resumes
        the coroutine when the requested IO is available.
        '''

        #: Maps each fd for which any `AioFuture` instance currently has a
        #: read callback registered to that instance (class attribute,
        #: shared by all instances)
        _read_fds = dict()

        #: Maps each fd for which any `AioFuture` instance currently has a
        #: write callback registered to that instance (class attribute,
        #: shared by all instances)
        _write_fds = dict()
        def __init__(self, crt, loop=None):
            '''Wrap coroutine *crt* into a future driven by *loop*

            *crt* must yield `PollNeeded` instances. Evaluation begins as
            soon as the event loop runs (scheduled via `call_soon`).
            If *loop* is None, asyncio's current event loop is presumably
            used (inherited from `asyncio.Future`) -- confirm against the
            asyncio version in use.
            '''
            super().__init__(loop=loop)

            #: The coroutine being evaluated; advanced by `_resume_crt`.
            self._crt = crt

            #: The currently pending io request (that we have registered
            #: callbacks for).
            self._io_req = None

            self._loop.call_soon(self._resume_crt)
def _resume_crt(self, exc=None):
'''Resume coroutine
If coroutine has completed, mark self as done. Otherwise, reschedule
call when requested io is available. If *exc* is specified, raise
*exc* in coroutine.
'''
log.debug('start')
try:
if exc is not None:
io_req = self._crt.throw(exc)
else:
io_req = next(self._crt)
except Exception as exc:
if isinstance(exc, StopIteration):
log.debug('coroutine completed')
self.set_result(exc.value)
else:
log.debug('coroutine raised exception')
self.set_exception(exc)
io_req = self._io_req
if io_req:
# This is a bit fragile.. what if there is more than one
# reader or writer? However, in practice this should not be
# the case: they would read or write unpredictable parts of
# the input/output.
if io_req.mask & EPOLLIN:
self._loop.remove_reader(io_req.fd)
del self._read_fds[io_req.fd]
if io_req.mask & EPOLLOUT:
self._loop.remove_writer(io_req.fd)
del self._write_fds[io_req.fd]
self._io_req = None
return
if not isinstance(io_req, PollNeeded):
self._loop.call_soon(self._resume_crt,
TypeError('Coroutine passed to asyncio_future did not yield '
'PollNeeded instance!'))
return
if io_req.mask & EPOLLIN:
reader = self._read_fds.get(io_req.fd, None)
if reader is None:
log.debug('got poll needed, registering reader')
self._loop.add_reader(io_req.fd, self._resume_crt)
self._read_fds[io_req.fd] = self
elif reader is self:
log.debug('got poll needed, reusing read callback')
else:
self._loop.call_soon(self._resume_crt,
RuntimeError('There is already a read callback for this socket'))
return
if io_req.mask & EPOLLOUT:
writer = self._read_fds.get(io_req.fd, None)
if writer is None:
log.debug('got poll needed, registering writer')
self._loop.add_writer(io_req.fd, self._resume_crt)
self._write_fds[io_req.fd] = self
elif writer is self:
log.debug('got poll needed, reusing write callback')
else:
self._loop.call_soon(self._resume_crt,
RuntimeError('There is already a write callback for this socket'))
return
self._io_req = io_req