You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			461 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			461 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software
# Foundation; All Rights Reserved

"""An HTTPSConnection/Handler with additional proxy and cert validation features.

In particular, monkey patches in Python r74203 to provide support for CONNECT
proxies and adds SSL cert validation if the ssl module is present.
"""
 | 
						|
 | 
						|
__author__ = "{frew,nick.johnson}@google.com (Fred Wulff and Nick Johnson)"
 | 
						|
 | 
						|
import base64
 | 
						|
import httplib
 | 
						|
import logging
 | 
						|
import socket
 | 
						|
from urllib import splitpasswd
 | 
						|
from urllib import splittype
 | 
						|
from urllib import splituser
 | 
						|
import urllib2
 | 
						|
 | 
						|
 | 
						|
class InvalidCertificateException(httplib.HTTPException):
  """Raised when a certificate is provided with an invalid hostname."""

  def __init__(self, host, cert, reason):
    """Constructor.

    Args:
      host: The hostname the connection was made to.
      cert: The SSL certificate (as a dictionary) the host returned.
      reason: user readable error reason.
    """
    # Forward the arguments to the base class so that ``args`` is populated.
    # Copying/unpickling an exception re-invokes the constructor with
    # ``args``; with an empty tuple that would fail because all three
    # parameters are required.
    httplib.HTTPException.__init__(self, host, cert, reason)
    self.host = host
    self.cert = cert
    self.reason = reason

  def __str__(self):
    """Return a user-readable description including a pointer to the docs."""
    return ("Host %s returned an invalid certificate (%s): %s\n"
            "To learn more, see "
            "http://code.google.com/appengine/kb/general.html#rpcssl" %
            (self.host, self.reason, self.cert))
 | 
						|
 | 
						|
 | 
						|
# Probe for the ssl module once, at import time.  Certificate validation is
# only possible when it is available.
try:
  import ssl
  _CAN_VALIDATE_CERTS = True
except ImportError:
  _CAN_VALIDATE_CERTS = False


def can_validate_certs():
  """Return True if we have the SSL package and can validate certificates."""
  return _CAN_VALIDATE_CERTS


# Reexport SSLError so clients don't have to do their own checking for ssl's
# existence.  It is None when the ssl module could not be imported.
if can_validate_certs():
  SSLError = ssl.SSLError
else:
  SSLError = None
 | 
						|
 | 
						|
 | 
						|
def create_fancy_connection(tunnel_host=None, key_file=None,
                            cert_file=None, ca_certs=None,
                            proxy_authorization=None):
  """Build an HTTPSConnection subclass preconfigured with proxy/SSL settings.

  Args:
    tunnel_host: "host[:port]" of the final destination to reach through a
      CONNECT proxy, or a false value to connect directly.
    key_file: Passed to the SSL layer as ``keyfile``.
    cert_file: Passed to the SSL layer as ``certfile``.
    ca_certs: Passed to the SSL layer as ``ca_certs``.  When set (and the ssl
      module is available) the peer certificate is required and its hostname
      is validated.
    proxy_authorization: Value of the Proxy-Authorization header sent with the
      CONNECT request, or a false value to omit the header.

  Returns:
    The PresetProxyHTTPSConnection class (not an instance), closed over the
    arguments above.
  """
  # This abomination brought to you by the fact that
  # the HTTPHandler creates the connection instance in the middle
  # of do_open so we need to add the tunnel host to the class.

  class PresetProxyHTTPSConnection(httplib.HTTPSConnection):
    """An HTTPS connection that uses a proxy defined by the enclosing scope."""

    def __init__(self, *args, **kwargs):
      httplib.HTTPSConnection.__init__(self, *args, **kwargs)

      self._tunnel_host = tunnel_host
      if tunnel_host:
        logging.debug("Creating preset proxy https conn: %s", tunnel_host)

      self.key_file = key_file
      self.cert_file = cert_file
      self.ca_certs = ca_certs
      if can_validate_certs():
        # Only demand a peer certificate when we have CA certs to check it
        # against.
        if self.ca_certs:
          self.cert_reqs = ssl.CERT_REQUIRED
        else:
          self.cert_reqs = ssl.CERT_NONE

    def _get_hostport(self, host, port):
      """Split "host[:port]" into a (host, port) tuple.

      Args:
        host: Hostname, optionally followed by ":port"; IPv6 literals are
          written in square brackets.
        port: Explicit port, or None to parse it out of host (falling back to
          self.default_port).

      Returns:
        A (host, port) tuple; brackets around an IPv6 literal are removed
        when the port is parsed from host.

      Raises:
        httplib.InvalidURL: If the port portion is non-empty and non-numeric.
      """
      # Python 2.7.7rc1 (hg r90728:568041fd8090), 3.4.1 and 3.5 rename
      # _set_hostport to _get_hostport and changes it's functionality.  The
      # Python 2.7.7rc1 version of this method is included here for
      # compatibility with earlier versions of Python.  Without this, HTTPS over
      # HTTP CONNECT proxies cannot be used.

      # This method may be removed if compatibility with Python <2.7.7rc1 is not
      # required.

      # Python bug: http://bugs.python.org/issue7776
      if port is None:
        i = host.rfind(":")
        j = host.rfind("]")         # ipv6 addresses have [...]
        if i > j:
          try:
            port = int(host[i+1:])
          except ValueError:
            if host[i+1:] == "":  # http://foo.com:/ == http://foo.com/
              port = self.default_port
            else:
              raise httplib.InvalidURL("nonnumeric port: '%s'" % host[i+1:])
          host = host[:i]
        else:
          port = self.default_port
        if host and host[0] == "[" and host[-1] == "]":
          host = host[1:-1]

      return (host, port)

    def _tunnel(self):
      """Issue a CONNECT request for the tunnel host over the open socket.

      On return self.host/self.port name the tunnel destination rather than
      the proxy.

      Raises:
        socket.error: If the proxy answers with a status other than 200.
      """
      self.host, self.port = self._get_hostport(self._tunnel_host, None)
      logging.info("Connecting through tunnel to: %s:%d",
                   self.host, self.port)

      self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))

      if proxy_authorization:
        self.send("Proxy-Authorization: %s\r\n" % proxy_authorization)

      # blank line
      self.send("\r\n")

      response = self.response_class(self.sock, strict=self.strict,
                                     method=self._method)
      # pylint: disable=protected-access
      (_, code, message) = response._read_status()

      if code != 200:
        self.close()
        raise socket.error("Tunnel connection failed: %d %s" %
                           (code, message.strip()))

      # Discard the remaining response headers.
      while True:
        line = response.fp.readline()
        if not line:
          # EOF: the proxy closed without sending the blank line.  Without
          # this check readline() would keep returning "" forever.
          break
        if line == "\r\n":
          break

    def _get_valid_hosts_for_cert(self, cert):
      """Returns a list of valid host globs for an SSL certificate.

      Args:
        cert: A dictionary representing an SSL certificate.
      Returns:
        list: A list of valid host globs.
      """
      if "subjectAltName" in cert:
        return [x[1] for x in cert["subjectAltName"] if x[0].lower() == "dns"]
      else:
        # Return a list of commonName fields
        return [x[0][1] for x in cert["subject"]
                if x[0][0].lower() == "commonname"]

    def _validate_certificate_hostname(self, cert, hostname):
      """Perform RFC2818/6125 validation against a cert and hostname.

      The comparison is case-insensitive.

      Args:
        cert: A dictionary representing an SSL certificate.
        hostname: The hostname to test.
      Returns:
        bool: Whether or not the hostname is valid for this certificate.
      """
      hostname = hostname.lower()
      for host in self._get_valid_hosts_for_cert(cert):
        host = host.lower()
        # Wildcards are only valid when the * exists at the end of the last
        # (left-most) label, and there are at least 3 labels in the expression.
        if ("*." in host and host.count("*") == 1 and
            host.count(".") > 1 and "." in hostname):
          left_expected, right_expected = host.split("*.")
          left_hostname, right_hostname = hostname.split(".", 1)
          if (left_hostname.startswith(left_expected) and
              right_expected == right_hostname):
            return True
        elif host == hostname:
          return True
      return False

    def connect(self):
      """Connect (optionally through the tunnel) and wrap the socket in SSL.

      Raises:
        InvalidCertificateException: If a certificate is required and it does
          not match the host that was connected to.
      """
      # TODO(frew): When we drop support for <2.6 (in the far distant future),
      # change this to socket.create_connection.
      self.sock = _create_connection((self.host, self.port))

      if self._tunnel_host:
        self._tunnel()

      # ssl and FakeSocket got deprecated. Try for the new hotness of wrap_ssl,
      # with fallback. Note: Since can_validate_certs() just checks for the
      # ssl module, it's equivalent to attempting to import ssl from
      # the function, but doesn't require a dynamic import, which doesn't
      # play nicely with dev_appserver.
      if can_validate_certs():
        self.sock = ssl.wrap_socket(self.sock,
                                    keyfile=self.key_file,
                                    certfile=self.cert_file,
                                    ca_certs=self.ca_certs,
                                    cert_reqs=self.cert_reqs)

        if self.cert_reqs & ssl.CERT_REQUIRED:
          cert = self.sock.getpeercert()
          hostname = self.host
          # Drop a trailing ":port" if one is present.  A string with more
          # than one colon is left untouched so it is not truncated.
          if hostname.count(":") == 1:
            hostname = hostname.split(":", 1)[0]
          if not self._validate_certificate_hostname(cert, hostname):
            # Do not leave a connection to an unverified peer open.
            self.close()
            raise InvalidCertificateException(hostname, cert,
                                              "hostname mismatch")
      else:
        ssl_socket = socket.ssl(self.sock,
                                keyfile=self.key_file,
                                certfile=self.cert_file)
        self.sock = httplib.FakeSocket(self.sock, ssl_socket)

  return PresetProxyHTTPSConnection
 | 
						|
 | 
						|
 | 
						|
# Here to end of _create_connection adapted from Python 2.6's socket.py
# Sentinel meaning "no timeout argument was supplied".
_GLOBAL_DEFAULT_TIMEOUT = object()


def _create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT):
  """Connect to *address* and return the socket object.

  Convenience function.  Connect to *address* (a 2-tuple ``(host,
  port)``) and return the socket object.  Passing the optional
  *timeout* parameter will set the timeout on the socket instance
  before attempting to connect.  If no *timeout* is supplied, the
  socket's timeout is left untouched, so the global default timeout
  setting returned by :func:`getdefaulttimeout` is used.

  Each address returned by getaddrinfo is tried in turn; the first
  successful connection wins.

  Raises:
    socket.error: The error from the last failed attempt, or a generic
      error if getaddrinfo returned no addresses.
  """
  host, port = address
  last_error = None
  for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
    af, socktype, proto, _, sa = res
    sock = None
    try:
      sock = socket.socket(af, socktype, proto)
      if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
        sock.settimeout(timeout)
      sock.connect(sa)
      return sock

    except socket.error as err:
      # Remember the failure and release the half-built socket before
      # trying the next candidate address.
      last_error = err
      if sock is not None:
        sock.close()

  if last_error is not None:
    raise last_error
  raise socket.error("getaddrinfo returns an empty list")
 | 
						|
 | 
						|
 | 
						|
class FancyRequest(urllib2.Request):
  """A request that allows the use of a CONNECT proxy."""

  def __init__(self, *args, **kwargs):
    urllib2.Request.__init__(self, *args, **kwargs)
    self._tunnel_host = None
    self._key_file = None
    self._cert_file = None
    self._ca_certs = None

  def set_proxy(self, host, type):
    """Route the request through a proxy, keeping https requests as https.

    For an https request without a tunnel host yet, the current host is
    recorded as the tunnel destination before the proxy is applied.

    Args:
      host: The proxy's "host[:port]".
      type: The proxy's scheme.
    """
    saved_type = None

    if self.get_type() == "https" and not self._tunnel_host:
      self._tunnel_host = self.get_host()
      saved_type = self.get_type()
    urllib2.Request.set_proxy(self, host, type)

    if saved_type:
      # Put the original type back after the base class call: we want to
      # preserve the type for tunneling.
      self.type = saved_type

  def set_ssl_info(self, key_file=None, cert_file=None, ca_certs=None):
    """Record the SSL key, certificate and CA bundle paths for this request."""
    self._key_file = key_file
    self._cert_file = cert_file
    self._ca_certs = ca_certs
 | 
						|
 | 
						|
 | 
						|
class FancyProxyHandler(urllib2.ProxyHandler):
  """A ProxyHandler that works with CONNECT-enabled proxies."""

  # Taken verbatim from /usr/lib/python2.5/urllib2.py
  def _parse_proxy(self, proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, r_scheme = splittype(proxy)
    if not r_scheme.startswith("/"):
      # authority
      scheme = None
      authority = proxy
    else:
      # URL
      if not r_scheme.startswith("//"):
        raise ValueError("proxy URL with no authority: %r" % proxy)
      # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
      # and 3.3.), path is empty or starts with '/'
      end = r_scheme.find("/", 2)
      if end == -1:
        end = None
      authority = r_scheme[2:end]
    userinfo, hostport = splituser(authority)
    if userinfo is not None:
      user, password = splitpasswd(userinfo)
    else:
      user = password = None
    return scheme, user, password, hostport

  def proxy_open(self, req, proxy, type):
    """Apply the proxy to req; leave https requests for the HTTPS handler.

    Args:
      req: The request being opened.
      proxy: The proxy as a URL or bare authority (see _parse_proxy).
      type: The scheme this handler method was invoked for.

    Returns:
      None for a request that was https before the proxy was applied;
      otherwise whatever urllib2.ProxyHandler.proxy_open returns.
    """
    # This block is copied wholesale from Python2.6 urllib2.
    # It is idempotent, so the superclass method call executes as normal
    # if invoked.
    orig_type = req.get_type()
    proxy_type, user, password, hostport = self._parse_proxy(proxy)
    if proxy_type is None:
      proxy_type = orig_type
    if user and password:
      user_pass = "%s:%s" % (urllib2.unquote(user), urllib2.unquote(password))
      creds = base64.b64encode(user_pass).strip()
      # Later calls overwrite earlier calls for the same header
      req.add_header("Proxy-authorization", "Basic " + creds)
    hostport = urllib2.unquote(hostport)
    req.set_proxy(hostport, proxy_type)
    # This condition is the change
    if orig_type == "https":
      return None

    return urllib2.ProxyHandler.proxy_open(self, req, proxy, type)
 | 
						|
 | 
						|
 | 
						|
class FancyHTTPSHandler(urllib2.HTTPSHandler):
  """An HTTPSHandler that works with CONNECT-enabled proxies."""

  def do_open(self, http_class, req, *args, **kwargs):
    """Open req using a connection class built from the request's settings.

    Args:
      http_class: Ignored; the connection class is always produced by
        create_fancy_connection.
      req: The request to open.  Its _tunnel_host, _key_file, _cert_file and
        _ca_certs attributes and any Proxy-Authorization header are used.
      *args: Passed through to urllib2.HTTPSHandler.do_open.
      **kwargs: Passed through to urllib2.HTTPSHandler.do_open.

    Returns:
      Whatever urllib2.HTTPSHandler.do_open returns.

    Raises:
      InvalidCertificateException: If opening failed with an ssl.SSLError
        whose first argument is 1.
      urllib2.URLError: Any other URLError is re-raised unchanged.
    """
    proxy_authorization = None
    for header in req.headers:
      if header.lower() == "proxy-authorization":
        proxy_authorization = req.headers[header]
        break

    try:
      return urllib2.HTTPSHandler.do_open(
          self,
          create_fancy_connection(req._tunnel_host,
                                  req._key_file,
                                  req._cert_file,
                                  req._ca_certs,
                                  proxy_authorization),
          req, *args, **kwargs)
    except urllib2.URLError as url_error:
      # can_validate_certs() is equivalent to attempting to import ssl here,
      # but avoids a dynamic import.
      if can_validate_certs():
        reason = url_error.reason
        # Intentionally very specific so as to opt for false negatives
        # rather than false positives.
        if type(reason) is ssl.SSLError and reason.args[0] == 1:
          # Display the reason to the user. Need to use args for python2.5
          # compat.
          raise InvalidCertificateException(req.host, "", reason.args[1])

      raise
 | 
						|
 | 
						|
 | 
						|
# We have to implement this so that we persist the tunneling behavior
# through redirects.
class FancyRedirectHandler(urllib2.HTTPRedirectHandler):
  """A redirect handler that persists CONNECT-enabled proxy information."""

  def redirect_request(self, req, *args, **kwargs):
    """Build the redirected request, carrying over proxy and SSL settings.

    Args:
      req: The request that was redirected.
      *args: Passed through to urllib2.HTTPRedirectHandler.redirect_request.
      **kwargs: Passed through likewise.

    Returns:
      Whatever the base class returns; when that is a urllib2.Request, the
      tunnel and SSL attributes present on req are copied onto it.
    """
    new_req = urllib2.HTTPRedirectHandler.redirect_request(
        self, req, *args, **kwargs)
    # Same thing as in our set_proxy implementation, but in this case
    # we've only got a Request to work with, so it was this or copy
    # everything over piecemeal.
    #
    # Note that we do not persist tunneling behavior from an http request
    # to an https request, because an http request does not set _tunnel_host.
    #
    # Also note that in Python < 2.6, you will get an error in
    # FancyHTTPSHandler.do_open() on an https urllib2.Request that uses an http
    # proxy, since the proxy type will be set to http instead of https.
    # (FancyRequest, and urllib2.Request in Python >= 2.6 set the proxy type to
    # https.)  Such an urllib2.Request could result from this redirect
    # if you are redirecting from an http request (since an http request
    # does not have _tunnel_host set, and thus you will not set the proxy
    # in the code below), and if you have defined a proxy for https in, say,
    # FancyProxyHandler, and that proxy has type http.
    if hasattr(req, "_tunnel_host") and isinstance(new_req, urllib2.Request):
      if new_req.get_type() == "https":
        if req._tunnel_host:
          # req is proxied, so copy the proxy info.
          new_req._tunnel_host = new_req.get_host()
          new_req.set_proxy(req.host, "https")
        else:
          # req is not proxied, so just make sure _tunnel_host is defined.
          new_req._tunnel_host = None
        new_req.type = "https"
    if hasattr(req, "_key_file") and isinstance(new_req, urllib2.Request):
      # Copy the auxiliary data in case this or any further redirect is https
      new_req._key_file = req._key_file
      new_req._cert_file = req._cert_file
      new_req._ca_certs = req._ca_certs

    return new_req
 |