You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			399 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			399 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
| #!/usr/bin/env python
 | |
| #
 | |
| # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software
 | |
| # Foundation; All Rights Reserved
 | |
| 
 | |
| """A HTTPSConnection/Handler with additional proxy and cert validation features.
 | |
| 
 | |
| In particular, monkey patches in Python r74203 to provide support for CONNECT
 | |
| proxies and adds SSL cert validation if the ssl module is present.
 | |
| """
 | |
| 
 | |
| __author__ = "{frew,nick.johnson}@google.com (Fred Wulff and Nick Johnson)"
 | |
| 
 | |
| import base64
 | |
| import httplib
 | |
| import logging
 | |
| import re
 | |
| import socket
 | |
| import urllib2
 | |
| 
 | |
| from urllib import splittype
 | |
| from urllib import splituser
 | |
| from urllib import splitpasswd
 | |
| 
 | |
| class InvalidCertificateException(httplib.HTTPException):
 | |
|   """Raised when a certificate is provided with an invalid hostname."""
 | |
| 
 | |
|   def __init__(self, host, cert, reason):
 | |
|     """Constructor.
 | |
| 
 | |
|     Args:
 | |
|       host: The hostname the connection was made to.
 | |
|       cert: The SSL certificate (as a dictionary) the host returned.
 | |
|     """
 | |
|     httplib.HTTPException.__init__(self)
 | |
|     self.host = host
 | |
|     self.cert = cert
 | |
|     self.reason = reason
 | |
| 
 | |
|   def __str__(self):
 | |
|     return ('Host %s returned an invalid certificate (%s): %s\n'
 | |
|             'To learn more, see '
 | |
|             'http://code.google.com/appengine/kb/general.html#rpcssl' %
 | |
|             (self.host, self.reason, self.cert))
 | |
| 
 | |
| def can_validate_certs():
 | |
|   """Return True if we have the SSL package and can validate certificates."""
 | |
|   try:
 | |
|     import ssl
 | |
|     return True
 | |
|   except ImportError:
 | |
|     return False
 | |
| 
 | |
| def _create_fancy_connection(tunnel_host=None, key_file=None,
 | |
|                              cert_file=None, ca_certs=None):
 | |
|   # This abomination brought to you by the fact that
 | |
|   # the HTTPHandler creates the connection instance in the middle
 | |
|   # of do_open so we need to add the tunnel host to the class.
 | |
| 
 | |
|   class PresetProxyHTTPSConnection(httplib.HTTPSConnection):
 | |
|     """An HTTPS connection that uses a proxy defined by the enclosing scope."""
 | |
| 
 | |
|     def __init__(self, *args, **kwargs):
 | |
|       httplib.HTTPSConnection.__init__(self, *args, **kwargs)
 | |
| 
 | |
|       self._tunnel_host = tunnel_host
 | |
|       if tunnel_host:
 | |
|         logging.debug("Creating preset proxy https conn: %s", tunnel_host)
 | |
| 
 | |
|       self.key_file = key_file
 | |
|       self.cert_file = cert_file
 | |
|       self.ca_certs = ca_certs
 | |
|       try:
 | |
|         import ssl
 | |
|         if self.ca_certs:
 | |
|           self.cert_reqs = ssl.CERT_REQUIRED
 | |
|         else:
 | |
|           self.cert_reqs = ssl.CERT_NONE
 | |
|       except ImportError:
 | |
|         pass
 | |
| 
 | |
|     def _tunnel(self):
 | |
|       self._set_hostport(self._tunnel_host, None)
 | |
|       logging.info("Connecting through tunnel to: %s:%d",
 | |
|                    self.host, self.port)
 | |
|       self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self.host, self.port))
 | |
|       response = self.response_class(self.sock, strict=self.strict,
 | |
|                                      method=self._method)
 | |
|       (_, code, message) = response._read_status()
 | |
| 
 | |
|       if code != 200:
 | |
|         self.close()
 | |
|         raise socket.error, "Tunnel connection failed: %d %s" % (
 | |
|             code, message.strip())
 | |
| 
 | |
|       while True:
 | |
|         line = response.fp.readline()
 | |
|         if line == "\r\n":
 | |
|           break
 | |
| 
 | |
|     def _get_valid_hosts_for_cert(self, cert):
 | |
|       """Returns a list of valid host globs for an SSL certificate.
 | |
| 
 | |
|       Args:
 | |
|         cert: A dictionary representing an SSL certificate.
 | |
|       Returns:
 | |
|         list: A list of valid host globs.
 | |
|       """
 | |
|       if 'subjectAltName' in cert:
 | |
|         return [x[1] for x in cert['subjectAltName'] if x[0].lower() == 'dns']
 | |
|       else:
 | |
|         # Return a list of commonName fields
 | |
|         return [x[0][1] for x in cert['subject']
 | |
|                 if x[0][0].lower() == 'commonname']
 | |
| 
 | |
|     def _validate_certificate_hostname(self, cert, hostname):
 | |
|       """Validates that a given hostname is valid for an SSL certificate.
 | |
| 
 | |
|       Args:
 | |
|         cert: A dictionary representing an SSL certificate.
 | |
|         hostname: The hostname to test.
 | |
|       Returns:
 | |
|         bool: Whether or not the hostname is valid for this certificate.
 | |
|       """
 | |
|       hosts = self._get_valid_hosts_for_cert(cert)
 | |
|       for host in hosts:
 | |
|         # Convert the glob-style hostname expression (eg, '*.google.com') into a
 | |
|         # valid regular expression.
 | |
|         host_re = host.replace('.', '\.').replace('*', '[^.]*')
 | |
|         if re.search('^%s$' % (host_re,), hostname, re.I):
 | |
|           return True
 | |
|       return False
 | |
| 
 | |
| 
 | |
|     def connect(self):
 | |
|       # TODO(frew): When we drop support for <2.6 (in the far distant future),
 | |
|       # change this to socket.create_connection.
 | |
|       self.sock = _create_connection((self.host, self.port))
 | |
| 
 | |
|       if self._tunnel_host:
 | |
|         self._tunnel()
 | |
| 
 | |
|       # ssl and FakeSocket got deprecated. Try for the new hotness of wrap_ssl,
 | |
|       # with fallback.
 | |
|       try:
 | |
|         import ssl
 | |
|         self.sock = ssl.wrap_socket(self.sock,
 | |
|                                     keyfile=self.key_file,
 | |
|                                     certfile=self.cert_file,
 | |
|                                     ca_certs=self.ca_certs,
 | |
|                                     cert_reqs=self.cert_reqs)
 | |
| 
 | |
|         if self.cert_reqs & ssl.CERT_REQUIRED:
 | |
|           cert = self.sock.getpeercert()
 | |
|           hostname = self.host.split(':', 0)[0]
 | |
|           if not self._validate_certificate_hostname(cert, hostname):
 | |
|             raise InvalidCertificateException(hostname, cert,
 | |
|                                               'hostname mismatch')
 | |
|       except ImportError:
 | |
|         ssl = socket.ssl(self.sock,
 | |
|                          keyfile=self.key_file,
 | |
|                          certfile=self.cert_file)
 | |
|         self.sock = httplib.FakeSocket(self.sock, ssl)
 | |
| 
 | |
|   return PresetProxyHTTPSConnection
 | |
| 
 | |
| 
 | |
| # Here to end of _create_connection copied wholesale from Python 2.6"s socket.py
 | |
| _GLOBAL_DEFAULT_TIMEOUT = object()
 | |
| 
 | |
| 
 | |
| def _create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT):
 | |
|   """Connect to *address* and return the socket object.
 | |
| 
 | |
|   Convenience function.  Connect to *address* (a 2-tuple ``(host,
 | |
|   port)``) and return the socket object.  Passing the optional
 | |
|   *timeout* parameter will set the timeout on the socket instance
 | |
|   before attempting to connect.  If no *timeout* is supplied, the
 | |
|   global default timeout setting returned by :func:`getdefaulttimeout`
 | |
|   is used.
 | |
|   """
 | |
| 
 | |
|   msg = "getaddrinfo returns an empty list"
 | |
|   host, port = address
 | |
|   for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
 | |
|     af, socktype, proto, canonname, sa = res
 | |
|     sock = None
 | |
|     try:
 | |
|       sock = socket.socket(af, socktype, proto)
 | |
|       if timeout is not _GLOBAL_DEFAULT_TIMEOUT:
 | |
|         sock.settimeout(timeout)
 | |
|       sock.connect(sa)
 | |
|       return sock
 | |
| 
 | |
|     except socket.error, msg:
 | |
|       if sock is not None:
 | |
|         sock.close()
 | |
| 
 | |
|   raise socket.error, msg
 | |
| 
 | |
| 
 | |
| class FancyRequest(urllib2.Request):
 | |
|   """A request that allows the use of a CONNECT proxy."""
 | |
| 
 | |
|   def __init__(self, *args, **kwargs):
 | |
|     urllib2.Request.__init__(self, *args, **kwargs)
 | |
|     self._tunnel_host = None
 | |
|     self._key_file = None
 | |
|     self._cert_file = None
 | |
|     self._ca_certs = None
 | |
| 
 | |
|   def set_proxy(self, host, type):
 | |
|     saved_type = None
 | |
| 
 | |
|     if self.get_type() == "https" and not self._tunnel_host:
 | |
|       self._tunnel_host = self.get_host()
 | |
|       saved_type = self.get_type()
 | |
|     urllib2.Request.set_proxy(self, host, type)
 | |
| 
 | |
|     if saved_type:
 | |
|       # Don't set self.type, we want to preserve the
 | |
|       # type for tunneling.
 | |
|       self.type = saved_type
 | |
| 
 | |
|   def set_ssl_info(self, key_file=None, cert_file=None, ca_certs=None):
 | |
|     self._key_file = key_file
 | |
|     self._cert_file = cert_file
 | |
|     self._ca_certs = ca_certs
 | |
| 
 | |
| 
 | |
| class FancyProxyHandler(urllib2.ProxyHandler):
 | |
|   """A ProxyHandler that works with CONNECT-enabled proxies."""
 | |
| 
 | |
|   # Taken verbatim from /usr/lib/python2.5/urllib2.py
 | |
|   def _parse_proxy(self, proxy):
 | |
|     """Return (scheme, user, password, host/port) given a URL or an authority.
 | |
| 
 | |
|     If a URL is supplied, it must have an authority (host:port) component.
 | |
|     According to RFC 3986, having an authority component means the URL must
 | |
|     have two slashes after the scheme:
 | |
| 
 | |
|     >>> _parse_proxy('file:/ftp.example.com/')
 | |
|     Traceback (most recent call last):
 | |
|     ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
 | |
| 
 | |
|     The first three items of the returned tuple may be None.
 | |
| 
 | |
|     Examples of authority parsing:
 | |
| 
 | |
|     >>> _parse_proxy('proxy.example.com')
 | |
|     (None, None, None, 'proxy.example.com')
 | |
|     >>> _parse_proxy('proxy.example.com:3128')
 | |
|     (None, None, None, 'proxy.example.com:3128')
 | |
| 
 | |
|     The authority component may optionally include userinfo (assumed to be
 | |
|     username:password):
 | |
| 
 | |
|     >>> _parse_proxy('joe:password@proxy.example.com')
 | |
|     (None, 'joe', 'password', 'proxy.example.com')
 | |
|     >>> _parse_proxy('joe:password@proxy.example.com:3128')
 | |
|     (None, 'joe', 'password', 'proxy.example.com:3128')
 | |
| 
 | |
|     Same examples, but with URLs instead:
 | |
| 
 | |
|     >>> _parse_proxy('http://proxy.example.com/')
 | |
|     ('http', None, None, 'proxy.example.com')
 | |
|     >>> _parse_proxy('http://proxy.example.com:3128/')
 | |
|     ('http', None, None, 'proxy.example.com:3128')
 | |
|     >>> _parse_proxy('http://joe:password@proxy.example.com/')
 | |
|     ('http', 'joe', 'password', 'proxy.example.com')
 | |
|     >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
 | |
|     ('http', 'joe', 'password', 'proxy.example.com:3128')
 | |
| 
 | |
|     Everything after the authority is ignored:
 | |
| 
 | |
|     >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
 | |
|     ('ftp', 'joe', 'password', 'proxy.example.com')
 | |
| 
 | |
|     Test for no trailing '/' case:
 | |
| 
 | |
|     >>> _parse_proxy('http://joe:password@proxy.example.com')
 | |
|     ('http', 'joe', 'password', 'proxy.example.com')
 | |
| 
 | |
|     """
 | |
|     scheme, r_scheme = splittype(proxy)
 | |
|     if not r_scheme.startswith("/"):
 | |
|       # authority
 | |
|       scheme = None
 | |
|       authority = proxy
 | |
|     else:
 | |
|       # URL
 | |
|       if not r_scheme.startswith("//"):
 | |
|         raise ValueError("proxy URL with no authority: %r" % proxy)
 | |
|       # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
 | |
|       # and 3.3.), path is empty or starts with '/'
 | |
|       end = r_scheme.find("/", 2)
 | |
|       if end == -1:
 | |
|         end = None
 | |
|       authority = r_scheme[2:end]
 | |
|     userinfo, hostport = splituser(authority)
 | |
|     if userinfo is not None:
 | |
|       user, password = splitpasswd(userinfo)
 | |
|     else:
 | |
|       user = password = None
 | |
|     return scheme, user, password, hostport
 | |
| 
 | |
|   def proxy_open(self, req, proxy, type):
 | |
|     # This block is copied wholesale from Python2.6 urllib2.
 | |
|     # It is idempotent, so the superclass method call executes as normal
 | |
|     # if invoked.
 | |
|     orig_type = req.get_type()
 | |
|     proxy_type, user, password, hostport = self._parse_proxy(proxy)
 | |
|     if proxy_type is None:
 | |
|       proxy_type = orig_type
 | |
|     if user and password:
 | |
|       user_pass = "%s:%s" % (urllib2.unquote(user), urllib2.unquote(password))
 | |
|       creds = base64.b64encode(user_pass).strip()
 | |
|       # Later calls overwrite earlier calls for the same header
 | |
|       req.add_header("Proxy-authorization", "Basic " + creds)
 | |
|     hostport = urllib2.unquote(hostport)
 | |
|     req.set_proxy(hostport, proxy_type)
 | |
|     # This condition is the change
 | |
|     if orig_type == "https":
 | |
|       return None
 | |
| 
 | |
|     return urllib2.ProxyHandler.proxy_open(self, req, proxy, type)
 | |
| 
 | |
| 
 | |
| class FancyHTTPSHandler(urllib2.HTTPSHandler):
 | |
|   """An HTTPSHandler that works with CONNECT-enabled proxies."""
 | |
| 
 | |
|   def do_open(self, http_class, req):
 | |
|     # Intentionally very specific so as to opt for false negatives
 | |
|     # rather than false positives.
 | |
|     try:
 | |
|       return urllib2.HTTPSHandler.do_open(
 | |
|           self,
 | |
|           _create_fancy_connection(req._tunnel_host,
 | |
|                                    req._key_file,
 | |
|                                    req._cert_file,
 | |
|                                    req._ca_certs),
 | |
|           req)
 | |
|     except urllib2.URLError, url_error:
 | |
|       try:
 | |
|         import ssl
 | |
|         if (type(url_error.reason) == ssl.SSLError and
 | |
|             url_error.reason.args[0] == 1):
 | |
|           # Display the reason to the user. Need to use args for python2.5
 | |
|           # compat.
 | |
|           raise InvalidCertificateException(req.host, '',
 | |
|                                             url_error.reason.args[1])
 | |
|       except ImportError:
 | |
|         pass
 | |
| 
 | |
|       raise url_error
 | |
| 
 | |
| 
 | |
| # We have to implement this so that we persist the tunneling behavior
 | |
| # through redirects.
 | |
| class FancyRedirectHandler(urllib2.HTTPRedirectHandler):
 | |
|   """A redirect handler that persists CONNECT-enabled proxy information."""
 | |
| 
 | |
|   def redirect_request(self, req, *args, **kwargs):
 | |
|     new_req = urllib2.HTTPRedirectHandler.redirect_request(
 | |
|         self, req, *args, **kwargs)
 | |
|     # Same thing as in our set_proxy implementation, but in this case
 | |
|     # we"ve only got a Request to work with, so it was this or copy
 | |
|     # everything over piecemeal.
 | |
|     #
 | |
|     # Note that we do not persist tunneling behavior from an http request
 | |
|     # to an https request, because an http request does not set _tunnel_host.
 | |
|     #
 | |
|     # Also note that in Python < 2.6, you will get an error in
 | |
|     # FancyHTTPSHandler.do_open() on an https urllib2.Request that uses an http
 | |
|     # proxy, since the proxy type will be set to http instead of https.
 | |
|     # (FancyRequest, and urllib2.Request in Python >= 2.6 set the proxy type to
 | |
|     # https.)  Such an urllib2.Request could result from this redirect
 | |
|     # if you are redirecting from an http request (since an an http request
 | |
|     # does not have _tunnel_host set, and thus you will not set the proxy
 | |
|     # in the code below), and if you have defined a proxy for https in, say,
 | |
|     # FancyProxyHandler, and that proxy has type http.
 | |
|     if hasattr(req, "_tunnel_host") and isinstance(new_req, urllib2.Request):
 | |
|       if new_req.get_type() == "https":
 | |
|         if req._tunnel_host:
 | |
|           # req is proxied, so copy the proxy info.
 | |
|           new_req._tunnel_host = new_req.get_host()
 | |
|           new_req.set_proxy(req.host, "https")
 | |
|         else:
 | |
|           # req is not proxied, so just make sure _tunnel_host is defined.
 | |
|           new_req._tunnel_host = None
 | |
|         new_req.type = "https"
 | |
|     if hasattr(req, "_key_file") and isinstance(new_req, urllib2.Request):
 | |
|       # Copy the auxiliary data in case this or any further redirect is https
 | |
|       new_req._key_file = req._key_file
 | |
|       new_req._cert_file = req._cert_file
 | |
|       new_req._ca_certs = req._ca_certs
 | |
| 
 | |
|     return new_req
 |