diff --git a/third_party/fancy_urllib/README b/third_party/fancy_urllib/README new file mode 100644 index 000000000..91da20ec7 --- /dev/null +++ b/third_party/fancy_urllib/README @@ -0,0 +1,21 @@ +The fancy_urllib library was obtained from +http://googleappengine.googlecode.com/svn/trunk/python/lib/fancy_urllib/fancy_urllib/__init__.py +under the following license (http://googleappengine.googlecode.com/svn/trunk/python/LICENSE): + + +GOOGLE APP ENGINE SDK +===================== +Copyright 2008 Google Inc. +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/third_party/fancy_urllib/__init__.py b/third_party/fancy_urllib/__init__.py new file mode 100644 index 000000000..d4da0dd19 --- /dev/null +++ b/third_party/fancy_urllib/__init__.py @@ -0,0 +1,398 @@ +#!/usr/bin/env python +# +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software +# Foundation; All Rights Reserved + +"""A HTTPSConnection/Handler with additional proxy and cert validation features. + +In particular, monkey patches in Python r74203 to provide support for CONNECT +proxies and adds SSL cert validation if the ssl module is present. 
+""" + +__author__ = "{frew,nick.johnson}@google.com (Fred Wulff and Nick Johnson)" + +import base64 +import httplib +import logging +import re +import socket +import urllib2 + +from urllib import splittype +from urllib import splituser +from urllib import splitpasswd + +class InvalidCertificateException(httplib.HTTPException): + """Raised when a certificate is provided with an invalid hostname.""" + + def __init__(self, host, cert, reason): + """Constructor. + + Args: + host: The hostname the connection was made to. + cert: The SSL certificate (as a dictionary) the host returned. + """ + httplib.HTTPException.__init__(self) + self.host = host + self.cert = cert + self.reason = reason + + def __str__(self): + return ('Host %s returned an invalid certificate (%s): %s\n' + 'To learn more, see ' + 'http://code.google.com/appengine/kb/general.html#rpcssl' % + (self.host, self.reason, self.cert)) + +def can_validate_certs(): + """Return True if we have the SSL package and can validate certificates.""" + try: + import ssl + return True + except ImportError: + return False + +def _create_fancy_connection(tunnel_host=None, key_file=None, + cert_file=None, ca_certs=None): + # This abomination brought to you by the fact that + # the HTTPHandler creates the connection instance in the middle + # of do_open so we need to add the tunnel host to the class. 
+ + class PresetProxyHTTPSConnection(httplib.HTTPSConnection): + """An HTTPS connection that uses a proxy defined by the enclosing scope.""" + + def __init__(self, *args, **kwargs): + httplib.HTTPSConnection.__init__(self, *args, **kwargs) + + self._tunnel_host = tunnel_host + if tunnel_host: + logging.debug("Creating preset proxy https conn: %s", tunnel_host) + + self.key_file = key_file + self.cert_file = cert_file + self.ca_certs = ca_certs + try: + import ssl + if self.ca_certs: + self.cert_reqs = ssl.CERT_REQUIRED + else: + self.cert_reqs = ssl.CERT_NONE + except ImportError: + pass + + def _tunnel(self): + self._set_hostport(self._tunnel_host, None) + logging.info("Connecting through tunnel to: %s:%d", + self.host, self.port) + self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self.host, self.port)) + response = self.response_class(self.sock, strict=self.strict, + method=self._method) + (_, code, message) = response._read_status() + + if code != 200: + self.close() + raise socket.error, "Tunnel connection failed: %d %s" % ( + code, message.strip()) + + while True: + line = response.fp.readline() + if line == "\r\n": + break + + def _get_valid_hosts_for_cert(self, cert): + """Returns a list of valid host globs for an SSL certificate. + + Args: + cert: A dictionary representing an SSL certificate. + Returns: + list: A list of valid host globs. + """ + if 'subjectAltName' in cert: + return [x[1] for x in cert['subjectAltName'] if x[0].lower() == 'dns'] + else: + # Return a list of commonName fields + return [x[0][1] for x in cert['subject'] + if x[0][0].lower() == 'commonname'] + + def _validate_certificate_hostname(self, cert, hostname): + """Validates that a given hostname is valid for an SSL certificate. + + Args: + cert: A dictionary representing an SSL certificate. + hostname: The hostname to test. + Returns: + bool: Whether or not the hostname is valid for this certificate. 
+ """ + hosts = self._get_valid_hosts_for_cert(cert) + for host in hosts: + # Convert the glob-style hostname expression (eg, '*.google.com') into a + # valid regular expression. + host_re = host.replace('.', '\.').replace('*', '[^.]*') + if re.search('^%s$' % (host_re,), hostname, re.I): + return True + return False + + + def connect(self): + # TODO(frew): When we drop support for <2.6 (in the far distant future), + # change this to socket.create_connection. + self.sock = _create_connection((self.host, self.port)) + + if self._tunnel_host: + self._tunnel() + + # ssl and FakeSocket got deprecated. Try for the new hotness of wrap_ssl, + # with fallback. + try: + import ssl + self.sock = ssl.wrap_socket(self.sock, + keyfile=self.key_file, + certfile=self.cert_file, + ca_certs=self.ca_certs, + cert_reqs=self.cert_reqs) + + if self.cert_reqs & ssl.CERT_REQUIRED: + cert = self.sock.getpeercert() + hostname = self.host.split(':', 0)[0] + if not self._validate_certificate_hostname(cert, hostname): + raise InvalidCertificateException(hostname, cert, + 'hostname mismatch') + except ImportError: + ssl = socket.ssl(self.sock, + keyfile=self.key_file, + certfile=self.cert_file) + self.sock = httplib.FakeSocket(self.sock, ssl) + + return PresetProxyHTTPSConnection + + +# Here to end of _create_connection copied wholesale from Python 2.6"s socket.py +_GLOBAL_DEFAULT_TIMEOUT = object() + + +def _create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. 
+ """ + + msg = "getaddrinfo returns an empty list" + host, port = address + for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + if timeout is not _GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + sock.connect(sa) + return sock + + except socket.error, msg: + if sock is not None: + sock.close() + + raise socket.error, msg + + +class FancyRequest(urllib2.Request): + """A request that allows the use of a CONNECT proxy.""" + + def __init__(self, *args, **kwargs): + urllib2.Request.__init__(self, *args, **kwargs) + self._tunnel_host = None + self._key_file = None + self._cert_file = None + self._ca_certs = None + + def set_proxy(self, host, type): + saved_type = None + + if self.get_type() == "https" and not self._tunnel_host: + self._tunnel_host = self.get_host() + saved_type = self.get_type() + urllib2.Request.set_proxy(self, host, type) + + if saved_type: + # Don't set self.type, we want to preserve the + # type for tunneling. + self.type = saved_type + + def set_ssl_info(self, key_file=None, cert_file=None, ca_certs=None): + self._key_file = key_file + self._cert_file = cert_file + self._ca_certs = ca_certs + + +class FancyProxyHandler(urllib2.ProxyHandler): + """A ProxyHandler that works with CONNECT-enabled proxies.""" + + # Taken verbatim from /usr/lib/python2.5/urllib2.py + def _parse_proxy(self, proxy): + """Return (scheme, user, password, host/port) given a URL or an authority. + + If a URL is supplied, it must have an authority (host:port) component. + According to RFC 3986, having an authority component means the URL must + have two slashes after the scheme: + + >>> _parse_proxy('file:/ftp.example.com/') + Traceback (most recent call last): + ValueError: proxy URL with no authority: 'file:/ftp.example.com/' + + The first three items of the returned tuple may be None. 
+ + Examples of authority parsing: + + >>> _parse_proxy('proxy.example.com') + (None, None, None, 'proxy.example.com') + >>> _parse_proxy('proxy.example.com:3128') + (None, None, None, 'proxy.example.com:3128') + + The authority component may optionally include userinfo (assumed to be + username:password): + + >>> _parse_proxy('joe:password@proxy.example.com') + (None, 'joe', 'password', 'proxy.example.com') + >>> _parse_proxy('joe:password@proxy.example.com:3128') + (None, 'joe', 'password', 'proxy.example.com:3128') + + Same examples, but with URLs instead: + + >>> _parse_proxy('http://proxy.example.com/') + ('http', None, None, 'proxy.example.com') + >>> _parse_proxy('http://proxy.example.com:3128/') + ('http', None, None, 'proxy.example.com:3128') + >>> _parse_proxy('http://joe:password@proxy.example.com/') + ('http', 'joe', 'password', 'proxy.example.com') + >>> _parse_proxy('http://joe:password@proxy.example.com:3128') + ('http', 'joe', 'password', 'proxy.example.com:3128') + + Everything after the authority is ignored: + + >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') + ('ftp', 'joe', 'password', 'proxy.example.com') + + Test for no trailing '/' case: + + >>> _parse_proxy('http://joe:password@proxy.example.com') + ('http', 'joe', 'password', 'proxy.example.com') + + """ + scheme, r_scheme = splittype(proxy) + if not r_scheme.startswith("/"): + # authority + scheme = None + authority = proxy + else: + # URL + if not r_scheme.startswith("//"): + raise ValueError("proxy URL with no authority: %r" % proxy) + # We have an authority, so for RFC 3986-compliant URLs (by ss 3. 
+ # and 3.3.), path is empty or starts with '/' + end = r_scheme.find("/", 2) + if end == -1: + end = None + authority = r_scheme[2:end] + userinfo, hostport = splituser(authority) + if userinfo is not None: + user, password = splitpasswd(userinfo) + else: + user = password = None + return scheme, user, password, hostport + + def proxy_open(self, req, proxy, type): + # This block is copied wholesale from Python2.6 urllib2. + # It is idempotent, so the superclass method call executes as normal + # if invoked. + orig_type = req.get_type() + proxy_type, user, password, hostport = self._parse_proxy(proxy) + if proxy_type is None: + proxy_type = orig_type + if user and password: + user_pass = "%s:%s" % (urllib2.unquote(user), urllib2.unquote(password)) + creds = base64.b64encode(user_pass).strip() + # Later calls overwrite earlier calls for the same header + req.add_header("Proxy-authorization", "Basic " + creds) + hostport = urllib2.unquote(hostport) + req.set_proxy(hostport, proxy_type) + # This condition is the change + if orig_type == "https": + return None + + return urllib2.ProxyHandler.proxy_open(self, req, proxy, type) + + +class FancyHTTPSHandler(urllib2.HTTPSHandler): + """An HTTPSHandler that works with CONNECT-enabled proxies.""" + + def do_open(self, http_class, req): + # Intentionally very specific so as to opt for false negatives + # rather than false positives. + try: + return urllib2.HTTPSHandler.do_open( + self, + _create_fancy_connection(req._tunnel_host, + req._key_file, + req._cert_file, + req._ca_certs), + req) + except urllib2.URLError, url_error: + try: + import ssl + if (type(url_error.reason) == ssl.SSLError and + url_error.reason.args[0] == 1): + # Display the reason to the user. Need to use args for python2.5 + # compat. + raise InvalidCertificateException(req.host, '', + url_error.reason.args[1]) + except ImportError: + pass + + raise url_error + + +# We have to implement this so that we persist the tunneling behavior +# through redirects. 
+class FancyRedirectHandler(urllib2.HTTPRedirectHandler): + """A redirect handler that persists CONNECT-enabled proxy information.""" + + def redirect_request(self, req, *args, **kwargs): + new_req = urllib2.HTTPRedirectHandler.redirect_request( + self, req, *args, **kwargs) + # Same thing as in our set_proxy implementation, but in this case + # we"ve only got a Request to work with, so it was this or copy + # everything over piecemeal. + # + # Note that we do not persist tunneling behavior from an http request + # to an https request, because an http request does not set _tunnel_host. + # + # Also note that in Python < 2.6, you will get an error in + # FancyHTTPSHandler.do_open() on an https urllib2.Request that uses an http + # proxy, since the proxy type will be set to http instead of https. + # (FancyRequest, and urllib2.Request in Python >= 2.6 set the proxy type to + # https.) Such an urllib2.Request could result from this redirect + # if you are redirecting from an http request (since an an http request + # does not have _tunnel_host set, and thus you will not set the proxy + # in the code below), and if you have defined a proxy for https in, say, + # FancyProxyHandler, and that proxy has type http. + if hasattr(req, "_tunnel_host") and isinstance(new_req, urllib2.Request): + if new_req.get_type() == "https": + if req._tunnel_host: + # req is proxied, so copy the proxy info. + new_req._tunnel_host = new_req.get_host() + new_req.set_proxy(req.host, "https") + else: + # req is not proxied, so just make sure _tunnel_host is defined. 
+ new_req._tunnel_host = None + new_req.type = "https" + if hasattr(req, "_key_file") and isinstance(new_req, urllib2.Request): + # Copy the auxiliary data in case this or any further redirect is https + new_req._key_file = req._key_file + new_req._cert_file = req._cert_file + new_req._ca_certs = req._ca_certs + + return new_req diff --git a/third_party/gsutil/CHECKSUM b/third_party/gsutil/CHECKSUM new file mode 100644 index 000000000..f5a029e1f --- /dev/null +++ b/third_party/gsutil/CHECKSUM @@ -0,0 +1 @@ +010822c61d38d70ac23600bc955fccf5 diff --git a/third_party/gsutil/COPYING b/third_party/gsutil/COPYING new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/third_party/gsutil/COPYING @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/third_party/gsutil/LICENSE.third_party b/third_party/gsutil/LICENSE.third_party new file mode 100644 index 000000000..7a418bb72 --- /dev/null +++ b/third_party/gsutil/LICENSE.third_party @@ -0,0 +1,295 @@ +GOOGLE APP ENGINE SDK +************************************************************************** +Copyright 2008 Google Inc. +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + +fancy_urllib +************************************************************************** + +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software +# Foundation; All Rights Reserved + + +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. 
Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations (now Zope +Corporation, see http://www.zope.com). In 2001, the Python Software +Foundation (PSF, see http://www.python.org/psf/) was formed, a +non-profit organization created specifically to own Python-related +Intellectual Property. Zope Corporation is a sponsoring member of +the PSF. + +All Python releases are Open Source (see http://www.opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? (1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.2 2.1.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2.1 2.2 2002 PSF yes + 2.2.2 2.2.1 2002 PSF yes + 2.2.3 2.2.2 2003 PSF yes + 2.3 2.2.2 2002-2003 PSF yes + 2.3.1 2.3 2002-2003 PSF yes + 2.3.2 2.3.1 2002-2003 PSF yes + 2.3.3 2.3.2 2002-2003 PSF yes + 2.3.4 2.3.3 2004 PSF yes + 2.3.5 2.3.4 2005 PSF yes + 2.4 2.3 2004 PSF yes + 2.4.1 2.4 2005 PSF yes + 2.4.2 2.4.1 2005 PSF yes + 2.4.3 2.4.2 2006 PSF yes + 2.5 2.4 2006 PSF yes + 2.5.1 2.5 2007 PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. 
All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. + +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python +alone or in any derivative version, provided, however, that PSF's +License Agreement and PSF's notice of copyright, i.e., "Copyright (c) +2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software Foundation; +All Rights Reserved" are retained in Python alone or in any derivative +version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. 
PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. 
Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. 
By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the Internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the Internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. 
CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. +Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. 
By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. + + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
diff --git a/third_party/gsutil/MANIFEST.in b/third_party/gsutil/MANIFEST.in new file mode 100644 index 000000000..a10453d49 --- /dev/null +++ b/third_party/gsutil/MANIFEST.in @@ -0,0 +1,5 @@ +include gsutil COPYING VERSION LICENSE.third_party README setup.py pkg_util.py +recursive-include gslib * +recursive-include oauth2_plugin * +recursive-include third_party * +recursive-include boto * diff --git a/third_party/gsutil/README b/third_party/gsutil/README new file mode 100644 index 000000000..709dc9a5a --- /dev/null +++ b/third_party/gsutil/README @@ -0,0 +1,38 @@ +This directory contains the Python command line tool gsutil, which Google +has released as open source, to demonstrate the Google Storage API and to +provide a tool for manipulating data in the system. + +Prerequisites: + +Gsutil requires Python 2.6 or later. + +To install gsutil take the following steps: + +1. Pick a place where you want to install the software. You can + install the code wherever you prefer; for brevity the instructions below + assume you want to install in $HOME/gsutil. + +2. To install gsutil on Linux/Unix or MacOS, open a shell window, change + directories to where you downloaded the gsutil.tar.gz file, and do this: + % tar xfz gsutil.tar.gz -C $HOME + + Then add the following line to your $HOME/.bashrc shell initialization + file: + export PATH=${PATH}:$HOME/gsutil + + The next time you start a shell you should be able to run gsutil from + the command line. + +3. To install gsutil on Windows, install cygwin (http://www.cygwin.com/), + with at least version 2.6.5 of Python. Once you have that, start a shell + and follow the Linux instructions above for unpacking and installing gsutil. + +4. The first time you try to run gsutil, it will detect that you have no + configuration file containing your credentials, interactively prompt you, + and create the file. + + After this you can use the tool. Running gsutil with no arguments + will print a help summary.
+ +For more information on installing and using gsutil, see +. diff --git a/third_party/gsutil/README.chromium b/third_party/gsutil/README.chromium new file mode 100644 index 000000000..2e79469b2 --- /dev/null +++ b/third_party/gsutil/README.chromium @@ -0,0 +1,19 @@ +Name: gsutil +URL: https://github.com/GoogleCloudPlatform/gsutil +Version: 3.25 +License: Apache 2.0 + +Description: +Set of tools to allow querying, uploading, and downloading objects from +Google Storage. + +Modifications: +* Removed gsutil/gslib/commands/test.py +* Removed gsutil/pkg_gen.sh +* Removed gsutil/gslib/tests/ +* Moved gsutil/boto as a depot_tools third_party lib +* Moved gsutil/third_party into our own third_party directory +* Append sys.path in gsutil/gsutil to find the moved third_party modules +* Updated checksum ce71ac982f1148315e7fa65cff2f83e8 -> bf29190007bc7557c33806367ee3ce9e + +Full license is in the COPYING file. diff --git a/third_party/gsutil/README.pkg b/third_party/gsutil/README.pkg new file mode 100644 index 000000000..067ba71bd --- /dev/null +++ b/third_party/gsutil/README.pkg @@ -0,0 +1,49 @@ +Package Generation Notes for gsutil + +Gsutil can be distributed in one of three ways: + +1. legacy mode - User unpacks archive file into a private directory tree +and maintains his/her own private copy of gsutil, boto, etc. This is the +only supported installation mode for Windows users. + +2. enterprise mode - User unpacks the gsutil archive file and runs +'python setup.py install' (as root), which installs everything into +a shared directory tree (/usr/share/gsutil) with a symlink from +/usr/bin/gsutil to /usr/share/gsutil/gsutil to provide easy access to +the shared gsutil command. In enterprise mode, the software gets installed +in one shared location, which makes it easier to install, update and +manage gsutil for a community of users. 
+ +NOTE: Enterprise mode (installing gsutil via setup.py) is no longer +officially supported - unpacking the zip file into a directory is the +preferred method for installing gsutil for both shared and private +configurations. + +3. rpm mode - User installs the gsutil rpm package file on a Red Hat +Linux system using the rpm command. The resulting installation image +looks precisely the same as the results of installing with enterprise +mode, i.e. a shared directory tree (/usr/share/gsutil) with a symlink +from /usr/bin/gsutil. rpm mode is intended for enterprises that want +a stable release that does not necessarily contain the latest changes. + +All three modes derive their inventory from a common text file called +MANIFEST.in. If you want to add one or more new files or directories, +you only need to edit that one file and all three installation modes +will automatically inherit the change(s). + +GENERATING PACKAGE FILES + +First update the VERSION file and the gsutil.spec files to reflect the +new version number. + +Legacy mode and enterprise mode are both embodied in the same gsutil +archive file, the only difference being that the latter entails running +one additional command after unpacking the gsutil archive file. So the +same archive file we've always distributed for gsutil will be used for +both legacy and enterprise installation modes. + +For rpm mode, there's a new tool called pkg_gen.sh, which when run with no +arguments creates an rpm file at this location: + + $HOME/rpmbuild/RPMS/noarch/gsutil-2.0-1.noarch.rpm + diff --git a/third_party/gsutil/ReleaseNotes.txt b/third_party/gsutil/ReleaseNotes.txt new file mode 100644 index 000000000..07bd5205f --- /dev/null +++ b/third_party/gsutil/ReleaseNotes.txt @@ -0,0 +1,825 @@ +Release 3.25 (release-date: 2013-02-21) + + +Bug Fixes +--------- + + - Fixed two version-specific URI bugs: + + 1.
gsutil cp -r gs://bucket1 gs://bucket2 would create objects in bucket2 + with names corresponding to version-specific URIs in bucket1 (e.g., + gs://bucket2/obj#1361417568482000, where the "#1361417568482000" part was + part of the object name, not the object's generation). + + This problem similarly caused gsutil cp -r gs://bucket1 ./dir to create + file names corresponding to version-specific URIs in bucket1. + + 2. gsutil rm -a gs://bucket/obj would attempt to delete the same object + twice, getting a NoSuchKey error on the second attempt. + + +================================================================================ + + +Release 3.24 (release-date: 2013-02-19) + + +Bug Fixes +--------- + + - Fixed bug that caused attempt to dupe-encode a unicode filename. + + +Other Changes +--------- + + - Refactored retry logic from setmeta and chacl to use @Retry decorator. + + - Moved @Retry decorator to third_party. + + - Fixed flaky tests. + + +================================================================================ + + +Release 3.23 (release-date: 2013-02-16) + + +Bug Fixes +--------- + + - Make version-specific URI parsing more robust. This fixes a bug where + listing buckets in certain cases would result in the error + 'BucketStorageUri' object has no attribute 'version_specific_uri' + + +================================================================================ + + +Release 3.22 (release-date: 2013-02-15) + + +New Features +------------ + + - Implemented new chacl command, which makes it easy to add and remove bucket + and object ACL grants without having to edit XML (like the older setacl + command). + + - Implemented new "daisy-chain" copying mode, which allows cross-provider + copies to run without buffering to local disk, and to use resumable uploads. + This copying mode also allows copying between locations and between storage + classes, using the new gsutil cp -D option.
(Daisy-chain copying is the + default when copying between providers, but must be explicitly requested for + the other cases to keep costs and performance expectations clear.) + + - Implemented new perfdiag command to run a diagnostic test against + a bucket, collect system information, and report results. Useful + when working with Google Cloud Storage team to resolve questions + about performance. + + - Added SIGQUIT (^\) handler, to allow breakpointing a running gsutil. + + +Bug Fixes +--------- + + - Fixed bug where gsutil setwebcfg signature didn't match with + HMAC authentication. + + - Fixed ASCII codec decode error when constructing tracker filename + from non-7bit ASCII input filename. + + - Changed boto auth plugin framework to allow multiple plugins + supporting requested capability, which fixes gsutil exception + that used to happen where a GCE user had a service account + configured and then ran gsutil config. + + - Changed Command.Apply method to be resilient to name expansion + exceptions. Before this change, if an exception was raised + during iteration of NameExpansionResult, the parent process + would immediately stop execution, causing the + _EOF_NAME_EXPANSION_RESULT to never be sent to child processes. + This resulted in the process hanging forever. + + - Fixed various bugs for gsutil running on Windows: + - Fixed various places from a hard-coded '/' to os.sep. + - Fixed a bug in the cp command where it was using the destination + URI's .delim property instead of the source URI. + - Fixed a bug in the cp command's _SrcDstSame function by + simplifying it to use os.path.normpath. + - Fixed windows bug in tests/util.py _NormalizeURI function. + - Fixed ZeroDivisionError sometimes happening during unit tests + on Windows. + + - Fixed gsutil rm bug that caused exit status 1 when encountered + non-existent URI. + + - Fixed support for gsutil cp file -. + + - Added preconditions and retry logic to setmeta command, to + enforce concurrency control.
+ + - Fixed bug in copying subdirs to subdirs. + + - Fixed cases where boto debug_level caused too much or too little + logging: + - resumable and one-shot uploads weren't showing response headers + when connection.debug > 0. + - payload was showing up in debug output when connection.debug + < 4 for streaming uploads. + + - Removed XML parsing from setacl. The previous implementation + relied on loose XML handling, which could truncate what it sends + to the service, allowing invalid XML to be specified by the + user. Instead now the ACL XML is passed verbatim and we rely + on server-side schema enforcement. + + - Added user-agent header to resumable uploads. + + - Fixed reporting bits/s when it was really bytes/s. + + - Changed so we now pass headers with API version & project ID + to create_bucket(). + + - Made "gsutil rm -r gs://bucket/folder" remove xyz_$folder$ object + (which is created by various GUI tools). + + - Fixed bug where gsutil binary was shipped with protection 750 + instead of 755. + + +Other Changes +--------- + + - Reworked versioned object handling: + - Removed need for commands to specify -v option to parse + versions. Versioned URIs are now uniformly handled by all + commands. + - Refactored StorageUri parsing that had been split across + storage_uri and convenience; made versioned URIs render with + version string so StorageUri is round-trippable (boto change). + - Implemented gsutil cp -v option for printing the version-specific + URI that was just created. + - Added error detail for attempt to delete non-empty versioned + bucket. Also added versioning state to ls -L -b gs://bucket + output. + - Changed URI parsing to use pre-compiled regex's. + - Other bug fixes. + + - Rewrote/deepened/improved various parts of built-in help: + - Updated 'gsutil help dev'. + - Fixed help command handling when terminal does not have the + number of rows set. + - Rewrote versioning help. + - Added gsutil help text for common 403 AccountProblem error.
+ - Added text to 'gsutil help dev' about legal agreement needed + with code submissions. + - Fixed various other typos. + - Updated doc for cp command regarding metadata not being + preserved when copying between providers. + - Fixed gsutil ls command documentation typo for the -L option. + - Added HTTP scheme to doc/examples for gsutil setcors command. + - Changed minimum version in documentation from 2.5 to 2.6 since + gsutil no longer works in Python 2.5. + - Cleaned up/clarify/deepen various other parts of gsutil + built-in documentation. + + - Numerous improvements to testing infrastructure: + - Completely refactored infrastructure, allowing deeper testing + and more readable test code, and enabling better debugging + output when tests fail. + - Moved gslib/test_*.py unit tests to gslib/tests module. + - Made all tests (unit and integration, per-command and modules + (like naming) run from single gsutil test command. + - Moved TempDir functions from GsUtilIntegrationTestCase to + GsUtilTestCase. + - Made test runner message show the test function being run. + - Added file path support to ObjectToURI function. + - Disabled the test command if running on Python 2.6 and unittest2 + is not available instead of breaking all of gsutil. + - Changed to pass GCS V2 API and project_id from boto config + if necessary in integration_testcase#CreateBucket(). + - Fixed unit tests by using a GS-specific mocking class to + override the S3 provider. + - Added friendlier error message if test path munging fails. + - Fixed bug where gsutil test only cleaned up first few test files. + - Implemented setacl integration tests. + - Implemented StorageUri parsing unit tests. + - Implemented test for gsutil cp -D. + - Implemented setacl integration tests. + - Implemented tests for reading and seeking past end of file. + - Implemented and tests for it in new tests module. 
+ - Changed cp tests that don't specify a Content-Type to check + for new binary/octet-stream default instead of server-detected + mime type. + + - Changed gsutil mv to allow moving local files/dirs to the cloud. + Previously this was disallowed in the belief we should be + conservative about deleting data from local disk but there are + legitimate use cases for moving data from a local dir to the + cloud, it's clear to the user this would remove data from the + local disk, and allowing it makes the tool behavior more + consistent with what users would expect. + + - Changed gsutil update command to insist on is_secure and + https_validate_certificates. + + - Fixed release no longer to include extraneous boto dirs in + top-level of gsutil distribution (like bin/ and docs/). + + - Changed resumable upload threshold from 1 MB to 2 MB. + + - Removed leftover cloudauth and cloudreader dirs. Sample code + now lives at https://github.com/GoogleCloudPlatform. + + - Updated copyright notice on code files. + + +================================================================================ + + +Release 3.21 (release-date: 2012-12-10) + +New Features +------------ + + - Added the ability for the cp command to continue even if there is an + error. This can be activated with the -c flag. + + - Added support for specifying src args for gsutil cp on stdin (-I option) + + +Bug Fixes +--------- + + - Fixed gsutil test cp, which assumed it was run from gsutil install dir. + + - Mods so we send generation subresource only when user requested + version parsing (-v option for cp and cat commands). + + +Other Changes +------------- + + - Updated docs about using setmeta with versioning enabled. + + - Changed GCS endpoint in boto to storage.googleapis.com. + + +================================================================================ + + +Release 3.20 (release-date: 2012-11-30) + +New Features +------------ + + - Added a noclobber (-n) setting for the cp command. 
Existing objects/files + will not be overwritten when using this setting. + + +Bug Fixes +--------- + + - Fixed off-by-one error when reporting bytes transferred. + + +Other Changes +------------- + + - Improved versioning support for the remove command. + + - Improved test runner support. + + +================================================================================ + + +Release 3.19 (release-date: 2012-11-26) + +New Features +------------ + - Added support for object versions. + + - Added support for storage classes (including Durable Reduced Availability). + + +Bug Fixes +--------- + - Fixed problem where cp -q prevented resumable uploads from being performed. + + - Made setwebcfg and setcors tests robust wrt XML formatting variation. + + +Other Changes +------------- + + - Incorporated vapier@ mods to make version command not fail if CHECKSUM file + missing. + + - Refactored gsutil such that most functionality exists in boto. + + - Updated gsutil help dev instructions for how to check out source. + + +================================================================================ + + +Release 3.18 (release-date: 2012-09-19) + +Bug Fixes +--------- + + - Fixed resumable upload boundary condition when handling POST request + when server already has complete file, which resulted in an infinite + loop that consumed 100% of the CPU. + + - Fixed one more place that outputted progress info when gsutil cp -q + specified (during streaming uploads). + + +Other Changes +------------- + + - Updated help text for "gsutil help setmeta" and "gsutil help metadata", to + clarify and deepen parts of the documentation. + + +================================================================================ + + +Release 3.17 (release-date: 2012-08-17) + +Bug Fixes +--------- + + - Fixed race condition when multiple threads attempt to get an OAuth2 refresh + token concurrently. + + +Other Changes +------------- + + - Implemented simplified syntax for setmeta command. 
The old syntax still + works but is now deprecated. + + - Added help to gsutil cp -z option, to describe how to change where temp + files are written. + + +================================================================================ + + +Release 3.16 (release-date: 2012-08-13) + +Bug Fixes +--------- + + - Added info to built-in help for setmeta command, to explain the syntax + needed when running from Windows. + +================================================================================ + + +Release 3.15 (release-date: 2012-08-12) + +New Features +------------ + - Implemented gsutil setmeta command. + + - Made gsutil understand bucket subdir conventions used by various tools + (like GCS Manager and CloudBerry) so if you cp or mv to a subdir you + created with one of those tools it will work as expected. + + - Added support for Windows drive letter-prefaced paths when using Storage + URIs. + + +Bug Fixes +--------- + + - Fixed performance bug when downloading a large object with Content- + Encoding:gzip, where decompression attempted to load the entire object + in memory. Also added "Uncompressing" log output if file is larger than + 50M, to make it clear the download hasn't stalled. + + - Fixed naming bug when performing gsutil mv from a bucket subdir to + an existing bucket subdir. + + - Fixed bug that caused cross-provider copies into Google Cloud Storage to + fail. + + - Made change needed to make resumable transfer progress messages not print + when running gsutil cp -q. + + - Fixed copy/paste error in config file documentation for + https_validate_certificates option. + + - Various typo fixes. + +Other Changes +------------- + + - Changed gsutil to unset http_proxy environment variable if it's set, + because it confuses boto. (Proxies should instead be configured via the + boto config file.)
+ +================================================================================ + + +Release 3.14 (release-date: 2012-07-28) + +New Features +------------ + - Added cp -q option, to support quiet operation from cron jobs. + + - Made config command restore backed up file if there was a failure or user + hits ^C. + +Bug Fixes +--------- + + - Fixed bug where gsutil cp -R from a source directory didn't generate + correct destination path. + + - Fixed file handle leak in gsutil cp -z + + - Fixed bug that caused cp -a option not to work when copying in the cloud. + + - Fixed bug that caused '/-' to be appended to object name for streaming + uploads. + + - Revert incorrect line I changed in previous CL, that attempted to + get fp from src_key object. The real fix that's needed is described in + http://code.google.com/p/gsutil/issues/detail?id=73. + +Other Changes +------------- + + - Changed logging to print "Copying..." and Content-Type on same line; + refactored content type and log handling. + + +================================================================================ + + +Release 3.13 (release-date: 2012-07-19) + +Bug Fixes +--------- + + - Included the fix to make 'gsutil config' honor BOTO_CONFIG environment + variable (which was intended to be included in Release 3.12) + + +================================================================================ + + +Release 3.11 (release-date: 2012-06-28) + +New Features +------------ + + - Added support for configuring website buckets. + +Bug Fixes +--------- + + - Fixed bug that caused simultaneous resumable downloads of the same source + object to use the same tracker file. + + - Changed language code spec pointer from Wikipedia to loc.gov (for + Content-Language header). + + +================================================================================ + + +Release 3.10 (release-date: 2012-06-19) + +New Features +------------ + + - Added support for setting and listing Content-Language header. 
+ + +Bug Fixes +--------- + + - Fixed bug that caused getacl/setacl commands to get a character encoding + exception when ACL content contained content not representable in ISO-8859-1 + character set. + + - Fixed gsutil update not to fail under Windows exclusive file locking. + + - Fixed gsutil ls -L to continue past 403 errors. + + - Updated gsutil tests and also help dev with instructions on how to run + boto tests, based on recent test refactoring done in the boto library. + + - Cleaned up parts of cp help text. + + +================================================================================ + + +Release 3.9 (release-date: 2012-05-24) + +Bug Fixes +--------- + + - Fixed bug that caused extra "file:/" to be included in pathnames when + doing gsutil cp -R on Windows. + + +================================================================================ + + +Release 3.8 (release-date: 2012-05-20) + +Bug Fixes +--------- + + - Fixed problem with non-ASCII filename characters not setting encoding before + attempting to hash for generating resumable transfer filename. + + +================================================================================ + + +Release 3.7 (release-date: 2012-05-11) + +Bug Fixes +--------- + + - Fixed handling of HTTPS tunneling through a proxy. + + +================================================================================ + + +Release 3.6 (release-date: 2012-05-09) + +Bug Fixes +--------- + + - Fixed bug that caused wildcards spanning directories not to work. + - Fixed bug that caused gsutil cp -z not to find available tmp space correctly + under Windows. + + +================================================================================ + + +Release 3.5 (release-date: 2012-04-30) + +Performance Improvement +----------------------- + + - Change by Evan Worley to calculate MD5s incrementally during uploads and + downloads. This reduces overall transfer time substantially for large + objects.
+ +Bug Fixes +--------- + + - Fixed bug where uploading and moving multiple files to a bucket subdirectory + didn't work as intended. + (http://code.google.com/p/gsutil/issues/detail?id=93). + - Fixed bug where gsutil cp -r sourcedir didn't copy to specified subdir + if there is only one file in sourcedir. + - Fixed bug where tracker file included a timestamp that caused it not to + be recognized across sessions. + - Fixed bug where gs://bucket/*/dir wildcard matches too many objects. + - Fixed documentation errors in help associated with ACLs and projects. + - Changed GCS ACL parsing to be case-insensitive. + - Changed ls to print error and exit with non-0 status when wildcard matches + nothing, to be more consistent with UNIX shell behavior. + + +================================================================================ + + +Release 3.4 (release-date: 2012-04-06) + +Bug Fixes +--------- + + - Fixed problem where resumable uploads/downloads of objects with very long + names would generate tracking files with names that exceeded local file + system limits, making it impossible to complete resumable transfers for + those objects. Solution was to build the tracking file name from a fixed + prefix, SHA1 hash of the long filename, epoch timestamp and last 16 + chars of the long filename, which is guaranteed to be a predictable and + reasonable length. + + - Fixed minor bug in output from 'gsutil help dev' which advised executing + an inconsequential test script (test_util.py). + + +================================================================================ + + +Release 3.3 (release-date: 2012-04-03) + +Bug Fixes +--------- + + - Fixed problem where gsutil ver and debug flags crashed when used + with newly generated boto config files. + + - Fixed gsutil bug in windows path handling, and make checksumming work + across platforms. + + - Fixed enablelogging to translate -b URI param to plain bucket name in REST + API request.
+ + +================================================================================ + + +Release 3.2 (release-date: 2012-03-30) + +Bug Fixes +--------- + + - Fixed problem where gsutil didn't convert between OS-specific directory + separators when copying individually-named files (issue 87). + + - Fixed problem where gsutil ls -R didn't work right if there was a key + with a leading path (like /foo/bar/baz) + + +================================================================================ + + +Release 3.1 (release-date: 2012-03-20) + +Bug Fixes +--------- + + - Removed erroneous setting of Content-Encoding when a gzip file is uploaded + (vs running gsutil cp -z, when Content-Encoding should be set). This + error caused users to get gsutil.tar.gz file uncompressed by the user + agent (like wget) while downloading, making the file appear to be of the + wrong size/content. + + - Fixed handling of gsutil help for Windows (previous code depended on + termios and fcntl libs, which are Linux/MacOS-specific). + + +================================================================================ + + +Release 3.0 (release-date: 2012-03-20) + + +Important Notes +--------------- + + - Backwards-incompatible wildcard change + + The '*' wildcard now only matches objects within a bucket directory. If + you have scripts that depend on being able to match spanning multiple + directories you need to use '**' instead. For example, the command: + + gsutil cp gs://bucket/*.txt + + will now only match .txt files in the top-level directory. + + gsutil cp gs://bucket/**.txt + + will match across all directories. + + - gsutil ls now lists one directory at a time. If you want to list all objects + in a bucket, you can use: + + gsutil ls gs://bucket/** + + or: + + gsutil ls -R gs://bucket + + +New Features +------------ + + - Built-in help for all commands and many additional topics. Try + "gsutil help" for a list of available commands and topics. 
+ + - A new hierarchical file tree abstraction layer, which makes the flat bucket + name space look like a hierarchical file tree. This makes several things + possible: + - copying data to/from bucket sub-directories (see “gsutil help cp”). + - distributing large uploads/downloads across many machines + (see “gsutil help cp”) + - renaming bucket sub-directories (see “gsutil help mv”). + - listing individual bucket sub-directories and for listing directories + recursively (see “gsutil help ls”). + - setting ACLs for objects in a sub-directory (see “gsutil help setacl”). + + - Support for per-directory (*) and recursive (**) wildcards. Essentially, + ** works the way * did in previous gsutil releases, and * now behaves + consistently with how it works in command interpreters (like bash). The + ability to specify directory-only wildcards also enables a number of use + cases, such as distributing large uploads/downloads by wildcarded name. See + "gsutil help wildcards" for details. + + - Support for Cross-Origin Resource Sharing (CORS) configuration. See "gsutil + help cors" for details. + + - Support for multi-threading and recursive operation for setacl command + (see “gsutil help setacl”). + + - Ability to use the UNIX 'file' command to do content type recognition as + an alternative to filename extensions. + + - Introduction of new end-to-end test suite. + + - The gsutil version command now computes a checksum of the code, to detect + corruption and local modification when assisting with technical support. + + - The gsutil update command is no longer beta/experimental, and now also + supports updating from named URIs (for early/test releases). + + - Changed gsutil ls -L to also print Content-Disposition header. + + +Bug Fixes +--------- + + - The gsutil cp -t option previously didn't work as documented, and instead + Content-Type was always detected based on filename extension. 
Content-Type + detection is now the default, the -t option is deprecated (to be removed in + the future), and specifying a -h Content-Type header now correctly overrides + the filename extension based handling. For details see "gsutil help + metadata". + + - Fixed bug that caused multi-threaded mv command not to percolate failures + during the cp phase to the rm phase, which could under some circumstances + cause data that was not copied to be deleted. + + - Fixed bug that caused gsutil to use GET for ls -L requests. It now uses HEAD + for ls -L requests, which is more efficient and faster. + + - Fixed bug that caused gsutil not to preserve metadata during + copy-in-the-cloud. + + - Fixed bug that prevented setacl command from allowing DisplayName's in ACLs. + + - Fixed bug that caused gsutil/boto to suppress consecutive slashes in path + names. + + - Fixed spec-non-compliant URI construction for resumable uploads. + + - Fixed bug that caused rm -f not to work. + + - Fixed UnicodeEncodeError that happened when redirecting gsutil ls output + to a file with non-ASCII object names. + + +Other Changes +------------- + + - UserAgent sent in HTTP requests now includes gsutil version number and OS + name. + + - Starting with this release users are able to get individual named releases + from version-named objects: gs://pub/gsutil_<version>.tar.gz + and gs://pub/gsutil_<version>.zip. The version-less counterparts + (gs://pub/gsutil.tar.gz and gs://pub/gsutil.zip) will contain the latest + release. Also, the gs://pub bucket is now publicly readable (so, anyone + can list its contents). + + +================================================================================ + +Release 2.0 (release-date: 2012-01-13) + + +New Features +------------ + + - Support for two new installation modes: enterprise and RPM. 
+ Customers can now install gsutil one of three ways: + + - Individual user mode (previously the only available mode): unpacking from + a gzipped tarball (gs://pub/gsutil.tar.gz) or zip file + (gs://pub/gsutil.zip) and running the gsutil command in place in the + unpacked gsutil directory. + + - Enterprise mode (new): unpacking as above, and then running the setup.py + script in the unpacked gsutil directory. This allows a systems + administrator to install gsutil in a central location, using the Python + distutils facility. This mode is supported only on Linux and MacOS. + + - RPM mode (new). A RedHat RPM can be built from the gsutil.spec.in file + in the unpacked gsutil directory, allowing it to be installed as part of + a RedHat build. + + - Note: v2.0 is the first numbered gsutil release. Previous releases + were given timestamps for versions. Numbered releases enable downstream + package builds (like RPMs) to define dependencies more easily. + This is also the first version where we began including release notes. diff --git a/third_party/gsutil/VERSION b/third_party/gsutil/VERSION new file mode 100644 index 000000000..ed5e66e1d --- /dev/null +++ b/third_party/gsutil/VERSION @@ -0,0 +1 @@ +3.25 diff --git a/third_party/gsutil/gsutil b/third_party/gsutil/gsutil new file mode 100755 index 000000000..ac0b76525 --- /dev/null +++ b/third_party/gsutil/gsutil @@ -0,0 +1,382 @@ +#!/usr/bin/env python +# coding=utf8 +# Copyright 2010 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def _OutputAndExit(message):
  """Writes a failure report to stderr and exits with status 1.

  When the module-level debug level is 4 (-DD) and an exception is currently
  being handled, that exception's stack trace is written instead of message.
  In every other case message is written, followed by a newline.

  Args:
    message: Text to report when no stack trace is printed.

  Raises:
    SystemExit: Always, with exit status 1.
  """
  # traceback.format_exc() only renders a "None" placeholder when no exception
  # is being handled (e.g. when we are called from a signal handler), which
  # would hide the real message. Only take the trace path when there is an
  # active exception to show.
  if debug == 4 and sys.exc_info()[0] is not None:
    stack_trace = traceback.format_exc()
    sys.stderr.write('DEBUG: Exception stack trace:\n %s\n' %
                     re.sub('\\n', '\n ', stack_trace))
  else:
    sys.stderr.write('%s\n' % message)
  sys.exit(1)


def _OutputUsageAndExit(command_runner):
  """Runs the built-in 'help' command and exits with status 1.

  Args:
    command_runner: Object whose RunNamedCommand('help') prints the usage.

  Raises:
    SystemExit: Always, with exit status 1.
  """
  command_runner.RunNamedCommand('help')
  sys.exit(1)
def main():
  """Parses gsutil's global options and dispatches the requested command.

  Sets the module-level debug level from -d/-D, collects -h/--header values,
  configures logging, and then runs the named command (or 'help' when none is
  given) through _RunNamedCommandAndHandleExceptions.

  Returns:
    Whatever _RunNamedCommandAndHandleExceptions returns for the command.

  Raises:
    CommandException: If the interpreter is older than Python 2.6.
  """
  global debug

  if sys.version_info[:3] < (2, 6):
    raise CommandException('gsutil requires Python 2.6 or higher.')

  config_file_list = _GetBotoConfigFileList()
  command_runner = CommandRunner(gsutil_bin_dir, boto_lib_dir, config_file_list,
                                 gsutil_ver)
  headers = {}
  parallel_operations = False
  debug = 0

  # If user enters no commands just print the usage info.
  if len(sys.argv) == 1:
    sys.argv.append('help')

  # Change the default of the 'https_validate_certificates' boto option to
  # True (it is currently False in boto).
  if not boto.config.has_option('Boto', 'https_validate_certificates'):
    if not boto.config.has_section('Boto'):
      boto.config.add_section('Boto')
    boto.config.setbool('Boto', 'https_validate_certificates', True)

  try:
    # 'header=' carries the trailing '=' because the option takes a value;
    # without it getopt never hands --header an argument.
    opts, args = getopt.getopt(sys.argv[1:], 'dDvh:m',
                               ['debug', 'detailedDebug', 'version', 'help',
                                'header=', 'multithreaded'])
  except getopt.GetoptError as e:
    _HandleCommandException(CommandException(e.msg))
  for o, a in opts:
    if o in ('-d', '--debug'):
      # Passing debug=2 causes boto to include httplib header output.
      debug = 2
    if o in ('-D', '--detailedDebug'):
      # We use debug level 3 to ask gsutil code to output more detailed
      # debug output. This is a bit of a hack since it overloads the same
      # flag that was originally implemented for boto use. And we use -DD
      # to ask for really detailed debugging (i.e., including HTTP payload).
      if debug == 3:
        debug = 4
      else:
        debug = 3
    if o in ('-?', '--help'):
      _OutputUsageAndExit(command_runner)
    if o in ('-h', '--header'):
      (hdr_name, unused_ptn, hdr_val) = a.partition(':')
      if not hdr_name:
        _OutputUsageAndExit(command_runner)
      headers[hdr_name] = hdr_val
    if o in ('-m', '--multithreaded'):
      parallel_operations = True
  if debug > 1:
    sys.stderr.write(
        '***************************** WARNING *****************************\n'
        '*** You are running gsutil with debug output enabled.\n'
        '*** Be aware that debug output includes authentication '
        'credentials.\n'
        '*** Do not share (e.g., post to support forums) debug output\n'
        '*** unless you have sanitized authentication tokens in the\n'
        '*** output, or have revoked your credentials.\n'
        '***************************** WARNING *****************************\n')
  if debug == 2:
    logging.basicConfig(level=logging.INFO)
  elif debug > 2:
    logging.basicConfig(level=logging.DEBUG)
    command_runner.RunNamedCommand('ver')
    config_items = []
    try:
      config_items.extend(boto.config.items('Boto'))
      config_items.extend(boto.config.items('GSUtil'))
    except ConfigParser.NoSectionError:
      pass
    sys.stderr.write('config_file_list: %s\n' % config_file_list)
    sys.stderr.write('config: %s\n' % str(config_items))
  else:
    logging.basicConfig()

  if not args:
    command_name = 'help'
  else:
    command_name = args[0]

  # Unset http_proxy environment variable if it's set, because it confuses
  # boto. (Proxies should instead be configured via the boto config file.)
  if 'http_proxy' in os.environ:
    if debug > 1:
      sys.stderr.write(
          'Unsetting http_proxy environment variable within gsutil run.\n')
    del os.environ['http_proxy']

  return _RunNamedCommandAndHandleExceptions(command_runner, command_name,
                                             args[1:], headers, debug,
                                             parallel_operations)


def _GetBotoConfigFileList():
  """Returns list of boto config files that exist.

  Candidates are boto's BotoConfigLocations followed by the file named by the
  AWS_CREDENTIAL_FILE environment variable, if set.

  Returns:
    The existing candidate paths, each listed once, in candidate order.
  """
  # Work on a copy: appending to boto's module-level list would grow it on
  # every call and change it for everything else that reads it.
  config_paths = list(boto.pyami.config.BotoConfigLocations)
  if 'AWS_CREDENTIAL_FILE' in os.environ:
    config_paths.append(os.environ['AWS_CREDENTIAL_FILE'])
  cf_list = []
  for config_path in config_paths:
    if os.path.exists(config_path) and config_path not in cf_list:
      cf_list.append(config_path)
  return cf_list


def _HandleUnknownFailure(e):
  """Reports an exception that no specific handler recognized, then exits.

  With -D (debug level above 2) the current exception's stack trace is written
  to stderr; otherwise a one-line 'Failure: ...' message is reported.

  Args:
    e: The exception that fell through all known/handled cases.

  Raises:
    SystemExit: Always, with exit status 1.
  """
  if debug > 2:
    stack_trace = traceback.format_exc()
    sys.stderr.write('DEBUG: Exception stack trace:\n %s\n' %
                     re.sub('\\n', '\n ', stack_trace))
    # Exit non-zero on this path as well; simply returning would let the
    # caller finish with a success status after a failure.
    sys.exit(1)
  else:
    _OutputAndExit('Failure: %s.' % e)
def _HandleCommandException(e):
  """Reports a CommandException and exits.

  Args:
    e: Exception exposing 'informational' and 'reason' attributes. An
       informational exception is reported as its bare reason; any other is
       prefixed with 'CommandException: '.
  """
  if e.informational:
    _OutputAndExit(e.reason)
  else:
    _OutputAndExit('CommandException: %s' % e.reason)


def _HandleControlC(signal_num, cur_stack_frame):
  """Called when user hits ^C so we can print a brief message instead of
  the normal Python stack trace (unless -D option is used).

  Args:
    signal_num: Number of the signal that was caught.
    cur_stack_frame: Frame passed by the signal machinery (unused).
  """
  if debug > 2:
    stack_trace = ''.join(traceback.format_list(traceback.extract_stack()))
    _OutputAndExit('DEBUG: Caught signal %d - Exception stack trace:\n'
                   ' %s' % (signal_num, re.sub('\\n', '\n ', stack_trace)))
  else:
    _OutputAndExit('Caught signal %d - exiting' % signal_num)


def _HandleSigQuit(signal_num, cur_stack_frame):
  r"""Called when user hits ^\, so we can force breakpoint a running gsutil."""
  import pdb; pdb.set_trace()


def _RunNamedCommandAndHandleExceptions(command_runner, command_name, args=None,
                                        headers=None, debug=0,
                                        parallel_operations=False):
  """Runs one gsutil command, turning known exceptions into error messages.

  Installs the ^C handler (and, when util.IS_WINDOWS is false, the ^\\
  handler), then delegates to command_runner.RunNamedCommand. Each recognized
  exception type is reported through _OutputAndExit; anything else goes to
  _HandleUnknownFailure.

  Args:
    command_runner: Object providing RunNamedCommand.
    command_name: Name of the command to run.
    args: Optional list of command arguments.
    headers: Optional dict of extra HTTP headers.
    debug: Debug level forwarded to the command.
    parallel_operations: Forwarded to the command.

  Returns:
    The value returned by command_runner.RunNamedCommand.
  """
  try:
    # Catch ^C so we can print a brief message instead of the normal Python
    # stack trace.
    signal.signal(signal.SIGINT, _HandleControlC)
    # Catch ^\ so we can force a breakpoint in a running gsutil.
    if not util.IS_WINDOWS:
      signal.signal(signal.SIGQUIT, _HandleSigQuit)
    return command_runner.RunNamedCommand(command_name, args, headers, debug,
                                          parallel_operations)
  except AttributeError as e:
    if 'secret_access_key' in str(e):
      _OutputAndExit('Missing credentials for the given URI(s). Does your '
                     'boto config file contain all needed credentials?')
    else:
      _OutputAndExit(str(e))
  except BotoClientError as e:
    _OutputAndExit('BotoClientError: %s.' % e.reason)
  except CommandException as e:
    _HandleCommandException(e)
  except getopt.GetoptError as e:
    _HandleCommandException(CommandException(e.msg))
  except InvalidAclError as e:
    _OutputAndExit('InvalidAclError: %s.' % str(e))
  except InvalidUriError as e:
    _OutputAndExit('InvalidUriError: %s.' % e.message)
  except ProjectIdException as e:
    _OutputAndExit('ProjectIdException: %s.' % e.reason)
  except boto.auth_handler.NotReadyToAuthenticate:
    _OutputAndExit('NotReadyToAuthenticate')
  except OSError as e:
    _OutputAndExit('OSError: %s.' % e.strerror)
  except WildcardException as e:
    _OutputAndExit(e.reason)
  except StorageResponseError as e:
    # Check for access denied, and provide detail to users who have no boto
    # config file (who might previously have been using gsutil only for
    # accessing publicly readable buckets and objects).
    if e.status == 403:
      if not HasConfiguredCredentials():
        _OutputAndExit(
            'You are attempting to access protected data with no configured '
            'credentials.\nPlease see '
            'http://code.google.com/apis/storage/docs/signup.html for\ndetails '
            'about activating the Google Cloud Storage service and then run '
            'the\n"gsutil config" command to configure gsutil to use these '
            'credentials.')
      # args defaults to None, so guard the join against a missing list.
      elif (e.error_code == 'AccountProblem'
            and 'gs://' in ','.join(args or [])):
        default_project_id = boto.config.get_value('GSUtil',
                                                   'default_project_id')
        acct_help_part_1 = (
"""Your request resulted in an AccountProblem (403) error. Usually this happens
if you attempt to create a bucket or upload an object without having first
enabled billing for the project you are using. To remedy this problem, please do
the following:

1. Navigate to the Google APIs console (https://code.google.com/apis/console),
   and ensure the drop-down selector beneath "Google APIs" shows the project
   you're attempting to use.

""")
        acct_help_part_2 = '\n'
        if default_project_id:
          acct_help_part_2 = (
"""2. Click "Google Cloud Storage" on the left hand pane, and then check that
   the value listed for "x-goog-project-id" on this page matches the project ID
   (%s) from your boto config file.

""" % default_project_id)
        acct_help_part_3 = (
"""Check whether there's an "!" next to Billing. If so, click Billing and then
   enable billing for this project. Note that it can take up to one hour after
   enabling billing for the project to become activated for creating buckets and
   uploading objects.

If the above doesn't resolve your AccountProblem, please send mail to
gs-team@google.com requesting assistance, noting the exact command you ran, the
fact that you received a 403 AccountProblem error, and your project ID. Please
do not post your project ID on the public discussion forum (gs-discussion) or on
StackOverflow.

Note: It's possible to use Google Cloud Storage without enabling billing if
you're only listing or reading objects for which you're authorized, or if
you're uploading objects to a bucket billed to a project that has billing
enabled. But if you're attempting to create buckets or upload objects to a
bucket owned by your own project, you must first enable billing for that
project.""")
        # The billing step is numbered 3 only when the project-id step (2)
        # was included above.
        if default_project_id:
          _OutputAndExit(acct_help_part_1 + acct_help_part_2 + '3. ' +
                         acct_help_part_3)
        else:
          _OutputAndExit(acct_help_part_1 + '2. ' + acct_help_part_3)

    if not e.body:
      e.body = ''
    exc_name, error_detail = ExtractErrorDetail(e)
    if error_detail:
      _OutputAndExit('%s: status=%d, code=%s, reason=%s, detail=%s.' %
                     (exc_name, e.status, e.code, e.reason, error_detail))
    else:
      _OutputAndExit('%s: status=%d, code=%s, reason=%s.' %
                     (exc_name, e.status, e.code, e.reason))
  except ResumableUploadException as e:
    _OutputAndExit('ResumableUploadException: %s.' % e.message)
  except socket.error as e:
    if e.args[0] == errno.EPIPE:
      # Retrying with a smaller file (per suggestion below) works because
      # the library code send loop (in boto/s3/key.py) can get through the
      # entire file and then request the HTTP response before the socket
      # gets closed and the response lost.
      message = (
"""
Got a "Broken pipe" error. This can happen to clients using Python 2.x,
when the server sends an error response and then closes the socket (see
http://bugs.python.org/issue5542). If you are trying to upload a large
object you might retry with a small (say 200k) object, and see if you get
a more specific error code.
""")
      _OutputAndExit(message)
    else:
      _HandleUnknownFailure(e)
  except Exception as e:
    _HandleUnknownFailure(e)
+ +%prep +%setup -q + +%build +python setup.py build + +%install +python setup.py install --skip-build --root=%{buildroot} +# Make all files and dirs in build area readable by other +# and make all directories executable by other. These steps +# are performed in support of the rpm installation mode, +# in which users with different user/group than the +# installation user/group must be able to run gsutil. +chmod -R o+r %{buildroot}/usr/share/gsutil +find %{buildroot}/usr/share/gsutil -type d | xargs chmod o+x +# Make main gsutil script readable and executable by other. +chmod o+rx %{buildroot}/usr/share/gsutil/gsutil +# Remove Python egg file, which we don't use (but setup.py insists on +# building) so we remove it here. +rm %{buildroot}/usr/local/lib/python2.6/dist-packages/gsutil-2.0.egg-info +# Remove update command, which shouldn't be used when gsutil is managed by RPM. +rm %{buildroot}/usr/share/gsutil/gslib/commands/update.py +# Create /usr/bin under buildroot and symlink gsutil so users don't +# need to add a custom directory to their PATH. +mkdir -p %{buildroot}%{_bindir} +cd %{buildroot}%{_bindir} +ln -s ../share/gsutil/gsutil gsutil + +%clean +rm -rf %{buildroot} + +%files +%defattr(-,root,root,-) +# Lines ending with a slash cause recursive enumeration of directory contents. +%{_bindir}/%{name} +###FILES_GO_HERE### + +%changelog +* Tue Dec 10 2011 Marc Cohen 2.0-1 +- initial version of rpm spec file for gsutil for inclusion in RHEL + diff --git a/third_party/gsutil/oauth2_plugin/__init__.py b/third_party/gsutil/oauth2_plugin/__init__.py new file mode 100644 index 000000000..92915ddf7 --- /dev/null +++ b/third_party/gsutil/oauth2_plugin/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2011 Google Inc. All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +"""Package marker file.""" diff --git a/third_party/gsutil/oauth2_plugin/oauth2_client.py b/third_party/gsutil/oauth2_plugin/oauth2_client.py new file mode 100644 index 000000000..84c74c87c --- /dev/null +++ b/third_party/gsutil/oauth2_plugin/oauth2_client.py @@ -0,0 +1,642 @@ +# Copyright 2010 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An OAuth2 client library. 
+ +This library provides a client implementation of the OAuth2 protocol (see +http://code.google.com/apis/accounts/docs/OAuth2.html). + +**** Experimental API **** + +This module is experimental and is subject to modification or removal without +notice. +""" + +# This implementation is inspired by the implementation in +# http://code.google.com/p/google-api-python-client/source/browse/oauth2client/, +# with the following main differences: +# - This library uses the fancy_urllib monkey patch for urllib to correctly +# implement SSL certificate validation. +# - This library does not assume that client code is using the httplib2 library +# to make HTTP requests. +# - This library implements caching of access tokens independent of refresh +# tokens (in the python API client oauth2client, there is a single class that +# encapsulates both refresh and access tokens). + + +import cgi +import datetime +import errno +from hashlib import sha1 +import logging +import os +import tempfile +import threading +import urllib +import urllib2 +import urlparse + +from boto import cacerts +from third_party import fancy_urllib + +try: + import json +except ImportError: + try: + # Try to import from django, should work on App Engine + from django.utils import simplejson as json + except ImportError: + # Try for simplejson + import simplejson as json + +LOG = logging.getLogger('oauth2_client') +# Lock used for checking/exchanging refresh token, so multithreaded +# operation doesn't attempt concurrent refreshes. +token_exchange_lock = threading.Lock() + +# SHA1 sum of the CA certificates file imported from boto. 
# SHA1 sum of the CA certificates file imported from boto.
CACERTS_FILE_SHA1SUM = 'ed024a78d9327f8669b3b117d9eac9e3c9460e9b'


class Error(Exception):
  """Base exception for the OAuth2 module."""
  pass


class AccessTokenRefreshError(Error):
  """Error trying to exchange a refresh token into an access token."""
  pass


class AuthorizationCodeExchangeError(Error):
  """Error trying to exchange an authorization code into a refresh token."""
  pass


class TokenCache(object):
  """Interface for OAuth2 token caches."""

  def PutToken(self, key, value):
    """Stores value under key; must be implemented by subclasses."""
    raise NotImplementedError

  def GetToken(self, key):
    """Returns the value cached under key; must be implemented by subclasses."""
    raise NotImplementedError


class NoopTokenCache(TokenCache):
  """A stub implementation of TokenCache that does nothing."""

  def PutToken(self, key, value):
    """Discards the value."""
    pass

  def GetToken(self, key):
    """Always reports a cache miss by returning None."""
    return None


class InMemoryTokenCache(TokenCache):
  """An in-memory token cache.

  The cache is implemented by a python dict, and inherits the thread-safety
  properties of dict.
  """

  def __init__(self):
    super(InMemoryTokenCache, self).__init__()
    self.cache = {}

  def PutToken(self, key, value):
    """Stores value under key in the dict."""
    LOG.info('InMemoryTokenCache.PutToken: key=%s', key)
    self.cache[key] = value

  def GetToken(self, key):
    """Returns the value stored under key, or None if there is none."""
    value = self.cache.get(key, None)
    LOG.info('InMemoryTokenCache.GetToken: key=%s%s present',
             key, ' not' if value is None else '')
    return value


class FileSystemTokenCache(TokenCache):
  """An implementation of a token cache that persists tokens on disk.

  Each token object in the cache is stored in serialized form in a separate
  file. The cache file's name can be configured via a path pattern that is
  parameterized by the key under which a value is cached and optionally the
  current processes uid as obtained by os.getuid().

  Since file names are generally publicly visible in the system, it is important
  that the cache key does not leak information about the token's value. If
  client code computes cache keys from token values, a cryptographically strong
  one-way function must be used.
  """

  def __init__(self, path_pattern=None):
    """Creates a FileSystemTokenCache.

    Args:
      path_pattern: Optional string argument to specify the path pattern for
          cache files. The argument should be a path with format placeholders
          '%(key)s' and optionally '%(uid)s'. If the argument is omitted, the
          default pattern
            <tmpdir>/oauth2_client-tokencache.%(uid)s.%(key)s
          is used, where <tmpdir> is replaced with the system temp dir as
          obtained from tempfile.gettempdir().
    """
    super(FileSystemTokenCache, self).__init__()
    self.path_pattern = path_pattern
    if not path_pattern:
      self.path_pattern = os.path.join(
          tempfile.gettempdir(), 'oauth2_client-tokencache.%(uid)s.%(key)s')

  def CacheFileName(self, key):
    """Returns the cache file path for key.

    Args:
      key: Cache key substituted for '%(key)s' in the path pattern.

    Returns:
      The path pattern with key and the current uid filled in; '_' stands in
      for the uid where os.getuid() is unavailable.
    """
    try:
      uid = str(os.getuid())
    except AttributeError:
      # os.getuid() doesn't exist on Windows.
      uid = '_'
    return self.path_pattern % {'key': key, 'uid': uid}

  def PutToken(self, key, value):
    """Serializes the value to the key's filename.

    To ensure that written tokens aren't leaked to different users, we
     a) unlink an existing cache file, if any (to ensure we don't fall victim
        to symlink attacks and the like),
     b) create a new file with O_CREAT | O_EXCL (to ensure nobody is trying to
        race us)
     If either of these steps fail, we simply give up (but log a warning). Not
     caching access tokens is not catastrophic, and failure to create a file
     can happen for either of the following reasons:
      - someone is attacking us as above, in which case we want to default to
        safe operation (not write the token);
      - another legitimate process is racing us; in this case one of the two
        will win and write the access token, which is fine;
      - we don't have permission to remove the old file or write to the
        specified directory, in which case we can't recover

    Args:
      key: the refresh_token hash key to store.
      value: the access_token value to serialize.
    """

    cache_file = self.CacheFileName(key)
    LOG.info('FileSystemTokenCache.PutToken: key=%s, cache_file=%s',
             key, cache_file)
    try:
      os.unlink(cache_file)
    except OSError:
      # Ignore failure to unlink the file; if the file exists and can't be
      # unlinked, the subsequent open with O_CREAT | O_EXCL will fail.
      pass

    flags = os.O_RDWR | os.O_CREAT | os.O_EXCL

    # Accommodate Windows; stolen from python2.6/tempfile.py.
    if hasattr(os, 'O_NOINHERIT'):
      flags |= os.O_NOINHERIT
    if hasattr(os, 'O_BINARY'):
      flags |= os.O_BINARY

    try:
      # Owner read/write only, so other users cannot read the token.
      fd = os.open(cache_file, flags, 0o600)
    except (OSError, IOError) as e:
      LOG.warning('FileSystemTokenCache.PutToken: '
                  'Failed to create cache file %s: %s', cache_file, e)
      return
    f = os.fdopen(fd, 'w+b')
    try:
      f.write(value.Serialize())
    finally:
      # Close even if serialization or the write raises.
      f.close()

  def GetToken(self, key):
    """Returns a deserialized access token from the key's filename.

    Args:
      key: Cache key whose file should be read.

    Returns:
      The result of AccessToken.UnSerialize on the file contents, or None if
      the file is missing, unreadable or cannot be deserialized.
    """
    value = None
    cache_file = self.CacheFileName(key)
    try:
      f = open(cache_file)
      try:
        value = AccessToken.UnSerialize(f.read())
      finally:
        # Close even if deserialization raises.
        f.close()
    except (IOError, OSError) as e:
      # A missing file is an ordinary cache miss; anything else is logged.
      if e.errno != errno.ENOENT:
        LOG.warning('FileSystemTokenCache.GetToken: '
                    'Failed to read cache file %s: %s', cache_file, e)
    except Exception as e:
      LOG.warning('FileSystemTokenCache.GetToken: '
                  'Failed to read cache file %s (possibly corrupted): %s',
                  cache_file, e)

    LOG.info('FileSystemTokenCache.GetToken: key=%s%s present (cache_file=%s)',
             key, ' not' if value is None else '', cache_file)
    return value


class OAuth2Provider(object):
  """Encapsulates information about an OAuth2 provider."""

  def __init__(self, label, authorization_uri, token_uri):
    """Creates an OAuth2Provider.

    Args:
      label: A string identifying this oauth2 provider, e.g. "Google".
      authorization_uri: The provider's authorization URI.
      token_uri: The provider's token endpoint URI.
    """
    self.label = label
    self.authorization_uri = authorization_uri
    self.token_uri = token_uri
+ """ + self.label = label + self.authorization_uri = authorization_uri + self.token_uri = token_uri + + +class OAuth2Client(object): + """An OAuth2 client.""" + + def __init__(self, provider, client_id, client_secret, + url_opener=None, + proxy=None, + access_token_cache=None, + datetime_strategy=datetime.datetime): + """Creates an OAuth2Client. + + Args: + provider: The OAuth2Provider this client will authenticate + against. + client_id: The OAuth2 client ID of this client. + client_secret: The OAuth2 client secret of this client. + url_opener: An optional urllib2.OpenerDirector to use for making HTTP + requests to the OAuth2 provider's token endpoint. The provided + url_opener *must* be configured to validate server SSL certificates + for requests to https connections, and to correctly handle proxying of + https requests. If this argument is omitted or None, a suitable + opener based on fancy_urllib is used. + proxy: An optional string specifying a HTTP proxy to be used, in the form + '<host>:<port>'. This option is only effective if the url_opener has + been configured with a fancy_urllib.FancyProxyHandler (this is the + case for the default url_opener). + access_token_cache: An optional instance of a TokenCache. If omitted or + None, an InMemoryTokenCache is used. + datetime_strategy: datetime module strategy to use. + """ + self.provider = provider + self.client_id = client_id + self.client_secret = client_secret + # datetime_strategy is used to invoke utcnow() on; it is injected into the + # constructor for unit testing purposes. + self.datetime_strategy = datetime_strategy + self._proxy = proxy + + self.access_token_cache = access_token_cache or InMemoryTokenCache() + + self.ca_certs_file = os.path.join( + os.path.dirname(os.path.abspath(cacerts.__file__)), 'cacerts.txt') + + if url_opener is None: + # Check that the cert file distributed with boto has not been tampered + # with.
+ h = sha1() + h.update(file(self.ca_certs_file).read()) + actual_sha1 = h.hexdigest() + if actual_sha1 != CACERTS_FILE_SHA1SUM: + raise Error( + 'CA certificates file does not have expected SHA1 sum; ' + 'expected: %s, actual: %s' % (CACERTS_FILE_SHA1SUM, actual_sha1)) + # TODO(Google): set user agent? + url_opener = urllib2.build_opener( + fancy_urllib.FancyProxyHandler(), + fancy_urllib.FancyRedirectHandler(), + fancy_urllib.FancyHTTPSHandler()) + self.url_opener = url_opener + + def _TokenRequest(self, request): + """Make a request to this client's provider's token endpoint. + + Args: + request: A dict with the request parameters. + Returns: + A tuple (response, error) where, + - response is the parsed JSON response received from the token endpoint, + or None if no parseable response was received, and + - error is None if the request succeeded or + an Exception if an error occurred. + """ + + body = urllib.urlencode(request) + LOG.debug('_TokenRequest request: %s', body) + response = None + try: + request = fancy_urllib.FancyRequest( + self.provider.token_uri, data=body) + if self._proxy: + request.set_proxy(self._proxy, 'http') + + request.set_ssl_info(ca_certs=self.ca_certs_file) + result = self.url_opener.open(request) + resp_body = result.read() + LOG.debug('_TokenRequest response: %s', resp_body) + except urllib2.HTTPError, e: + try: + response = json.loads(e.read()) + except: + pass + return (response, e) + + try: + response = json.loads(resp_body) + except ValueError, e: + return (None, e) + + return (response, None) + + def GetAccessToken(self, refresh_token): + """Given a RefreshToken, obtains a corresponding access token. + + First, this client's access token cache is checked for an existing, + not-yet-expired access token for the provided refresh token. If none is + found, the client obtains a fresh access token for the provided refresh + token from the OAuth2 provider's token endpoint.
+ + Args: + refresh_token: The RefreshToken object which to get an access token for. + Returns: + The cached or freshly obtained AccessToken. + Raises: + AccessTokenRefreshError if an error occurs. + """ + # Ensure only one thread at a time attempts to get (and possibly refresh) + # the access token. This doesn't prevent concurrent refresh attempts across + # multiple gsutil instances, but at least protects against multiple threads + # simultaneously attempting to refresh when gsutil -m is used. + token_exchange_lock.acquire() + try: + cache_key = refresh_token.CacheKey() + LOG.info('GetAccessToken: checking cache for key %s', cache_key) + access_token = self.access_token_cache.GetToken(cache_key) + LOG.debug('GetAccessToken: token from cache: %s', access_token) + if access_token is None or access_token.ShouldRefresh(): + LOG.info('GetAccessToken: fetching fresh access token...') + access_token = self.FetchAccessToken(refresh_token) + LOG.debug('GetAccessToken: fresh access token: %s', access_token) + self.access_token_cache.PutToken(cache_key, access_token) + return access_token + finally: + token_exchange_lock.release() + + def FetchAccessToken(self, refresh_token): + """Fetches an access token from the provider's token endpoint. + + Given a RefreshToken, fetches an access token from this client's OAuth2 + provider's token endpoint. + + Args: + refresh_token: The RefreshToken object which to get an access token for. + Returns: + The fetched AccessToken. + Raises: + AccessTokenRefreshError: if an error occurs. 
+ """ + request = { + 'grant_type': 'refresh_token', + 'client_id': self.client_id, + 'client_secret': self.client_secret, + 'refresh_token': refresh_token.refresh_token, + } + LOG.debug('FetchAccessToken request: %s', request) + + response, error = self._TokenRequest(request) + LOG.debug( + 'FetchAccessToken response (error = %s): %s', error, response) + + if error: + oauth2_error = '' + if response and response['error']: + oauth2_error = '; OAuth2 error: %s' % response['error'] + raise AccessTokenRefreshError( + 'Failed to exchange refresh token into access token; ' + 'request failed: %s%s' % (error, oauth2_error)) + + if 'access_token' not in response: + raise AccessTokenRefreshError( + 'Failed to exchange refresh token into access token; response: %s' % + response) + + token_expiry = None + if 'expires_in' in response: + token_expiry = ( + self.datetime_strategy.utcnow() + + datetime.timedelta(seconds=int(response['expires_in']))) + + return AccessToken(response['access_token'], token_expiry, + datetime_strategy=self.datetime_strategy) + + def GetAuthorizationUri(self, redirect_uri, scopes, extra_params=None): + """Gets the OAuth2 authorization URI and the specified scope(s). + + Applications should navigate/redirect the user's user agent to this URI. The + user will be shown an approval UI requesting the user to approve access of + this client to the requested scopes under the identity of the authenticated + end user. + + The application should expect the user agent to be redirected to the + specified redirect_uri after the user's approval/disapproval. + + Installed applications may use the special redirect_uri + 'urn:ietf:wg:oauth:2.0:oob' to indicate that instead of redirecting the + browser, the user be shown a confirmation page with a verification code. + The application should query the user for this code. 
+ + Args: + redirect_uri: Either the string 'urn:ietf:wg:oauth:2.0:oob' for a + non-web-based application, or a URI that handles the callback from the + authorization server. + scopes: A list of strings specifying the OAuth scopes the application + requests access to. + extra_params: Optional dictionary of additional parameters to be passed to + the OAuth2 authorization URI. + Returns: + The authorization URI for the specified scopes as a string. + """ + + request = { + 'response_type': 'code', + 'client_id': self.client_id, + 'redirect_uri': redirect_uri, + 'scope': ' '.join(scopes), + } + + if extra_params: + request.update(extra_params) + url_parts = list(urlparse.urlparse(self.provider.authorization_uri)) + # 4 is the index of the query part + request.update(dict(cgi.parse_qsl(url_parts[4]))) + url_parts[4] = urllib.urlencode(request) + return urlparse.urlunparse(url_parts) + + def ExchangeAuthorizationCode(self, code, redirect_uri, scopes): + """Exchanges an authorization code for a refresh token. + + Invokes this client's OAuth2 provider's token endpoint to exchange an + authorization code into a refresh token. + + Args: + code: the authorization code. + redirect_uri: Either the string 'urn:ietf:wg:oauth:2.0:oob' for a + non-web-based application, or a URI that handles the callback from the + authorization server. + scopes: A list of strings specifying the OAuth scopes the application + requests access to. + Returns: + A tuple consisting of the resulting RefreshToken and AccessToken. + Raises: + AuthorizationCodeExchangeError: if an error occurs.
+ """ + request = { + 'grant_type': 'authorization_code', + 'client_id': self.client_id, + 'client_secret': self.client_secret, + 'code': code, + 'redirect_uri': redirect_uri, + 'scope': ' '.join(scopes), + } + LOG.debug('ExchangeAuthorizationCode request: %s', request) + + response, error = self._TokenRequest(request) + LOG.debug( + 'ExchangeAuthorizationCode response (error = %s): %s', + error, response) + + if error: + oauth2_error = '' + if response and response['error']: + oauth2_error = '; OAuth2 error: %s' % response['error'] + raise AuthorizationCodeExchangeError( + 'Failed to exchange refresh token into access token; ' + 'request failed: %s%s' % (str(error), oauth2_error)) + + if not 'access_token' in response: + raise AuthorizationCodeExchangeError( + 'Failed to exchange authorization code into access token; ' + 'response: %s' % response) + + token_expiry = None + if 'expires_in' in response: + token_expiry = ( + self.datetime_strategy.utcnow() + + datetime.timedelta(seconds=int(response['expires_in']))) + + access_token = AccessToken(response['access_token'], token_expiry, + datetime_strategy=self.datetime_strategy) + + refresh_token = None + refresh_token_string = response.get('refresh_token', None) + + token_exchange_lock.acquire() + try: + if refresh_token_string: + refresh_token = RefreshToken(self, refresh_token_string) + self.access_token_cache.PutToken(refresh_token.CacheKey(), access_token) + finally: + token_exchange_lock.release() + + return (refresh_token, access_token) + + +class AccessToken(object): + """Encapsulates an OAuth2 access token.""" + + def __init__(self, token, expiry, datetime_strategy=datetime.datetime): + self.token = token + self.expiry = expiry + self.datetime_strategy = datetime_strategy + + @staticmethod + def UnSerialize(query): + """Creates an AccessToken object from its serialized form.""" + + def GetValue(d, key): + return (d.get(key, [None]))[0] + kv = cgi.parse_qs(query) + if not kv['token']: + return None + expiry = 
None + expiry_tuple = GetValue(kv, 'expiry') + if expiry_tuple: + try: + expiry = datetime.datetime( + *[int(n) for n in expiry_tuple.split(',')]) + except: + return None + return AccessToken(GetValue(kv, 'token'), expiry) + + def Serialize(self): + """Serializes this object as URI-encoded key-value pairs.""" + # There's got to be a better way to serialize a datetime. Unfortunately, + # there is no reliable way to convert into a unix epoch. + kv = {'token': self.token} + if self.expiry: + t = self.expiry + tupl = (t.year, t.month, t.day, t.hour, t.minute, t.second, t.microsecond) + kv['expiry'] = ','.join([str(i) for i in tupl]) + return urllib.urlencode(kv) + + def ShouldRefresh(self, time_delta=300): + """Whether the access token needs to be refreshed. + + Args: + time_delta: refresh access token when it expires within time_delta secs. + + Returns: + True if the token is expired or about to expire, False if the + token should be expected to work. Note that the token may still + be rejected, e.g. if it has been revoked server-side. + """ + if self.expiry is None: + return False + return (self.datetime_strategy.utcnow() + + datetime.timedelta(seconds=time_delta) > self.expiry) + + def __eq__(self, other): + return self.token == other.token and self.expiry == other.expiry + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + return 'AccessToken(token=%s, expiry=%sZ)' % (self.token, self.expiry) + + +class RefreshToken(object): + """Encapsulates an OAuth2 refresh token.""" + + def __init__(self, oauth2_client, refresh_token): + self.oauth2_client = oauth2_client + self.refresh_token = refresh_token + + def CacheKey(self): + """Computes a cache key for this refresh token. + + The cache key is computed as the SHA1 hash of the token, and as such + satisfies the FileSystemTokenCache requirement that cache keys do not leak + information about token values. + + Returns: + A hash key for this refresh token. 
+ """ + h = sha1() + h.update(self.refresh_token) + return h.hexdigest() + + def GetAuthorizationHeader(self): + """Gets the access token HTTP authorization header value. + + Returns: + The value of an Authorization HTTP header that authenticates + requests with an OAuth2 access token based on this refresh token. + """ + return 'Bearer %s' % self.oauth2_client.GetAccessToken(self).token diff --git a/third_party/gsutil/oauth2_plugin/oauth2_client_test.py b/third_party/gsutil/oauth2_plugin/oauth2_client_test.py new file mode 100644 index 000000000..1d8e58196 --- /dev/null +++ b/third_party/gsutil/oauth2_plugin/oauth2_client_test.py @@ -0,0 +1,374 @@ +# Copyright 2010 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +"""Unit tests for oauth2_client.""" + +import datetime +import logging +import os +import sys +import unittest +import urllib2 +import urlparse +from stat import S_IMODE +from StringIO import StringIO + +test_bin_dir = os.path.dirname(os.path.realpath(sys.argv[0])) + +lib_dir = os.path.join(test_bin_dir, '..') +sys.path.insert(0, lib_dir) + +# Needed for boto.cacerts +boto_lib_dir = os.path.join(test_bin_dir, '..', 'boto') +sys.path.insert(0, boto_lib_dir) + +import oauth2_client + +LOG = logging.getLogger('oauth2_client_test') + +class MockOpener: + def __init__(self): + self.reset() + + def reset(self): + self.open_error = None + self.open_result = None + self.open_capture_url = None + self.open_capture_data = None + + def open(self, req, data=None): + self.open_capture_url = req.get_full_url() + self.open_capture_data = req.get_data() + if self.open_error is not None: + raise self.open_error + else: + return StringIO(self.open_result) + + +class MockDateTime: + def __init__(self): + self.mock_now = None + + def utcnow(self): + return self.mock_now + + +class OAuth2ClientTest(unittest.TestCase): + def setUp(self): + self.opener = MockOpener() + self.mock_datetime = MockDateTime() + self.start_time = datetime.datetime(2011, 3, 1, 10, 25, 13, 300826) + self.mock_datetime.mock_now = self.start_time + self.client = oauth2_client.OAuth2Client( + oauth2_client.OAuth2Provider( + 'Sample OAuth Provider', + 'https://provider.example.com/oauth/provider?mode=authorize', + 'https://provider.example.com/oauth/provider?mode=token'), + 'clid', 'clsecret', + url_opener=self.opener, datetime_strategy=self.mock_datetime) + + def testFetchAccessToken(self): + refresh_token = '1/ZaBrxdPl77Bi4jbsO7x-NmATiaQZnWPB51nTvo8n9Sw' + access_token = '1/aalskfja-asjwerwj' + self.opener.open_result = ( + '{"access_token":"%s","expires_in":3600}' % access_token) + cred = oauth2_client.RefreshToken(self.client, refresh_token) + token = self.client.FetchAccessToken(cred) + + self.assertEquals( 
+ self.opener.open_capture_url, + 'https://provider.example.com/oauth/provider?mode=token') + self.assertEquals({ + 'grant_type': ['refresh_token'], + 'client_id': ['clid'], + 'client_secret': ['clsecret'], + 'refresh_token': [refresh_token]}, + urlparse.parse_qs(self.opener.open_capture_data, keep_blank_values=True, + strict_parsing=True)) + self.assertEquals(access_token, token.token) + self.assertEquals( + datetime.datetime(2011, 3, 1, 11, 25, 13, 300826), + token.expiry) + + def testFetchAccessTokenFailsForBadJsonResponse(self): + self.opener.open_result = 'blah' + cred = oauth2_client.RefreshToken(self.client, 'abc123') + self.assertRaises( + oauth2_client.AccessTokenRefreshError, self.client.FetchAccessToken, cred) + + def testFetchAccessTokenFailsForErrorResponse(self): + self.opener.open_error = urllib2.HTTPError( + None, 400, 'Bad Request', None, StringIO('{"error": "invalid token"}')) + cred = oauth2_client.RefreshToken(self.client, 'abc123') + self.assertRaises( + oauth2_client.AccessTokenRefreshError, self.client.FetchAccessToken, cred) + + def testFetchAccessTokenFailsForHttpError(self): + self.opener.open_result = urllib2.HTTPError( + 'foo', 400, 'Bad Request', None, None) + cred = oauth2_client.RefreshToken(self.client, 'abc123') + self.assertRaises( + oauth2_client.AccessTokenRefreshError, self.client.FetchAccessToken, cred) + + def testGetAccessToken(self): + refresh_token = 'ref_token' + access_token_1 = 'abc123' + self.opener.open_result = ( + '{"access_token":"%s",' '"expires_in":3600}' % access_token_1) + cred = oauth2_client.RefreshToken(self.client, refresh_token) + + token_1 = self.client.GetAccessToken(cred) + + # There's no access token in the cache; verify that we fetched a fresh + # token. 
+ self.assertEquals({ + 'grant_type': ['refresh_token'], + 'client_id': ['clid'], + 'client_secret': ['clsecret'], + 'refresh_token': [refresh_token]}, + urlparse.parse_qs(self.opener.open_capture_data, keep_blank_values=True, + strict_parsing=True)) + self.assertEquals(access_token_1, token_1.token) + self.assertEquals(self.start_time + datetime.timedelta(minutes=60), + token_1.expiry) + + # Advance time by less than expiry time, and fetch another token. + self.opener.reset() + self.mock_datetime.mock_now = ( + self.start_time + datetime.timedelta(minutes=55)) + token_2 = self.client.GetAccessToken(cred) + + # Since the access token wasn't expired, we get the cache token, and there + # was no refresh request. + self.assertEquals(token_1, token_2) + self.assertEquals(access_token_1, token_2.token) + self.assertEquals(None, self.opener.open_capture_url) + self.assertEquals(None, self.opener.open_capture_data) + + # Advance time past expiry time, and fetch another token. + self.opener.reset() + self.mock_datetime.mock_now = ( + self.start_time + datetime.timedelta(minutes=55, seconds=1)) + access_token_2 = 'zyx456' + self.opener.open_result = ( + '{"access_token":"%s",' '"expires_in":3600}' % access_token_2) + token_3 = self.client.GetAccessToken(cred) + + # This should have resulted in a refresh request and a fresh access token. 
+ self.assertEquals({ + 'grant_type': ['refresh_token'], + 'client_id': ['clid'], + 'client_secret': ['clsecret'], + 'refresh_token': [refresh_token]}, + urlparse.parse_qs(self.opener.open_capture_data, keep_blank_values=True, + strict_parsing=True)) + self.assertEquals(access_token_2, token_3.token) + self.assertEquals(self.mock_datetime.mock_now + datetime.timedelta(minutes=60), + token_3.expiry) + + def testGetAuthorizationUri(self): + authn_uri = self.client.GetAuthorizationUri( + 'https://www.example.com/oauth/redir?mode=approve%20me', + ('scope_foo', 'scope_bar'), + {'state': 'this and that & sundry'}) + + uri_parts = urlparse.urlsplit(authn_uri) + self.assertEquals(('https', 'provider.example.com', '/oauth/provider'), + uri_parts[:3]) + + self.assertEquals({ + 'response_type': ['code'], + 'client_id': ['clid'], + 'redirect_uri': + ['https://www.example.com/oauth/redir?mode=approve%20me'], + 'scope': ['scope_foo scope_bar'], + 'state': ['this and that & sundry'], + 'mode': ['authorize']}, + urlparse.parse_qs(uri_parts[3])) + + def testExchangeAuthorizationCode(self): + code = 'codeABQ1234' + exp_refresh_token = 'ref_token42' + exp_access_token = 'access_tokenXY123' + self.opener.open_result = ( + '{"access_token":"%s","expires_in":3600,"refresh_token":"%s"}' + % (exp_access_token, exp_refresh_token)) + + refresh_token, access_token = self.client.ExchangeAuthorizationCode( + code, 'urn:ietf:wg:oauth:2.0:oob', ('scope1', 'scope2')) + + self.assertEquals({ + 'grant_type': ['authorization_code'], + 'client_id': ['clid'], + 'client_secret': ['clsecret'], + 'code': [code], + 'redirect_uri': ['urn:ietf:wg:oauth:2.0:oob'], + 'scope': ['scope1 scope2'] }, + urlparse.parse_qs(self.opener.open_capture_data, keep_blank_values=True, + strict_parsing=True)) + self.assertEquals(exp_access_token, access_token.token) + self.assertEquals(self.start_time + datetime.timedelta(minutes=60), + access_token.expiry) + + self.assertEquals(self.client, refresh_token.oauth2_client) + 
self.assertEquals(exp_refresh_token, refresh_token.refresh_token) + + # Check that the access token was put in the cache. + cached_token = self.client.access_token_cache.GetToken( + refresh_token.CacheKey()) + self.assertEquals(access_token, cached_token) + + +class AccessTokenTest(unittest.TestCase): + + def testShouldRefresh(self): + mock_datetime = MockDateTime() + start = datetime.datetime(2011, 3, 1, 11, 25, 13, 300826) + expiry = start + datetime.timedelta(minutes=60) + token = oauth2_client.AccessToken( + 'foo', expiry, datetime_strategy=mock_datetime) + + mock_datetime.mock_now = start + self.assertFalse(token.ShouldRefresh()) + + mock_datetime.mock_now = start + datetime.timedelta(minutes=54) + self.assertFalse(token.ShouldRefresh()) + + mock_datetime.mock_now = start + datetime.timedelta(minutes=55) + self.assertFalse(token.ShouldRefresh()) + + mock_datetime.mock_now = start + datetime.timedelta( + minutes=55, seconds=1) + self.assertTrue(token.ShouldRefresh()) + + mock_datetime.mock_now = start + datetime.timedelta( + minutes=61) + self.assertTrue(token.ShouldRefresh()) + + mock_datetime.mock_now = start + datetime.timedelta(minutes=58) + self.assertFalse(token.ShouldRefresh(time_delta=120)) + + mock_datetime.mock_now = start + datetime.timedelta( + minutes=58, seconds=1) + self.assertTrue(token.ShouldRefresh(time_delta=120)) + + def testShouldRefreshNoExpiry(self): + mock_datetime = MockDateTime() + start = datetime.datetime(2011, 3, 1, 11, 25, 13, 300826) + token = oauth2_client.AccessToken( + 'foo', None, datetime_strategy=mock_datetime) + + mock_datetime.mock_now = start + self.assertFalse(token.ShouldRefresh()) + + mock_datetime.mock_now = start + datetime.timedelta( + minutes=472) + self.assertFalse(token.ShouldRefresh()) + + def testSerialization(self): + expiry = datetime.datetime(2011, 3, 1, 11, 25, 13, 300826) + token = oauth2_client.AccessToken('foo', expiry) + serialized_token = token.Serialize() + LOG.debug('testSerialization: 
serialized_token=%s' % serialized_token) + + token2 = oauth2_client.AccessToken.UnSerialize(serialized_token) + self.assertEquals(token, token2) + + +class RefreshTokenTest(unittest.TestCase): + def setUp(self): + self.opener = MockOpener() + self.mock_datetime = MockDateTime() + self.start_time = datetime.datetime(2011, 3, 1, 10, 25, 13, 300826) + self.mock_datetime.mock_now = self.start_time + self.client = oauth2_client.OAuth2Client( + oauth2_client.OAuth2Provider( + 'Sample OAuth Provider', + 'https://provider.example.com/oauth/provider?mode=authorize', + 'https://provider.example.com/oauth/provider?mode=token'), + 'clid', 'clsecret', + url_opener=self.opener, datetime_strategy=self.mock_datetime) + + self.cred = oauth2_client.RefreshToken(self.client, 'ref_token_abc123') + + def testUniqeId(self): + cred_id = self.cred.CacheKey() + self.assertEquals('0720afed6871f12761fbea3271f451e6ba184bf5', cred_id) + + def testGetAuthorizationHeader(self): + access_token = 'access_123' + self.opener.open_result = ( + '{"access_token":"%s","expires_in":3600}' % access_token) + + self.assertEquals('Bearer %s' % access_token, + self.cred.GetAuthorizationHeader()) + + +class FileSystemTokenCacheTest(unittest.TestCase): + + def setUp(self): + self.cache = oauth2_client.FileSystemTokenCache() + self.start_time = datetime.datetime(2011, 3, 1, 10, 25, 13, 300826) + self.token_1 = oauth2_client.AccessToken('token1', self.start_time) + self.token_2 = oauth2_client.AccessToken( + 'token2', self.start_time + datetime.timedelta(seconds=492)) + self.key = 'token1key' + + def tearDown(self): + try: + os.unlink(self.cache.CacheFileName(self.key)) + except: + pass + + def testPut(self): + self.cache.PutToken(self.key, self.token_1) + # Assert that the cache file exists and has correct permissions. + self.assertEquals( + 0600, S_IMODE(os.stat(self.cache.CacheFileName(self.key)).st_mode)) + + def testPutGet(self): + # No cache file present. 
+ self.assertEquals(None, self.cache.GetToken(self.key)) + + # Put a token + self.cache.PutToken(self.key, self.token_1) + cached_token = self.cache.GetToken(self.key) + self.assertEquals(self.token_1, cached_token) + + # Put a different token + self.cache.PutToken(self.key, self.token_2) + cached_token = self.cache.GetToken(self.key) + self.assertEquals(self.token_2, cached_token) + + def testGetBadFile(self): + f = open(self.cache.CacheFileName(self.key), 'w') + f.write('blah') + f.close() + self.assertEquals(None, self.cache.GetToken(self.key)) + + def testCacheFileName(self): + cache = oauth2_client.FileSystemTokenCache( + path_pattern='/var/run/ccache/token.%(uid)s.%(key)s') + self.assertEquals('/var/run/ccache/token.%d.abc123' % os.getuid(), + cache.CacheFileName('abc123')) + + cache = oauth2_client.FileSystemTokenCache( + path_pattern='/var/run/ccache/token.%(key)s') + self.assertEquals('/var/run/ccache/token.abc123', + cache.CacheFileName('abc123')) + + +if __name__ == '__main__': + logging.basicConfig(level=logging.DEBUG) + unittest.main() diff --git a/third_party/gsutil/oauth2_plugin/oauth2_helper.py b/third_party/gsutil/oauth2_plugin/oauth2_helper.py new file mode 100644 index 000000000..9cf22d264 --- /dev/null +++ b/third_party/gsutil/oauth2_plugin/oauth2_helper.py @@ -0,0 +1,110 @@ +# Copyright 2011 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Helper routines to facilitate use of oauth2_client in gsutil.""" + +import sys +import time +import webbrowser + +import oauth2_client + +GSUTIL_CLIENT_ID = '909320924072.apps.googleusercontent.com' +# Google OAuth2 clients always have a secret, even if the client is an installed +# application/utility such as gsutil. Of course, in such cases the "secret" is +# actually publicly known; security depends entirely on the secrecy of refresh +# tokens, which effectively become bearer tokens. +GSUTIL_CLIENT_NOTSOSECRET = 'p3RlpR10xMFh9ZXBS/ZNLYUu' + +GOOGLE_OAUTH2_PROVIDER_LABEL = 'Google' +GOOGLE_OAUTH2_PROVIDER_AUTHORIZATION_URI = ( + 'https://accounts.google.com/o/oauth2/auth') +GOOGLE_OAUTH2_PROVIDER_TOKEN_URI = ( + 'https://accounts.google.com/o/oauth2/token') + +OOB_REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob' + +def OAuth2ClientFromBotoConfig(config): + token_cache = None + token_cache_type = config.get('OAuth2', 'token_cache', 'file_system') + + if token_cache_type == 'file_system': + if config.has_option('OAuth2', 'token_cache_path_pattern'): + token_cache = oauth2_client.FileSystemTokenCache( + path_pattern=config.get('OAuth2', 'token_cache_path_pattern')) + else: + token_cache = oauth2_client.FileSystemTokenCache() + elif token_cache_type == 'in_memory': + token_cache = oauth2_client.InMemoryTokenCache() + else: + raise Exception( + "Invalid value for config option OAuth2/token_cache: %s" % + token_cache_type) + + proxy = None + if (config.has_option('Boto', 'proxy') + and config.has_option('Boto', 'proxy_port')): + proxy = "%s:%s" % (config.get('Boto', 'proxy'), + config.get('Boto', 'proxy_port')) + + provider_label = config.get( + 'OAuth2', 'provider_label', GOOGLE_OAUTH2_PROVIDER_LABEL) + provider_authorization_uri = config.get( + 'OAuth2', 'provider_authorization_uri', + GOOGLE_OAUTH2_PROVIDER_AUTHORIZATION_URI) + provider_token_uri = config.get( + 'OAuth2', 'provider_token_uri', GOOGLE_OAUTH2_PROVIDER_TOKEN_URI) + + client_id = config.get('OAuth2',
'client_id', GSUTIL_CLIENT_ID) + client_secret = config.get( + 'OAuth2', 'client_secret', GSUTIL_CLIENT_NOTSOSECRET) + + return oauth2_client.OAuth2Client( + oauth2_client.OAuth2Provider( + provider_label, provider_authorization_uri, provider_token_uri), + client_id, client_secret, + proxy=proxy, access_token_cache=token_cache) + +def OAuth2ApprovalFlow(oauth2_client, scopes, launch_browser=False): + approval_url = oauth2_client.GetAuthorizationUri(OOB_REDIRECT_URI, scopes) + if launch_browser: + sys.stdout.write( + 'Attempting to launch a browser with the OAuth2 approval dialog at ' + 'URL: %s\n\n' + '[Note: due to a Python bug, you may see a spurious error message "object is not\n' + 'callable [...] in [...] Popen.__del__" which can be ignored.]\n\n' % approval_url) + else: + sys.stdout.write( + 'Please navigate your browser to the following URL:\n%s\n' % + approval_url) + + sys.stdout.write( + 'In your browser you should see a page that requests you to authorize ' + 'gsutil to access\nGoogle Cloud Storage on your behalf. After you ' + 'approve, an authorization code will be displayed.\n\n') + if (launch_browser and + not webbrowser.open(approval_url, new=1, autoraise=True)): + sys.stdout.write( + 'Launching browser appears to have failed; please navigate a browser ' + 'to the following URL:\n%s\n' % approval_url) + # Short delay; webbrowser.open on linux insists on printing out a message + # which we don't want to run into the prompt for the auth code. 
+ time.sleep(2) + code = raw_input('Enter the authorization code: ') + + refresh_token, access_token = oauth2_client.ExchangeAuthorizationCode( + code, OOB_REDIRECT_URI, scopes) + + return refresh_token + diff --git a/third_party/gsutil/oauth2_plugin/oauth2_plugin.py b/third_party/gsutil/oauth2_plugin/oauth2_plugin.py new file mode 100644 index 000000000..83fe40a71 --- /dev/null +++ b/third_party/gsutil/oauth2_plugin/oauth2_plugin.py @@ -0,0 +1,24 @@ +from boto.auth_handler import AuthHandler +from boto.auth_handler import NotReadyToAuthenticate +import oauth2_client +import oauth2_helper + +class OAuth2Auth(AuthHandler): + + capability = ['google-oauth2', 's3'] + + def __init__(self, path, config, provider): + if (provider.name == 'google' + and config.has_option('Credentials', 'gs_oauth2_refresh_token')): + + self.oauth2_client = oauth2_helper.OAuth2ClientFromBotoConfig(config) + + self.refresh_token = oauth2_client.RefreshToken( + self.oauth2_client, + config.get('Credentials', 'gs_oauth2_refresh_token')) + else: + raise NotReadyToAuthenticate() + + def add_auth(self, http_request): + http_request.headers['Authorization'] = \ + self.refresh_token.GetAuthorizationHeader() diff --git a/third_party/gsutil/pkg_util.py b/third_party/gsutil/pkg_util.py new file mode 100644 index 000000000..8a0befc6b --- /dev/null +++ b/third_party/gsutil/pkg_util.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python + +# Utilities to facilitate maintaining one master list of package contents +# in MANIFEST.in and allow us to import that list into various packaging +# tools (e.g. rpmbuild and setup.py). + +# Define the file in which we maintain package contents. Rather than +# hard-coding our package contents, to ease maintenance we read the +# manifest file to obtain the list of files and directories to include. +MANIFEST_IN = 'MANIFEST.in' + +# Define input and output files for customizing the rpm package spec.
SPEC_IN = 'gsutil.spec.in'
SPEC_OUT = 'gsutil.spec'

# Root of rpmbuild tree for file enumeration in gsutil.spec file.
RPM_ROOT = '%{_datadir}/%{name}/'


def parse_manifest(files, dirs, manifest_path=None):
    '''Parse the manifest file and append its entries to the passed lists.

    Blank lines and lines starting with '#' are skipped.  Two directive forms
    are understood:

      include <name> [<name> ...]      -> every <name> is appended to files
      recursive-include <dir> *        -> <dir> is appended to dirs

    Args:
      files: list that receives the names from 'include' lines (mutated).
      dirs: list that receives the directory from each
          'recursive-include <dir> *' line (mutated).
      manifest_path: path of the manifest to read; when None the module-level
          MANIFEST_IN is used.

    Raises:
      Exception: on any other directive, including a 'recursive-include'
          line whose third token is missing or is not '*'.
    '''
    if manifest_path is None:
        manifest_path = MANIFEST_IN
    # The with-block closes the manifest even when an unsupported line raises.
    with open(manifest_path, 'r') as manifest:
        for raw_line in manifest:
            line = raw_line.strip()
            # Skip empty or comment lines.
            if not line or line.startswith('#'):
                continue
            # A non-empty stripped line always yields at least one token.
            tokens = line.split()
            directive = tokens[0]
            if directive == 'include':
                files.extend(tokens[1:])
            elif (directive == 'recursive-include' and len(tokens) >= 3
                  and tokens[2] == '*'):
                dirs.append(tokens[1])
            else:
                err = ('Unsupported type ' + directive + ' in ' +
                       manifest_path + ' file.')
                raise Exception(err)


# When executed as a separate script, create a dynamically generated rpm
# spec file. Otherwise, when loaded as a module by another script, no
# specific actions are taken, other than making utility functions available
# to the loading script.
if __name__ == '__main__':
    # Running as main so generate a new rpm spec file.
    files = []
    dirs = []
    parse_manifest(files, dirs)
    # Nested with-blocks close both spec files even if a write fails.
    with open(SPEC_IN, 'r') as fin:
        with open(SPEC_OUT, 'w') as fout:
            for line in fin:
                if line.strip() == '###FILES_GO_HERE###':
                    # Expand the placeholder into one rpm path per entry.
                    for file_name in files:
                        fout.write(RPM_ROOT + file_name + '\n')
                    for dir_name in dirs:
                        fout.write(RPM_ROOT + dir_name + '/\n')
                else:
                    fout.write(line)

# ---- third_party/retry_decorator/LICENSE.google (license text continues
# ---- on the following lines of the patch) ----
# Copyright (c) 2013, SaltyCrane
# All rights reserved.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + + * Neither the name of the SaltyCrane nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ---- third_party/retry_decorator/__init__.py is added as an empty file ----
# ---- third_party/retry_decorator/decorators.py ----
import time
from functools import wraps


def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=None):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    The wrapped function is called up to ``tries`` times in total.  Every
    failed attempt except the last is reported (through ``logger.warning``
    or ``print``) and followed by a sleep; the last attempt is made outside
    the ``try`` so its exception propagates to the caller.

    :param ExceptionToCheck: the exception to check. may be a tuple of
        exceptions to check
    :type ExceptionToCheck: Exception or tuple
    :param tries: number of times to try (not retry) before giving up
    :type tries: int
    :param delay: initial delay between retries in seconds
    :type delay: int
    :param backoff: backoff multiplier e.g. value of 2 will double the delay
        each retry
    :type backoff: int
    :param logger: logger to use. If None, print
    :type logger: logging.Logger instance
    :returns: a decorator that wraps a function with the retry loop
    """
    def deco_retry(f):

        @wraps(f)
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay
            while mtries > 1:
                try:
                    return f(*args, **kwargs)
                # "as" form works on Python 2.6+ and Python 3 alike.
                except ExceptionToCheck as e:
                    msg = "%s, Retrying in %d seconds..." % (str(e), mdelay)
                    if logger:
                        logger.warning(msg)
                    else:
                        # Single-argument call prints the same text whether
                        # print is a statement (Python 2) or a function.
                        print(msg)
                    time.sleep(mdelay)
                    mtries -= 1
                    mdelay *= backoff
            # Final attempt: any exception raised here reaches the caller.
            return f(*args, **kwargs)

        return f_retry  # true decorator

    return deco_retry