diff --git a/gerrit_util.py b/gerrit_util.py index 4b104e66cf..074bc5aa29 100644 --- a/gerrit_util.py +++ b/gerrit_util.py @@ -11,82 +11,29 @@ from __future__ import annotations import base64 import contextlib -import functools import http.cookiejar import json import logging import os import random import re -import shutil import socket -import subprocess -import sys import tempfile -import threading import time import urllib.parse -from dataclasses import dataclass from io import StringIO from multiprocessing.pool import ThreadPool -from typing import Any, Container, Dict, List, Optional -from typing import Tuple, Type, TypedDict, cast +from typing import Any, Container, Dict, List, Optional, Tuple, Type, TypedDict import httplib2 -import httplib2.socks import auth import gclient_utils import metrics import metrics_utils import scm -import subprocess2 - - -# HACK: httplib2 has significant bugs with its proxy support in -# python3. All httplib2 code should be rewritten to just use python -# stdlib which does not have these bugs. -# -# Prior to that, however, we will directly patch the buggy -# implementation of httplib2.socks.socksocket.__rewriteproxy which does -# not properly expect bytes as its argument instead of str. -# -# Note that __rewriteproxy is inherently buggy, as it relies on the -# python stdlib client to send the entire request header in a single -# call to socket.sendall, which is not explicitly guaranteed. -# -# Changes: -# * all string literals changed to bytes literals. -# * all __symbols changed to _socksocket__symbols. -# * Type annotations added to function signature. -def __fixed_rewrite_proxy(self: httplib2.socks.socksocket, header: bytes): - """ rewrite HTTP request headers to support non-tunneling proxies - (i.e. those which do not support the CONNECT method). - This only works for HTTP (not HTTPS) since HTTPS requires tunneling. - """ - host, endpt = None, None - hdrs = header.split(b"\r\n") - for hdr in hdrs: - if hdr.lower().startswith(b"host:"): - host = hdr - elif hdr.lower().startswith(b"get") or hdr.lower().startswith(b"post"): - endpt = hdr - if host and endpt: - hdrs.remove(host) - hdrs.remove(endpt) - host = host.split(b" ")[1] - endpt = endpt.split(b" ") - if self._socksocket__proxy[4] != None \ - and self._socksocket__proxy[5] != None: - hdrs.insert(0, self._socksocket__getauthheader()) - hdrs.insert(0, b"Host: %s" % host) - hdrs.insert(0, - b"%s http://%s%s %s" % (endpt[0], host, endpt[1], endpt[2])) - return b"\r\n".join(hdrs) - - -httplib2.socks.socksocket._socksocket__rewriteproxy = __fixed_rewrite_proxy + # TODO: Should fix these warnings. # pylint: disable=line-too-long @@ -178,8 +125,6 @@ class Authenticator(object): Authenticator instance to use. """ authenticators: List[Type[Authenticator]] = [ - SSOAuthenticator, - # LUCI Context takes priority since it's normally present only on bots, # which then must use it. LuciContextAuthenticator, @@ -197,182 +142,6 @@ class Authenticator(object): f"Could not find suitable authenticator, tried: {authenticators}") -class SSOAuthenticator(Authenticator): - """SSOAuthenticator implements auth with an SSO helper. - - This helper has the same protocol as `git-remote-persistent-https`. - - TEMPORARY configuration for Googlers (one `url` block for each - Gerrit host): - - [url "sso://chromium/"] - insteadOf = https://chromium.googlesource.com/ - insteadOf = http://chromium.googlesource.com/ - [depot-tools] - useNewAuthStack = 1 - """ - - # The running persistent sso proxy process. - # - # Lazily spawned by `authenticate`. - _sso_info_lock = threading.Lock() - - @dataclass - class SSOInfo: - proxy: httplib2.ProxyInfo - cookies: http.cookiejar.CookieJar - headers: Dict[str, str] - - _sso_info: Optional[SSOInfo] = None - - @staticmethod - @functools.cache - def _resolve_sso_binary_path(): - return shutil.which('git-remote-sso') or '' - - @classmethod - def is_applicable(cls) -> bool: - """If the git-remote-sso binary is in $PATH, we consider this - authenticator to be applicable.""" - if scm.GIT.GetConfig(os.getcwd(), 'depot-tools.usenewauthstack') != '1': - LOGGER.debug('SSOAuthenticator: skipping due to missing opt-in.') - return False - - pth = cls._resolve_sso_binary_path() - if pth: - LOGGER.debug('SSOAuthenticator: enabled %r.', pth) - return bool(pth) - - @classmethod - def _parse_config(cls, config: str) -> SSOInfo: - parsed: Dict[str, str] = dict(line.strip().split('=', 1) - for line in config.splitlines()) - - fullAuthHeader = cast( - str, - scm.GIT.Capture([ - 'config', - '-f', - parsed['include.path'], - 'http.extraHeader', - ])) - headerKey, headerValue = fullAuthHeader.split(':', 1) - headers = {headerKey.strip(): headerValue.strip()} - - proxy_host, proxy_port = parsed['http.proxy'].split(':', 1) - cj = http.cookiejar.MozillaCookieJar(parsed['http.cookiefile']) - cj.load() - - return cls.SSOInfo(proxy=httplib2.ProxyInfo( - httplib2.socks.PROXY_TYPE_HTTP_NO_TUNNEL, proxy_host.encode(), - int(proxy_port)), - cookies=cj, - headers=headers) - - @classmethod - def _launch_sso_helper(cls) -> SSOInfo: - """Launches the git-remote-sso process and extracts the parsed SSOInfo. - - Raises an exception if something goes wrong. - """ - tdir = tempfile.mkdtemp(suffix='gerrit_util') - tf = os.path.join(tdir, 'git-remote-sso.stderr') - - with tempdir() as tdir: - cmd = [ - cls._resolve_sso_binary_path(), - '-print_config', - 'sso://*.git.corp.google.com', - ] - - stderr_file = open(tf, mode='w') - - # NOTE: The git-remote-sso process does the following in order: - # - # 1. writes file to disk - # 2. writes config to stdout, and closes stdout - # 3. waits for stdin to be closed - # 4. deletes file on disk before exiting - # - # Thus, we must fully parse stdout AND consume the - # cookiefile BEFORE stopping the process. - with subprocess2.Popen(cmd, - stdout=subprocess2.PIPE, - stderr=stderr_file, - stdin=subprocess2.PIPE, - encoding='utf-8') as proc: - timedout = False - - def _fire_timeout(): - nonlocal timedout - timedout = True - proc.kill() - - timer = threading.Timer(5, _fire_timeout) - timer.start() - try: - ret = cls._parse_config(proc.stdout.read()) - finally: - timer.cancel() - - if timedout: - LOGGER.error( - 'SSOAuthenticator: Timeout: %r: reading config.', cmd) - raise subprocess.TimeoutExpired(cmd=cmd, timeout=5) - - proc.stdin.close() - if proc.wait(timeout=2) is None: - proc.kill() - LOGGER.warning( - 'SSOAuthenticator: Helper did not exit properly after waiting; forcefully killed' - ' (This is cleanup and likely not the root cause of any error)' - ) - - return ret - - @classmethod - def _get_sso_info(cls) -> SSOInfo: - with cls._sso_info_lock: - info = cls._sso_info - if not info: - info = cls._launch_sso_helper() - cls._sso_info = info - return info - - def authenticate(self, conn: HttpConn): - sso_info = self._get_sso_info() - conn.proxy_info = sso_info.proxy - conn.req_headers.update(sso_info.headers) - - # Now we must rewrite: - # https://xxx.googlesource.com -> - # http://xxx.git.corp.google.com - parsed = urllib.parse.urlparse(conn.req_uri) - parsed = parsed._replace(scheme='http') - if (hostname := - parsed.hostname) and hostname.endswith('.googlesource.com'): - assert not parsed.port, "SSOAuthenticator: netloc: port not supported" - assert not parsed.username, "SSOAuthenticator: netloc: username not supported" - assert not parsed.password, "SSOAuthenticator: netloc: password not supported" - - hostname_parts = hostname.rsplit('.', 2) # X, googlesource, com - conn.req_host = hostname_parts[0] + '.git.corp.google.com' - parsed = parsed._replace(netloc=conn.req_host) - conn.req_uri = parsed.geturl() - - # Finally, add cookies - sso_info.cookies.add_cookie_header(conn) - if 'Cookie' not in conn.req_headers: - LOGGER.debug("SSOAuthenticator: request headers missing 'Cookie'") - LOGGER.debug("SSO cookies: %r", sso_info.cookies._cookies) - LOGGER.debug("Request URI: %r", conn.req_uri) - LOGGER.debug("Request host: %r", conn.req_host) - LOGGER.debug("Request headers: %r", conn.req_headers) - - def debug_summary_state(self) -> str: - return '' - - class CookiesAuthenticator(Authenticator): """Authenticator implementation that uses ".gitcookies" for token. @@ -665,43 +434,6 @@ class HttpConn(httplib2.Http): 'body': self.req_body, } - # NOTE: We want to use HttpConn with CookieJar.add_cookie_header, so have - # compatible interface for that here. - # - # NOTE: Someone should really normalize this 'HttpConn' and httplib2 - # implementation to just be plain python3 stdlib instead. All of this was - # written during the bad old days of python2.6/2.7, pre-vpython. - def has_header(self, header: str) -> bool: - return header in self.req_headers - - def get_full_url(self) -> str: - return self.req_uri - - def get_header(self, - header: str, - default: Optional[str] = None) -> Optional[str]: - return self.req_headers.get(header, default) - - def add_unredirected_header(self, header: str, value: str): - # NOTE: httplib2 does not support unredirected headers. - self.req_headers[header] = value - - @property - def unverifiable(self) -> bool: - return False - - @property - def origin_req_host(self) -> str: - return self.req_host - - @property - def type(self) -> str: - return urllib.parse.urlparse(self.req_uri).scheme - - @property - def host(self) -> str: - return self.req_host - def CreateHttpConn(host, path, @@ -745,9 +477,8 @@ def CreateHttpConn(host, if LOGGER.isEnabledFor(logging.DEBUG): LOGGER.debug('%s %s', conn.req_method, conn.req_uri) - LOGGER.debug('conn.proxy_info%s', conn.proxy_info) for key, val in conn.req_headers.items(): - if key in ('Authorization', 'Cookie'): + if key == 'Authorization': val = 'HIDDEN' LOGGER.debug('%s: %s', key, val) if conn.req_body: