#!/usr/bin/env python3
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""A git command for managing a local cache of git repositories."""

import contextlib
import logging
import optparse
import os
import re
import subprocess
import sys
import tempfile
import threading
import time
import urllib.parse

from download_from_google_storage import Gsutil
import gclient_utils
import lockfile
import metrics
import subcommand

# Analogous to gc.autopacklimit git config. A mirror holding more than this
# many .pack files is considered for re-bootstrapping.
GC_AUTOPACKLIMIT = 50

# Printed when the mirror has to be deleted and rebuilt from scratch.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'

# gsutil creates many processes and threads. Creating too many gsutil cp
# processes may result in running out of resources, and may perform worse due to
# context switching. This limits how many concurrent gsutil cp processes
# git_cache runs.
GSUTIL_CP_SEMAPHORE = threading.Semaphore(2)

# WindowsError only exists as a builtin on Windows. Everywhere else, define a
# placeholder exception type so that WinErr is always a valid name.
try:
    WinErr = WindowsError  # pylint: disable=undefined-variable
except NameError:

    class WinErr(Exception):
        """Stand-in for WindowsError on platforms that do not define it."""


class ClobberNeeded(Exception):
    """Raised when the mirror is unusable and must be deleted and rebuilt."""


def exponential_backoff_retry(fn,
                              excs=(Exception, ),
                              name=None,
                              count=10,
                              sleep_time=0.25,
                              printerr=None):
    """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through. If this is not positive, |fn| is never called and None
        is returned.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn.

  Raises: The exception raised by the last attempt once |count| attempts have
      failed, or immediately any exception that is not listed in |excs|.
  """
    # `except ():` catches nothing, which would silently disable retries and
    # contradict the documented fallback, so substitute the base class.
    excs = excs or (Exception, )
    printerr = printerr or logging.warning
    for attempt in range(1, count + 1):
        try:
            return fn()
        except excs as e:
            # Out of attempts: let the last failure propagate unchanged.
            if attempt >= count:
                raise

            printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' %
                     ((name or 'operation'), sleep_time, attempt, count, e))
            time.sleep(sleep_time)
            sleep_time *= 2


class Mirror(object):
    """A local bare git repository that caches (mirrors) a remote repository.

    The mirror lives in a directory derived from the remote URL underneath
    the configured cache path. It can be populated by fetching from the
    remote and, for recognized hosts, bootstrapped from Google Storage.
    """

    # Name of the git executable to run; a .bat wrapper is used on Windows.
    git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
    # Path of the gsutil.py script, looked up next to this file.
    gsutil_exe = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              'gsutil.py')
    # Guards reads and writes of the class-level `cachepath` attribute.
    cachepath_lock = threading.Lock()

    # Sentinel stored as `cachepath` when no cache path could be determined.
    UNSET_CACHEPATH = object()

    # Used for tests. Extra arguments inserted into the `git config` command
    # that looks up cache.cachepath.
    _GIT_CONFIG_LOCATION = []

    @staticmethod
    def parse_fetch_spec(spec):
        """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
        parts = spec.split(':', 1)
        src = parts[0].lstrip('+').rstrip('/')
        if not src.startswith('refs/'):
            src = 'refs/heads/%s' % src
        dest = parts[1].rstrip('/') if len(parts) > 1 else src
        regex = r'\+%s:.*' % src.replace('*', r'\*')
        return ('+%s:%s' % (src, dest), regex)

    def __init__(self, url, refs=None, commits=None, print_func=None):
        self.url = url
        self.fetch_specs = {self.parse_fetch_spec(ref) for ref in (refs or [])}
        self.fetch_commits = set(commits or [])
        self.basedir = self.UrlToCacheDir(url)
        self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
        if print_func:
            self.print = self.print_without_file
            self.print_func = print_func
        else:
            self.print = print

    def print_without_file(self, message, **_kwargs):
        self.print_func(message)

    @contextlib.contextmanager
    def print_duration_of(self, what):
        start = time.time()
        try:
            yield
        finally:
            self.print('%s took %.1f minutes' % (what,
                                                 (time.time() - start) / 60.0))

    @property
    def bootstrap_bucket(self):
        b = os.getenv('OVERRIDE_BOOTSTRAP_BUCKET')
        if b:
            return b
        u = urllib.parse.urlparse(self.url)
        if u.netloc == 'chromium.googlesource.com':
            return 'chromium-git-cache'
        # Not recognized.
        return None

    @property
    def _gs_path(self):
        return 'gs://%s/v2/%s' % (self.bootstrap_bucket, self.basedir)

    @classmethod
    def FromPath(cls, path):
        return cls(cls.CacheDirToUrl(path))

    @staticmethod
    def UrlToCacheDir(url):
        """Convert a git url to a normalized form for the cache dir path."""
        if os.path.isdir(url):
            # Ignore the drive letter in Windows
            url = os.path.splitdrive(url)[1]
            return url.replace('-', '--').replace(os.sep, '-')

        parsed = urllib.parse.urlparse(url)
        norm_url = parsed.netloc + parsed.path
        if norm_url.endswith('.git'):
            norm_url = norm_url[:-len('.git')]

        # Use the same dir for authenticated URLs and unauthenticated URLs.
        norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')

        return norm_url.replace('-', '--').replace('/', '-').lower()

    @staticmethod
    def CacheDirToUrl(path):
        """Convert a cache dir path to its corresponding url."""
        netpath = re.sub(r'\b-\b', '/',
                         os.path.basename(path)).replace('--', '-')
        return 'https://%s' % netpath

    @classmethod
    def SetCachePath(cls, cachepath):
        with cls.cachepath_lock:
            setattr(cls, 'cachepath', cachepath)

    @classmethod
    def GetCachePath(cls):
        with cls.cachepath_lock:
            if not hasattr(cls, 'cachepath'):
                try:
                    cachepath = subprocess.check_output(
                        [cls.git_exe, 'config'] + cls._GIT_CONFIG_LOCATION +
                        ['cache.cachepath']).decode('utf-8', 'ignore').strip()
                except subprocess.CalledProcessError:
                    cachepath = os.environ.get('GIT_CACHE_PATH',
                                               cls.UNSET_CACHEPATH)
                setattr(cls, 'cachepath', cachepath)

            ret = getattr(cls, 'cachepath')
            if ret is cls.UNSET_CACHEPATH:
                raise RuntimeError('No cache.cachepath git configuration or '
                                   '$GIT_CACHE_PATH is set.')
            return ret

    @staticmethod
    def _GetMostRecentCacheDirectory(ls_out_set):
        ready_file_pattern = re.compile(r'.*/(\d+).ready$')
        ready_dirs = []

        for name in ls_out_set:
            m = ready_file_pattern.match(name)
            # Given <path>/<number>.ready,
            # we are interested in <path>/<number> directory
            if m and (name[:-len('.ready')] + '/') in ls_out_set:
                ready_dirs.append((int(m.group(1)), name[:-len('.ready')]))

        if not ready_dirs:
            return None

        return max(ready_dirs)[1]

    def Rename(self, src, dst):
        """Rename |src| to |dst|, retrying with backoff on OSError."""

        def attempt_rename():
            os.rename(src, dst)

        # This is somehow racy on Windows.
        # Catching OSError because WindowsError isn't portable and
        # pylint complains.
        description = 'rename [%s] => [%s]' % (src, dst)
        exponential_backoff_retry(attempt_rename,
                                  excs=(OSError, ),
                                  name=description,
                                  printerr=self.print)

    def RunGit(self, cmd, print_stdout=True, **kwargs):
        """Run git in a subprocess.

        Args:
          cmd: List of git arguments. '--git-dir <cwd>' is prepended unless
              '--git-dir' is already one of them.
          print_stdout: When true, output is passed to self.print through the
              'filter_fn' keyword (unless the caller supplied one).
          **kwargs: Forwarded to gclient_utils.CheckCallAndFilter. 'cwd'
              defaults to the mirror path and 'env' to a copy of os.environ.
        """
        workdir = kwargs.setdefault('cwd', self.mirror_path)
        if "--git-dir" not in cmd:
            cmd = ['--git-dir', os.path.abspath(workdir)] + cmd

        kwargs.setdefault('print_stdout', False)
        if print_stdout:
            kwargs.setdefault('filter_fn', self.print)

        environment = kwargs.get('env')
        if not environment:
            environment = kwargs.setdefault('env', os.environ.copy())
        # Use `true` as the askpass helper unless the caller configured one.
        for askpass_var in ('GIT_ASKPASS', 'SSH_ASKPASS'):
            environment.setdefault(askpass_var, 'true')

        self.print('running "git %s" in "%s"' % (' '.join(cmd), workdir))
        gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

    def config(self, reset_fetch_config=False):
        """Write the git configuration the mirror relies on.

        Args:
          reset_fetch_config: When true, all existing remote.origin.fetch
              entries are removed before the fetch specs are written.

        Raises:
          ClobberNeeded: if even a basic `git config` write fails.
        """
        if reset_fetch_config:
            try:
                self.RunGit(['config', '--unset-all', 'remote.origin.fetch'])
            except subprocess.CalledProcessError as e:
                # If exit code was 5, it means we attempted to unset a config
                # that didn't exist. Ignore it.
                if e.returncode != 5:
                    raise

        # Don't run git-gc in a daemon.  Bad things can happen if it gets
        # killed.
        try:
            self.RunGit(['config', 'gc.autodetach', '0'])
        except subprocess.CalledProcessError:
            # Hard error, need to clobber.
            raise ClobberNeeded()

        # Don't combine pack files into one big pack file.  It's really slow for
        # repositories, and there's no way to track progress and make sure it's
        # not stuck.
        if self.supported_project():
            self.RunGit(['config', 'gc.autopacklimit', '0'])

        # Allocate more RAM for cache-ing delta chains, for better performance
        # of "Resolving deltas".
        delta_cache_limit = gclient_utils.DefaultDeltaBaseCacheLimit()
        self.RunGit(['config', 'core.deltaBaseCacheLimit', delta_cache_limit])

        self.RunGit(['config', 'remote.origin.url', self.url])

        # The default branch spec always comes first, followed by any
        # additional specs requested for this mirror.
        default_spec = ('+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*')
        for spec, value_regex in [default_spec, *self.fetch_specs]:
            self.RunGit([
                'config', '--replace-all', 'remote.origin.fetch', spec,
                value_regex
            ])

    def bootstrap_repo(self, directory):
        """Bootstrap the repo from Google Storage if possible.

        More apt-ly named
        bootstrap_repo_from_cloud_if_possible_else_do_nothing().

        The newest complete upload is downloaded into a temporary directory
        inside the cache path, validated, and then moved over |directory|.

        Args:
          directory: Destination path; replaced if it already exists.

        Returns:
          True if |directory| now holds the downloaded repository, False if
          there was nothing to bootstrap from or the download failed.
        """
        if not self.bootstrap_bucket:
            return False

        gsutil = Gsutil(self.gsutil_exe, boto_path=None)

        # Get the most recent version of the directory.
        # This is determined from the most recent version of a .ready file.
        # The .ready file is only uploaded when an entire directory has been
        # uploaded to GS.
        _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path)
        ls_out_set = set(ls_out.strip().splitlines())
        latest_dir = self._GetMostRecentCacheDirectory(ls_out_set)

        if not latest_dir:
            self.print('No bootstrap file for %s found in %s, stderr:\n  %s' %
                       (self.mirror_path, self.bootstrap_bucket, '  '.join(
                           (ls_err or '').splitlines(True))))
            return False

        # Bound up front so the cleanup below never touches an unbound name,
        # even when creating the temporary directory itself fails.
        tempdir = None
        downloaded = False
        try:
            # create new temporary directory locally
            tempdir = tempfile.mkdtemp(prefix='_cache_tmp',
                                       dir=self.GetCachePath())
            self.RunGit(['init', '-b', 'main', '--bare'], cwd=tempdir)
            self.print('Downloading files in %s/* into %s.' %
                       (latest_dir, tempdir))
            with self.print_duration_of('download'):
                with GSUTIL_CP_SEMAPHORE:
                    code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*",
                                       tempdir)
            if not code:
                # A quick validation that all references are valid.
                self.RunGit(['for-each-ref'], print_stdout=False, cwd=tempdir)
                downloaded = True
        except Exception as e:
            self.print('Encountered error: %s' % str(e), file=sys.stderr)

        if not downloaded:
            # Covers both a failed gsutil copy and any exception above, so a
            # partial download is never left behind in the cache directory.
            if tempdir is not None:
                gclient_utils.rmtree(tempdir)
            return False

        # delete the old directory
        if os.path.exists(directory):
            gclient_utils.rmtree(directory)
        self.Rename(tempdir, directory)
        return True

    def contains_revision(self, revision):
        if not self.exists():
            return False

        if sys.platform.startswith('win'):
            # Windows .bat scripts use ^ as escape sequence, which means we have
            # to escape it with itself for every .bat invocation.
            needle = '%s^^^^{commit}' % revision
        else:
            needle = '%s^{commit}' % revision
        try:
            # cat-file exits with 0 on success, that is git object of given hash
            # was found.
            self.RunGit(['cat-file', '-e', needle])
            return True
        except subprocess.CalledProcessError:
            self.print('Commit with hash "%s" not found' % revision,
                       file=sys.stderr)
            return False

    def exists(self):
        return os.path.isfile(os.path.join(self.mirror_path, 'config'))

    def supported_project(self):
        """Returns true if this repo is known to have a bootstrap zip file."""
        u = urllib.parse.urlparse(self.url)
        return u.netloc in [
            'chromium.googlesource.com', 'chrome-internal.googlesource.com'
        ]

    def _preserve_fetchspec(self):
        """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
        if not self.exists():
            return
        try:
            config_fetchspecs = subprocess.check_output(
                [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
                cwd=self.mirror_path).decode('utf-8', 'ignore')
            for fetchspec in config_fetchspecs.splitlines():
                self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
        except subprocess.CalledProcessError:
            logging.warning(
                'Tried and failed to preserve remote.origin.fetch from the '
                'existing cache directory.  You may need to manually edit '
                '%s and "git cache fetch" again.' %
                os.path.join(self.mirror_path, 'config'))

    def _ensure_bootstrapped(self,
                             depth,
                             bootstrap,
                             reset_fetch_config,
                             force=False):
        """Make sure a usable mirror directory exists, rebuilding it if needed.

        A (re)bootstrap happens when |force| is set, the mirror does not
        exist, or its number of .pack files is zero or above
        GC_AUTOPACKLIMIT. Otherwise the existing mirror is left untouched.

        Args:
          depth: Requested shallow depth. A truthy value disables
              bootstrapping from Google Storage.
          bootstrap: Whether bootstrapping from Google Storage may be tried.
          reset_fetch_config: When false and a mirror already exists, its
              configured fetch specs are merged into self.fetch_specs before
              re-bootstrapping.
          force: Bootstrap regardless of the state of the existing mirror.
        """
        pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
        pack_files = []
        if os.path.isdir(pack_dir):
            pack_files = [
                f for f in os.listdir(pack_dir) if f.endswith('.pack')
            ]
            self.print('%s has %d .pack files, re-bootstrapping if >%d or ==0' %
                       (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

        # master->main branch migration left the cache in some builders to have
        # its HEAD still pointing to refs/heads/master. This causes bot_update
        # to fail. If in this state, delete the cache and force bootstrap.
        try:
            with open(os.path.join(self.mirror_path, 'HEAD')) as f:
                head_ref = f.read()
        except FileNotFoundError:
            head_ref = ''

        # Check only when HEAD points to master.
        if 'master' in head_ref:
            # Some repos could still have master so verify if the ref exists
            # first.
            show_ref_master_cmd = subprocess.run(
                [Mirror.git_exe, 'show-ref', '--verify', 'refs/heads/master'],
                cwd=self.mirror_path)

            if show_ref_master_cmd.returncode != 0:
                # Remove mirror
                gclient_utils.rmtree(self.mirror_path)

                # force bootstrap
                force = True

        should_bootstrap = (force or not self.exists()
                            or len(pack_files) > GC_AUTOPACKLIMIT
                            or len(pack_files) == 0)

        if not should_bootstrap:
            if depth and os.path.exists(
                    os.path.join(self.mirror_path, 'shallow')):
                logging.warning(
                    'Shallow fetch requested, but repo cache already exists.')
            return

        if not self.exists():
            if os.path.exists(self.mirror_path):
                # If the mirror path exists but self.exists() returns false,
                # we're in an unexpected state. Nuke the previous mirror
                # directory and start fresh.
                gclient_utils.rmtree(self.mirror_path)
            os.mkdir(self.mirror_path)
        elif not reset_fetch_config:
            # Re-bootstrapping an existing mirror; preserve existing fetch spec.
            self._preserve_fetchspec()

        bootstrapped = (not depth and bootstrap
                        and self.bootstrap_repo(self.mirror_path))

        if not bootstrapped:
            if not self.exists() or not self.supported_project():
                # Bootstrap failed due to:
                # 1. No previous cache.
                # 2. Project doesn't have a bootstrap folder.
                # Start with a bare git dir.
                self.RunGit(['init', '--bare'])
                # Set appropriate symbolic-ref
                remote_info = exponential_backoff_retry(
                    lambda: subprocess.check_output(
                        [
                            self.git_exe, '--git-dir',
                            os.path.abspath(self.mirror_path), 'remote', 'show',
                            self.url
                        ],
                        cwd=self.mirror_path).decode('utf-8', 'ignore').strip())
                # The flag has to be given when compiling: the second
                # argument of a compiled pattern's search() is a start
                # offset, so passing re.MULTILINE there skipped the first
                # characters and left '$' matching only at the very end.
                default_branch_regexp = re.compile(r'HEAD branch: (.*)$',
                                                   re.MULTILINE)
                m = default_branch_regexp.search(remote_info)
                if m:
                    # strip() drops a stray '\r' from CRLF-terminated output.
                    self.RunGit([
                        'symbolic-ref', 'HEAD',
                        'refs/heads/' + m.group(1).strip()
                    ])
            else:
                # Bootstrap failed, previous cache exists; warn and continue.
                logging.warning(
                    'Git cache has a lot of pack files (%d). Tried to '
                    're-bootstrap but failed. Continuing with non-optimized '
                    'repository.' % len(pack_files))

    def _fetch(self,
               verbose,
               depth,
               no_fetch_tags,
               reset_fetch_config,
               prune=True):
        """Configure the mirror, then fetch every configured spec and commit.

        Args:
          verbose: Add '-v --progress' to the fetch command.
          depth: If truthy, passed to git as '--depth'.
          no_fetch_tags: Add '--no-tags' to the fetch command.
          reset_fetch_config: Forwarded to self.config().
          prune: Add '--prune' to the fetch command.

        Raises:
          ClobberNeeded: if fetching the default branch spec fails. Failures
              of other specs or of individual commits are only logged.
        """
        self.config(reset_fetch_config)

        fetch_cmd = ['fetch']
        if verbose:
            fetch_cmd += ['-v', '--progress']
        if depth:
            fetch_cmd += ['--depth', str(depth)]
        if no_fetch_tags:
            fetch_cmd += ['--no-tags']
        if prune:
            fetch_cmd += ['--prune']
        fetch_cmd += ['origin']

        # Fetch whatever specs are actually stored in the mirror's config.
        list_specs_cmd = [
            self.git_exe, '--git-dir',
            os.path.abspath(self.mirror_path), 'config', '--get-all',
            'remote.origin.fetch'
        ]
        raw_specs = subprocess.check_output(list_specs_cmd,
                                            cwd=self.mirror_path)
        configured_specs = raw_specs.decode('utf-8',
                                            'ignore').strip().splitlines()
        for spec in configured_specs:
            try:
                self.print('Fetching %s' % spec)
                with self.print_duration_of('fetch %s' % spec):
                    self.RunGit(fetch_cmd + [spec], retry=True)
            except subprocess.CalledProcessError:
                if spec == '+refs/heads/*:refs/heads/*':
                    raise ClobberNeeded()  # Corrupted cache.
                logging.warning('Fetch of %s failed' % spec)

        # Explicitly requested commits use a plain fetch without the options
        # assembled above.
        for commit in self.fetch_commits:
            self.print('Fetching %s' % commit)
            try:
                with self.print_duration_of('fetch %s' % commit):
                    self.RunGit(['fetch', 'origin', commit], retry=True)
            except subprocess.CalledProcessError:
                logging.warning('Fetch of %s failed' % commit)

    def populate(self,
                 depth=None,
                 no_fetch_tags=False,
                 shallow=False,
                 bootstrap=False,
                 verbose=False,
                 lock_timeout=0,
                 reset_fetch_config=False):
        """Create or update the mirror while holding its lock.

        If the first attempt raises ClobberNeeded, the mirror directory is
        deleted and a forced bootstrap plus fetch is attempted once more.

        Args:
          depth: Shallow depth; defaults to 10000 when |shallow| is set.
          no_fetch_tags: Do not fetch tags.
          shallow: Request a shallow mirror.
          bootstrap: Allow bootstrapping from Google Storage.
          verbose: Run fetches verbosely.
          lock_timeout: Passed to lockfile.lock().
          reset_fetch_config: Reset the stored fetch specs first.
        """
        assert self.GetCachePath()
        if shallow and not depth:
            depth = 10000
        gclient_utils.safe_makedirs(self.GetCachePath())

        def bootstrap_then_fetch(force):
            self._ensure_bootstrapped(depth,
                                      bootstrap,
                                      reset_fetch_config,
                                      force=force)
            self._fetch(verbose, depth, no_fetch_tags, reset_fetch_config)

        with lockfile.lock(self.mirror_path, lock_timeout):
            try:
                bootstrap_then_fetch(force=False)
            except ClobberNeeded:
                # This is a major failure, we need to clean and force a
                # bootstrap.
                gclient_utils.rmtree(self.mirror_path)
                self.print(GIT_CACHE_CORRUPT_MESSAGE)
                bootstrap_then_fetch(force=True)

    def update_bootstrap(self, prune=False, gc_aggressive=False):
        """Compact the mirror and upload it to Google Storage.

        The upload goes to "<gs path>/<git number>/" and is followed by a
        "<git number>.ready" marker. Nothing is done if that directory and
        marker are already listed.

        Args:
          prune: After uploading, delete every listed entry other than the
              previously newest complete upload and the one just uploaded.
          gc_aggressive: Repack with the settings of 'gc --aggressive' but a
              bounded window memory, instead of running 'gc --prune=all'.
        """
        # NOTE: There have been cases where repos were being recursively
        # uploaded to google storage. E.g.
        # `<host_url>-<repo>/<gen_number>/<host_url>-<repo>/` in GS and
        # <host_url>-<repo>/<host_url>-<repo>/ on the bot. Check for recursed
        # files on the bot here and remove them if found before we upload to GS.
        # See crbug.com/1370443; keep this check until root cause is found.
        recursed_dir = os.path.join(self.mirror_path,
                                    self.mirror_path.split(os.path.sep)[-1])
        if os.path.exists(recursed_dir):
            self.print('Deleting unexpected directory: %s' % recursed_dir)
            gclient_utils.rmtree(recursed_dir)

        # The folder is <git number>
        gen_number = subprocess.check_output([self.git_exe, 'number'],
                                             cwd=self.mirror_path).decode(
                                                 'utf-8', 'ignore').strip()
        gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)

        dest_prefix = '%s/%s' % (self._gs_path, gen_number)

        # ls_out lists contents in the format: gs://blah/blah/123...
        self.print('running "gsutil ls %s":' % self._gs_path)
        ls_code, ls_out, ls_error = gsutil.check_call_with_retries(
            'ls', self._gs_path)
        if ls_code != 0:
            self.print(ls_error)
        else:
            self.print(ls_out)

        # Check to see if folder already exists in gs
        ls_out_set = set(ls_out.strip().splitlines())
        if (dest_prefix + '/' in ls_out_set
                and dest_prefix + '.ready' in ls_out_set):
            # Routed through self.print like every other message here, so a
            # custom print function also receives it.
            self.print('Cache %s already exists.' % dest_prefix)
            return

        # Reduce the number of individual files to download & write on disk.
        self.RunGit(['pack-refs', '--all'])

        # Run Garbage Collect to compress packfile.
        gc_args = ['gc', '--prune=all']
        if gc_aggressive:
            # The default "gc --aggressive" is often too aggressive for some
            # machines, since it attempts to create as many threads as there are
            # CPU cores, while not limiting per-thread memory usage, which puts
            # too much pressure on RAM on high-core machines, causing them to
            # thrash. Using lower-level commands gives more control over those
            # settings.

            # This might not be strictly necessary, but it's fast and is
            # normally run by 'gc --aggressive', so it shouldn't hurt.
            self.RunGit(['reflog', 'expire', '--all'])

            # These are the default repack settings for 'gc --aggressive'.
            gc_args = [
                'repack', '-d', '-l', '-f', '--depth=50', '--window=250', '-A',
                '--unpack-unreachable=all'
            ]
            # A 1G memory limit seems to provide comparable pack results as the
            # default, even for our largest repos, while preventing runaway
            # memory (at least on current Chromium builders which have about 4G
            # RAM per core).
            gc_args.append('--window-memory=1g')
            # NOTE: It might also be possible to avoid thrashing with a larger
            # window (e.g. "--window-memory=2g") by limiting the number of
            # threads created (e.g. "--threads=[cores/2]"). Some limited testing
            # didn't show much difference in outcomes on our current repos, but
            # it might be worth trying if the repos grow much larger and the
            # packs don't seem to be getting compressed enough.
        self.RunGit(gc_args)

        self.print('running "gsutil -m rsync -r -d %s %s"' %
                   (self.mirror_path, dest_prefix))
        # NOTE(review): the result of this call is ignored, so the .ready
        # marker below is uploaded even if the rsync failed - confirm whether
        # that is intended.
        gsutil.call('-m', 'rsync', '-r', '-d', self.mirror_path, dest_prefix)

        # Create .ready file and upload
        ready_fd, ready_file_name = tempfile.mkstemp(suffix='.ready')
        # mkstemp hands back an open descriptor; only the (empty) file on
        # disk is needed, so close it right away instead of leaking it.
        os.close(ready_fd)
        try:
            self.print('running "gsutil cp %s %s.ready"' %
                       (ready_file_name, dest_prefix))
            gsutil.call('cp', ready_file_name, '%s.ready' % (dest_prefix))
        finally:
            os.remove(ready_file_name)

        # remove all other directory/.ready files in the same gs_path
        # except for the directory/.ready file previously created
        # which can be used for bootstrapping while the current one is
        # being uploaded
        if not prune:
            return
        prev_dest_prefix = self._GetMostRecentCacheDirectory(ls_out_set)
        if not prev_dest_prefix:
            return
        # The listing predates the upload, but it can already contain the
        # destination (e.g. a directory left without its .ready marker).
        # Never delete what was just uploaded.
        keep = {
            prev_dest_prefix + '/', prev_dest_prefix + '.ready',
            dest_prefix + '/', dest_prefix + '.ready'
        }
        for path in ls_out_set:
            if path in keep:
                continue
            if path.endswith('.ready'):
                gsutil.call('rm', path)
                continue
            gsutil.call('-m', 'rm', '-r', path)

    @staticmethod
    def DeleteTmpPackFiles(path):
        pack_dir = os.path.join(path, 'objects', 'pack')
        if not os.path.isdir(pack_dir):
            return
        pack_files = [
            f for f in os.listdir(pack_dir)
            if f.startswith('.tmp-') or f.startswith('tmp_pack_')
        ]
        for f in pack_files:
            f = os.path.join(pack_dir, f)
            try:
                os.remove(f)
                logging.warning('Deleted stale temporary pack file %s' % f)
            except OSError:
                logging.warning('Unable to delete temporary pack file %s' % f)


@subcommand.usage('[url of repo to check for caching]')
@metrics.collector.collect_metrics('git cache exists')
def CMDexists(parser, args):
    """Check to see if there already is a cache of the given repo."""
    # Prints the mirror path and returns 0 when a cache exists for the URL;
    # returns 1 otherwise.
    _, args = parser.parse_args(args)
    if len(args) != 1:
        parser.error('git cache exists only takes exactly one repo url.')
    mirror = Mirror(args[0])
    if not mirror.exists():
        return 1
    print(mirror.mirror_path)
    return 0


@subcommand.usage('[url of repo to create a bootstrap zip file]')
@metrics.collector.collect_metrics('git cache update-bootstrap')
def CMDupdate_bootstrap(parser, args):
    """Create and uploads a bootstrap tarball."""
    # Returns 0 on success and 1 when run on Windows, which is unsupported.
    # Lets just assert we can't do this on Windows.
    if sys.platform.startswith('win'):
        print('Sorry, update bootstrap will not work on Windows.',
              file=sys.stderr)
        return 1

    parser.add_option('--skip-populate',
                      action='store_true',
                      help='Skips "populate" step if mirror already exists.')
    parser.add_option('--gc-aggressive',
                      action='store_true',
                      help='Run aggressive repacking of the repo.')
    parser.add_option('--prune',
                      action='store_true',
                      help='Prune all other cached bundles of the same repo.')

    # CMDpopulate parses the arguments itself, so keep an untouched copy.
    populate_args = args[:]
    options, args = parser.parse_args(args)
    # Report a usage error instead of failing with an IndexError below when
    # no repository URL was given.
    if not args:
        parser.error('git cache update-bootstrap requires a repo url.')
    url = args[0]
    mirror = Mirror(url)
    if not options.skip_populate or not mirror.exists():
        CMDpopulate(parser, populate_args)
    else:
        print('Skipped populate step.')

    # Get the repo directory.
    _, args2 = parser.parse_args(args)
    url = args2[0]
    mirror = Mirror(url)
    mirror.update_bootstrap(options.prune, options.gc_aggressive)
    return 0


@subcommand.usage('[url of repo to add to or update in cache]')
@metrics.collector.collect_metrics('git cache populate')
def CMDpopulate(parser, args):
    """Ensure that the cache has all up-to-date objects for the given repo."""
    parser.add_option('--depth',
                      type='int',
                      help='Only cache DEPTH commits of history')
    parser.add_option(
        '--no-fetch-tags',
        action='store_true',
        help=('Don\'t fetch tags from the server. This can speed up '
              'fetch considerably when there are many tags.'))
    parser.add_option('--shallow',
                      '-s',
                      action='store_true',
                      help='Only cache 10000 commits of history')
    parser.add_option('--ref',
                      action='append',
                      help='Specify additional refs to be fetched')
    parser.add_option('--commit',
                      action='append',
                      help='Specify additional commits to be fetched')
    parser.add_option('--no_bootstrap',
                      '--no-bootstrap',
                      action='store_true',
                      help='Don\'t bootstrap from Google Storage')
    parser.add_option('--ignore_locks',
                      '--ignore-locks',
                      action='store_true',
                      help='NOOP. This flag will be removed in the future.')
    parser.add_option(
        '--break-locks',
        action='store_true',
        help='Break any existing lock instead of just ignoring it')
    parser.add_option(
        '--reset-fetch-config',
        action='store_true',
        default=False,
        help='Reset the fetch config before populating the cache.')

    options, args = parser.parse_args(args)
    if len(args) != 1:
        parser.error('git cache populate only takes exactly one repo url.')
    # Both lock flags are still accepted but only produce a notice.
    if options.ignore_locks:
        print('ignore_locks is no longer used. Please remove its usage.')
    if options.break_locks:
        print('break_locks is no longer used. Please remove its usage.')

    mirror = Mirror(args[0], refs=options.ref, commits=options.commit)
    # A missing or zero --depth is passed as None, which is populate()'s
    # own default for that argument.
    mirror.populate(depth=options.depth or None,
                    no_fetch_tags=options.no_fetch_tags,
                    verbose=options.verbose,
                    shallow=options.shallow,
                    bootstrap=not options.no_bootstrap,
                    lock_timeout=options.timeout,
                    reset_fetch_config=options.reset_fetch_config)


@subcommand.usage('Fetch new commits into cache and current checkout')
@metrics.collector.collect_metrics('git cache fetch')
def CMDfetch(parser, args):
    """Update mirror, and fetch in cwd."""
    # NOTE(review): the docstring above is intentionally left untouched; the
    # subcommand dispatcher presumably surfaces it as help text -- confirm
    # before expanding it.
    #
    # Args:
    #   parser: option parser handed in by the dispatcher. The fetch-specific
    #       options are registered on it here; it must also provide the
    #       global --timeout option, which is read as options.timeout.
    #   args: list of command-line arguments for this subcommand. Any
    #       positional arguments are treated as the remotes to fetch.
    #
    # Returns:
    #   0 on success.
    #
    # Raises:
    #   subprocess.CalledProcessError: if one of the git invocations fails
    #       (other than the "branch has no configured remote" lookup, which
    #       falls back to 'origin').
    parser.add_option('--all', action='store_true', help='Fetch all remotes')
    parser.add_option('--no_bootstrap',
                      '--no-bootstrap',
                      action='store_true',
                      help='Don\'t (re)bootstrap from Google Storage')
    parser.add_option(
        '--no-fetch-tags',
        action='store_true',
        help=('Don\'t fetch tags from the server. This can speed up '
              'fetch considerably when there are many tags.'))
    options, args = parser.parse_args(args)

    # Figure out which remotes to fetch.  This mimics the behavior of regular
    # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
    # this will NOT try to traverse up the branching structure to find the
    # ultimate remote to update.
    remotes = []
    if options.all:
        # Report bad usage through the parser (as CMDpopulate does) rather
        # than with an assert, which is stripped when Python runs with -O.
        if args:
            parser.error('fatal: fetch --all does not take repository argument')
        remotes = subprocess.check_output([Mirror.git_exe, 'remote'])
        remotes = remotes.decode('utf-8', 'ignore').splitlines()
    elif args:
        remotes = args
    else:
        current_branch = subprocess.check_output(
            [Mirror.git_exe, 'rev-parse', '--abbrev-ref', 'HEAD'])
        current_branch = current_branch.decode('utf-8', 'ignore').strip()
        # 'HEAD' means a detached HEAD: there is no branch to look up.
        if current_branch != 'HEAD':
            try:
                upstream = subprocess.check_output([
                    Mirror.git_exe, 'config',
                    'branch.%s.remote' % current_branch
                ])
            except subprocess.CalledProcessError as e:
                # `git config` exits with status 1 when the key is not set.
                # A branch without a configured remote is not an error; it
                # simply falls through to the 'origin' default below.
                if e.returncode != 1:
                    raise
                upstream = b''
            upstream = upstream.decode('utf-8', 'ignore').strip()
            # '.' denotes a branch tracking another local branch.
            if upstream and upstream != '.':
                remotes = [upstream]
    if not remotes:
        remotes = ['origin']

    cachepath = Mirror.GetCachePath()
    # Strip the trailing newline git prints; otherwise it becomes part of the
    # path handed to Mirror.FromPath.
    git_dir = subprocess.check_output(
        [Mirror.git_exe, 'rev-parse', '--git-dir']).decode('utf-8', 'ignore')
    git_dir = os.path.abspath(git_dir.strip())
    if git_dir.startswith(cachepath):
        # cwd is itself a repository under the cache directory: just update
        # that mirror, there is no separate checkout to fetch into.
        mirror = Mirror.FromPath(git_dir)
        mirror.populate(bootstrap=not options.no_bootstrap,
                        no_fetch_tags=options.no_fetch_tags,
                        lock_timeout=options.timeout)
        return 0
    for remote in remotes:
        remote_url = subprocess.check_output(
            [Mirror.git_exe, 'config',
             'remote.%s.url' % remote])
        remote_url = remote_url.decode('utf-8', 'ignore').strip()
        if remote_url.startswith(cachepath):
            # The remote points into the cache: refresh the mirror first so
            # the fetch below picks up new commits.
            mirror = Mirror.FromPath(remote_url)
            # Replace the mirror's print hook with a no-op and emit a single
            # status line instead.
            mirror.print = lambda *args: None
            print('Updating git cache...')
            mirror.populate(bootstrap=not options.no_bootstrap,
                            no_fetch_tags=options.no_fetch_tags,
                            lock_timeout=options.timeout)
        subprocess.check_call([Mirror.git_exe, 'fetch', remote])
    return 0


@subcommand.usage('do not use - it is a noop.')
@metrics.collector.collect_metrics('git cache unlock')
def CMDunlock(parser, args):
    """This command does nothing."""
    # Neither |parser| nor |args| is consulted: the command only tells the
    # user that it is a no-op.
    notice = 'This command does nothing and will be removed in the future.'
    print(notice)


class OptionParser(optparse.OptionParser):
    """optparse parser pre-loaded with the options shared by all subcommands.

    On top of the stock parser this class registers the global options
    (--cache-dir, --verbose, --quiet, --timeout) and, when parsing, configures
    logging, records which options were passed with the metrics collector and
    applies a --cache-dir override to Mirror.
    """

    def __init__(self, *args, **kwargs):
        """Create the parser with prog 'git cache' and the global options.

        Args:
          *args: positional arguments forwarded to optparse.OptionParser.
          **kwargs: keyword arguments forwarded to optparse.OptionParser.
        """
        super().__init__(*args, prog='git cache', **kwargs)

        cache_dir_help = ('Path to the directory containing the caches. '
                          'Normally deduced from git config cache.cachepath '
                          'or $GIT_CACHE_PATH.')
        self.add_option('-c', '--cache-dir', help=cache_dir_help)
        self.add_option(
            '-v',
            '--verbose',
            action='count',
            default=1,
            help='Increase verbosity (can be passed multiple times)')
        self.add_option('-q',
                        '--quiet',
                        action='store_true',
                        help='Suppress all extraneous output')
        self.add_option('--timeout',
                        type='int',
                        default=0,
                        help='Timeout for acquiring cache lock, in seconds')

    def parse_args(self, args=None, values=None):
        """Parse |args| and apply the global options.

        Args:
          args: list of arguments to parse, forwarded to optparse.
          values: accepted to match the optparse signature; not used here.

        Returns:
          An (options, args) tuple: the defaults overlaid with the options
          that were actually passed, and the leftover positional arguments.
        """
        # Parse into an empty Values object so that it ends up holding only
        # the options really present on the command line, without defaults.
        passed = optparse.Values()
        _, args = super().parse_args(args, passed)

        # Start from the defaults, then overlay what the user supplied.
        options = optparse.Values(self.get_default_values().__dict__)
        options._update_careful(passed.__dict__)

        # Report only the option names to metrics, never the values: values
        # can contain arbitrary information, which might be PII.
        metrics.collector.add('arguments', list(passed.__dict__))

        if options.quiet:
            options.verbose = 0

        # Map the verbosity count onto a logging level, clamping at DEBUG.
        log_levels = (logging.ERROR, logging.WARNING, logging.INFO,
                      logging.DEBUG)
        level_index = min(options.verbose, len(log_levels) - 1)
        logging.basicConfig(level=log_levels[level_index])

        try:
            configured_dir = Mirror.GetCachePath()
        except RuntimeError:
            configured_dir = None

        requested_dir = options.cache_dir
        if requested_dir:
            # Warn when the command line disagrees with the configured cache
            # directory; the command-line value wins either way.
            if configured_dir and (os.path.abspath(requested_dir) !=
                                   os.path.abspath(configured_dir)):
                logging.warning(
                    'Overriding globally-configured cache directory.')
            Mirror.SetCachePath(requested_dir)

        return options, args


def main(argv):
    """Dispatch |argv| to the matching CMD* function of this module.

    Args:
      argv: list of command-line arguments, without the program name.

    Returns:
      Whatever the command dispatcher's execute() returns.
    """
    return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)


if __name__ == '__main__':
    # Script entry point: run the requested subcommand inside the metrics
    # collector's print_notice_and_exit() context and exit with its result.
    try:
        with metrics.collector.print_notice_and_exit():
            exit_code = main(sys.argv[1:])
            sys.exit(exit_code)
    except KeyboardInterrupt:
        # Ctrl-C: print a short message instead of a traceback and exit 1.
        sys.stderr.write('interrupted\n')
        sys.exit(1)