#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A git command for managing a local cache of git repositories."""

import errno
import logging
import optparse
import os
import tempfile
import subprocess
import sys
import urlparse

import gclient_utils
import subcommand


# Name of the git program to launch: 'git.bat' when sys.platform starts with
# 'win', plain 'git' everywhere else.
if sys.platform.startswith('win'):
  GIT_EXECUTABLE = 'git.bat'
else:
  GIT_EXECUTABLE = 'git'


def UrlToCacheDir(url):
  """Map a git url onto the flat directory name used for it in the cache.

  Only the network location and path of the url are kept. A trailing '.git'
  is removed, every literal '-' is doubled, every '/' is then turned into a
  single '-', and the result is lower-cased.
  """
  pieces = urlparse.urlparse(url)
  name = pieces.netloc + pieces.path
  suffix = '.git'
  if name.endswith(suffix):
    name = name[:len(name) - len(suffix)]
  # Double the real dashes first so they cannot be confused with the dashes
  # that stand in for path separators.
  escaped = name.replace('-', '--')
  return escaped.replace('/', '-').lower()


def RunGit(cmd, **kwargs):
  """Invoke git with the given arguments through gclient_utils.

  Args:
    cmd: list of arguments placed after the git executable.
    **kwargs: forwarded to gclient_utils.CheckCallAndFilter. 'cwd' defaults
        to the current working directory. With a truthy 'filter_fn' the
        filter is wrapped in gclient_utils.GitFilter, 'print_stdout'
        defaults to False and GIT_ASKPASS/SSH_ASKPASS default to 'true' in
        the environment handed to the subprocess. Without one,
        'print_stdout' defaults to True.
  """
  kwargs.setdefault('cwd', os.getcwd())
  output_filter = kwargs.get('filter_fn')
  if not output_filter:
    kwargs.setdefault('print_stdout', True)
  else:
    kwargs['filter_fn'] = gclient_utils.GitFilter(output_filter)
    kwargs.setdefault('print_stdout', False)
    # Reuse the caller's environment when one was supplied, otherwise start
    # from a copy of this process's environment.
    git_env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    for askpass_var in ('GIT_ASKPASS', 'SSH_ASKPASS'):
      git_env.setdefault(askpass_var, 'true')
  stdout = kwargs.get('stdout', sys.stdout)
  banner = 'running "git %s" in "%s"' % (' '.join(cmd), kwargs['cwd'])
  print >> stdout, banner
  gclient_utils.CheckCallAndFilter([GIT_EXECUTABLE] + cmd, **kwargs)

class LockError(Exception):
  """Raised when a lockfile cannot be acquired or released."""


class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  The lock for <path> is the file <path>.lock, whose first line holds the pid
  of the process that created it. Instances can be used as context managers:
  entering acquires the lock and leaving releases it.
  """

  def __init__(self, path):
    """Prepare a lock for path; nothing is created on disk yet."""
    self.path = os.path.abspath(path)
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Returns None when the lockfile cannot be read or does not start with an
    integer.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid.

    O_CREAT | O_EXCL makes the open fail with EEXIST when the lockfile is
    already present, so creation doubles as the "is it free?" test.
    """
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # The with-block closes the handle even if writing the pid fails.
    with os.fdopen(fd, 'w') as f:
      f.write('%d\n' % self.pid)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist."""
    os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    Note: This is a NON-BLOCKING FAIL-FAST operation.
    Do. Or do not. There is no try.

    Raises:
      LockError: if the lockfile already exists or cannot be created.
    """
    try:
      self._make_lockfile()
    except OSError as e:
      if e.errno == errno.EEXIST:
        raise LockError("%s is already locked" % self.path)
      else:
        raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if there is no lockfile, or if the pid recorded in it is not
          this process's pid.
    """
    if not self.is_locked():
      raise LockError("%s is not locked" % self.path)
    if not self.i_am_locking():
      raise LockError("%s is locked, but not by me" % self.path)
    self._remove_lockfile()

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns:
      True if a lockfile was removed, False if there was none to remove.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()

  def __enter__(self):
    """Acquire the lock and return this object for use in a with-block."""
    self.lock()
    return self

  def __exit__(self, *_exc):
    """Release the lock; exception details from the with-block are ignored."""
    self.unlock()


@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  # Prints the cache directory and returns 0 when a cache is present;
  # returns 1 (printing nothing) otherwise.
  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  cache_path = os.path.join(options.cache_dir, UrlToCacheDir(args[0]))
  # A cache only counts as present when its directory holds a 'config' file.
  has_config = os.path.isfile(os.path.join(cache_path, 'config'))
  if not (os.path.isdir(cache_path) and has_config):
    return 1
  # Parenthesised single value: prints identically as a print statement.
  print(cache_path)
  return 0


@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  # Flow: parse options, take the per-repo lock, then either build a fresh
  # bare repo in a temporary directory and move it into place, or refresh the
  # configuration of the existing cache and fetch into it.
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  options, args = parser.parse_args(args)
  if options.shallow and not options.depth:
    options.depth = 10000
  if not len(args) == 1:
    parser.error('git cache populate only takes exactly one repo url.')
  url = args[0]

  gclient_utils.safe_makedirs(options.cache_dir)
  repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(url))

  # Quiet mode drops '[up to date]' lines from the fetch output; verbose mode
  # asks git for more output and disables the filter.
  v = []
  filter_fn = lambda l: '[up to date]' not in l
  if options.verbose:
    v = ['-v', '--progress']
    filter_fn = None

  d = []
  if options.depth:
    d = ['--depth', '%d' % options.depth]

  def _config(directory):
    """Write the cache's git configuration into the repo at directory."""
    RunGit(['config', 'core.deltaBaseCacheLimit',
            gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=directory)
    RunGit(['config', 'remote.origin.url', url],
           cwd=directory)
    # --replace-all resets the fetch refspecs; the --add calls below then
    # append to that fresh list.
    RunGit(['config', '--replace-all', 'remote.origin.fetch',
            '+refs/heads/*:refs/heads/*'],
           cwd=directory)
    RunGit(['config', '--add', 'remote.origin.fetch',
            '+refs/tags/*:refs/tags/*'],
           cwd=directory)
    for ref in options.ref or []:
      ref = ref.rstrip('/')
      refspec = '+refs/%s/*:refs/%s/*' % (ref, ref)
      RunGit(['config', '--add', 'remote.origin.fetch', refspec],
             cwd=directory)

  with Lockfile(repo_dir):
    # Setup from scratch if the repo is new or is in a bad state.
    if not os.path.exists(os.path.join(repo_dir, 'config')):
      gclient_utils.rmtree(repo_dir)
      tempdir = tempfile.mkdtemp(suffix=UrlToCacheDir(url),
                                 dir=options.cache_dir)
      populated = False
      try:
        RunGit(['init', '--bare'], cwd=tempdir)
        _config(tempdir)
        fetch_cmd = ['fetch'] + v + d + ['origin']
        RunGit(fetch_cmd, filter_fn=filter_fn, cwd=tempdir, retry=True)
        os.rename(tempdir, repo_dir)
        populated = True
      finally:
        # mkdtemp never cleans up after itself; without this a failed
        # init/fetch/rename would leave a stray directory in the cache dir.
        if not populated:
          gclient_utils.rmtree(tempdir)
    else:
      _config(repo_dir)
      if options.depth and os.path.exists(os.path.join(repo_dir, 'shallow')):
        logging.warn('Shallow fetch requested, but repo cache already exists.')
      # The depth arguments are intentionally not passed for an existing cache.
      fetch_cmd = ['fetch'] + v + ['origin']
      RunGit(fetch_cmd, filter_fn=filter_fn, cwd=repo_dir, retry=True)


@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  # For each selected cache this removes <repo_dir>/config.lock (if present)
  # and <repo_dir>.lock (via Lockfile.break_lock). Nothing is removed unless
  # --force is given.
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.all:
    url = args[0]
    repo_dirs = [os.path.join(options.cache_dir, UrlToCacheDir(url))]
  else:
    entries = os.listdir(options.cache_dir)
    repo_dirs = [os.path.join(options.cache_dir, name)
                 for name in entries
                 if os.path.isdir(os.path.join(options.cache_dir, name))]
    # Also cover lockfiles whose repo directory is not (or no longer) there.
    lock_suffix = '.lock'
    for name in entries:
      if not name.endswith(lock_suffix):
        continue
      if not os.path.isfile(os.path.join(options.cache_dir, name)):
        continue
      # Strip only the trailing suffix; str.replace would also mangle a
      # '.lock' occurring in the middle of the name.
      repo_dir = os.path.join(options.cache_dir, name[:-len(lock_suffix)])
      if repo_dir not in repo_dirs:
        repo_dirs.append(repo_dir)
  lockfiles = [repo_dir + '.lock' for repo_dir in repo_dirs
               if os.path.exists(repo_dir + '.lock')]

  if not options.force:
    # The explicit '+' keeps the message from becoming the join separator.
    parser.error(('git cache unlock requires -f|--force to do anything. '
                  'Refusing to unlock the following repo caches: ') +
                 ', '.join(lockfiles))

  unlocked = []
  untouched = []
  for repo_dir in repo_dirs:
    lf = Lockfile(repo_dir)
    config_lock = os.path.join(repo_dir, 'config.lock')
    # Per-repo flag; kept separate from the 'unlocked' result list.
    did_unlock = False
    if os.path.exists(config_lock):
      os.remove(config_lock)
      did_unlock = True
    if lf.break_lock():
      did_unlock = True

    if did_unlock:
      unlocked.append(repo_dir)
    else:
      untouched.append(repo_dir)

  if unlocked:
    logging.info('Broke locks on these caches: %s' % unlocked)
  if untouched:
    logging.debug('Did not touch these caches: %s' % untouched)


class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options.

  Adds the -c/--cache-dir and -v/--verbose options to every parser, and
  makes parse_args() resolve the cache directory and set up logging.
  """

  def __init__(self, *args, **kwargs):
    """Create the parser with prog set to 'git cache' and the global options."""
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help='Path to the directory containing the cache')
    self.add_option('-v', '--verbose', action='count', default=0,
                    help='Increase verbosity (can be passed multiple times)')

  def parse_args(self, args=None, values=None):
    """Parse args, then resolve options.cache_dir and configure logging.

    The cache directory comes from --cache-dir when given, otherwise from the
    global git config key cache.cachepath. If neither yields a value the
    parser reports an error. The resolved directory is made absolute.

    Returns:
      The (options, args) pair produced by optparse.
    """
    options, args = optparse.OptionParser.parse_args(self, args, values)

    # Configure logging before anything below can log: a logging.warn() on an
    # unconfigured root logger installs a default handler, after which
    # basicConfig() is a no-op and the requested verbosity would be lost.
    levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = subprocess.check_output(
          [GIT_EXECUTABLE, 'config', '--global', 'cache.cachepath']).strip()
    except subprocess.CalledProcessError:
      # git exits non-zero when the key is not set.
      global_cache_dir = None

    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        logging.warn('Overriding globally-configured cache directory.')
    elif global_cache_dir:
      options.cache_dir = global_cache_dir
    else:
      # Also reached when cache.cachepath is set but empty, which would
      # otherwise silently resolve to the current directory.
      self.error('No cache directory specified on command line '
                 'or in cache.cachepath.')
    options.cache_dir = os.path.abspath(options.cache_dir)

    return options, args


def main(argv):
  """Hand argv to a subcommand.CommandDispatcher built for this module.

  Returns whatever the dispatcher's execute() call returns.
  """
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)


if __name__ == '__main__':
  # Drop the program name; the process exit status is main()'s return value.
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)