# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""SCM-specific utility classes."""

from collections import defaultdict
import os
import pathlib
import platform
import re
from typing import Mapping, List

import gclient_utils
import git_common
import subprocess2

# TODO: Should fix these warnings.
# pylint: disable=line-too-long

# constants used to identify the tree state of a directory.
VERSIONED_NO = 0
VERSIONED_DIR = 1
VERSIONED_SUBMODULE = 2


def determine_scm(root):
    """Similar to upload.py's version but much simpler.

    Returns 'git' or 'diff'.
    """
    if os.path.isdir(os.path.join(root, '.git')):
        return 'git'

    try:
        subprocess2.check_call(['git', 'rev-parse', '--show-cdup'],
                               stdout=subprocess2.DEVNULL,
                               stderr=subprocess2.DEVNULL,
                               cwd=root)
        return 'git'
    except (OSError, subprocess2.CalledProcessError):
        return 'diff'


class GIT(object):
    current_version = None
    rev_parse_cache = {}

    # Maps cwd -> {config key, [config values]}
    # This cache speeds up all `git config ...` operations by only running a
    # single subcommand, which can greatly accelerate things like
    # git-map-branches.
    _CONFIG_CACHE: Mapping[str, Mapping[str, List[str]]] = {}

    @staticmethod
    def _load_config(cwd: str) -> Mapping[str, List[str]]:
        """Loads git config for the given cwd.

        The calls to this method are cached in-memory for performance. The
        config is only reloaded on cache misses.

        Args:
            cwd: path to fetch `git config` for.

        Returns:
            A dict mapping git config keys to a list of its values.
        """
        if cwd not in GIT._CONFIG_CACHE:
            try:
                rawConfig = GIT.Capture(['config', '--list', '-z'],
                                        cwd=cwd,
                                        strip_out=False)
            except subprocess2.CalledProcessError:
                return {}

            cfg = defaultdict(list)

            # Splitting by '\x00' gets an additional empty string at the end.
            for line in rawConfig.split('\x00')[:-1]:
                key, value = map(str.strip, line.split('\n', 1))
                cfg[key].append(value)

            GIT._CONFIG_CACHE[cwd] = cfg

        return GIT._CONFIG_CACHE[cwd]

    @staticmethod
    def _clear_config(cwd: str) -> None:
        GIT._CONFIG_CACHE.pop(cwd, None)


    @staticmethod
    def ApplyEnvVars(kwargs):
        env = kwargs.pop('env', None) or os.environ.copy()
        # Don't prompt for passwords; just fail quickly and noisily.
        # By default, git will use an interactive terminal prompt when a
        # username/ password is needed.  That shouldn't happen in the chromium
        # workflow, and if it does, then gclient may hide the prompt in the
        # midst of a flood of terminal spew.  The only indication that something
        # has gone wrong will be when gclient hangs unresponsively.  Instead, we
        # disable the password prompt and simply allow git to fail noisily.  The
        # error message produced by git will be copied to gclient's output.
        env.setdefault('GIT_ASKPASS', 'true')
        env.setdefault('SSH_ASKPASS', 'true')
        # 'cat' is a magical git string that disables pagers on all platforms.
        env.setdefault('GIT_PAGER', 'cat')
        return env

    @staticmethod
    def Capture(args, cwd=None, strip_out=True, **kwargs):
        kwargs.setdefault('env', GIT.ApplyEnvVars(kwargs))
        kwargs.setdefault('cwd', cwd)
        kwargs.setdefault('autostrip', strip_out)
        return git_common.run(*args, **kwargs)

    @staticmethod
    def CaptureStatus(cwd,
                      upstream_branch,
                      end_commit=None,
                      ignore_submodules=True):
        # type: (str, str, Optional[str]) -> Sequence[Tuple[str, str]]
        """Returns git status.

        Returns an array of (status, file) tuples."""
        if end_commit is None:
            end_commit = ''
        if upstream_branch is None:
            upstream_branch = GIT.GetUpstreamBranch(cwd)
            if upstream_branch is None:
                raise gclient_utils.Error('Cannot determine upstream branch')

        command = [
            '-c', 'core.quotePath=false', 'diff', '--name-status',
            '--no-renames'
        ]
        if ignore_submodules:
            command.append('--ignore-submodules=all')
        command.extend(['-r', '%s...%s' % (upstream_branch, end_commit)])

        status = GIT.Capture(command, cwd)
        results = []
        if status:
            for statusline in status.splitlines():
                # 3-way merges can cause the status can be 'MMM' instead of 'M'.
                # This can happen when the user has 2 local branches and he
                # diffs between these 2 branches instead diffing to upstream.
                m = re.match(r'^(\w)+\t(.+)$', statusline)
                if not m:
                    raise gclient_utils.Error(
                        'status currently unsupported: %s' % statusline)
                # Only grab the first letter.
                results.append(('%s      ' % m.group(1)[0], m.group(2)))
        return results

    @staticmethod
    def GetConfig(cwd, key, default=None):
        values = GIT._load_config(cwd).get(key, None)
        if not values:
            return default

        return values[-1]

    @staticmethod
    def GetConfigBool(cwd, key) -> bool:
        return GIT.GetConfig(cwd, key) == 'true'

    @staticmethod
    def GetConfigList(cwd, key):
        return GIT._load_config(cwd).get(key, [])

    @staticmethod
    def YieldConfigRegexp(cwd, pattern):
        """Yields (key, value) pairs for any config keys matching `pattern`."""
        p = re.compile(pattern)
        for name, values in GIT._load_config(cwd).items():
            if p.match(name):
                for value in values:
                    yield name, value

    @staticmethod
    def GetBranchConfig(cwd, branch, key, default=None):
        assert branch, 'A branch must be given'
        key = 'branch.%s.%s' % (branch, key)
        return GIT.GetConfig(cwd, key, default)

    @staticmethod
    def SetConfig(
        cwd,
        key,
        value=None,
        *,
        append=False,
        missing_ok=True,
        modify_all=False,
        scope='local',
        value_pattern=None,
    ):
        """Sets or unsets one or more config values.

        Args:
            cwd: path to set `git config` for.
            key: The specific config key to affect.
            value: The value to set. If this is None, `key` will be unset.
            append: If True and `value` is not None, this will append
                the value instead of replacing an existing one.
            missing_ok: If `value` is None (i.e. this is an unset operation),
                ignore retcode=5 from `git config` (meaning that the value is
                not present). If `value` is not None, then this option has no
                effect.
            modify_all: If True, this will change a set operation to
                `--replace-all`, and will change an unset operation to
                `--unset-all`.
            scope: By default this is the local scope, but could be `system`,
                `global`, or `worktree`, depending on which config scope you
                want to affect.
            value_pattern: For use with `modify_all=True`, allows
                further filtering of the set or unset operation based on
                the currently configured value. Ignored for
                `modify_all=False`.
        """
        GIT._clear_config(cwd)

        args = ['config', f'--{scope}']
        if value == None:
            args.extend(['--unset' + ('-all' if modify_all else ''), key])
        else:
            if modify_all:
                args.append('--replace-all')
            if append:
                args.append('--add')
            args.extend([key, value])

        if modify_all and value_pattern:
            args.append(value_pattern)

        accepted_retcodes = [0]
        if value is None and missing_ok:
            accepted_retcodes = [0, 5]

        GIT.Capture(args, cwd=cwd, accepted_retcodes=accepted_retcodes)

    @staticmethod
    def SetBranchConfig(cwd, branch, key, value=None):
        assert branch, 'A branch must be given'
        key = 'branch.%s.%s' % (branch, key)
        GIT.SetConfig(cwd, key, value)

    @staticmethod
    def ShortBranchName(branch):
        """Converts a name like 'refs/heads/foo' to just 'foo'."""
        return branch.replace('refs/heads/', '')

    @staticmethod
    def GetBranchRef(cwd):
        """Returns the full branch reference, e.g. 'refs/heads/main'."""
        try:
            return GIT.Capture(['symbolic-ref', 'HEAD'], cwd=cwd)
        except subprocess2.CalledProcessError:
            return None

    @staticmethod
    def GetRemoteHeadRef(cwd, url, remote):
        """Returns the full default remote branch reference, e.g.
        'refs/remotes/origin/main'."""
        if os.path.exists(cwd):
            try:
                # Try using local git copy first
                ref = 'refs/remotes/%s/HEAD' % remote
                ref = GIT.Capture(['symbolic-ref', ref], cwd=cwd)
                if not ref.endswith('master'):
                    return ref
            except subprocess2.CalledProcessError:
                pass

            try:
                # Check if there are changes in the default branch for this
                # particular repository.
                GIT.Capture(['remote', 'set-head', '-a', remote], cwd=cwd)
                return GIT.Capture(['symbolic-ref', ref], cwd=cwd)
            except subprocess2.CalledProcessError:
                pass

        try:
            # Fetch information from git server
            resp = GIT.Capture(['ls-remote', '--symref', url, 'HEAD'])
            regex = r'^ref: (.*)\tHEAD$'
            for line in resp.split('\n'):
                m = re.match(regex, line)
                if m:
                    return ''.join(GIT.RefToRemoteRef(m.group(1), remote))
        except subprocess2.CalledProcessError:
            pass
        # Return default branch
        return 'refs/remotes/%s/main' % remote

    @staticmethod
    def GetBranch(cwd):
        """Returns the short branch name, e.g. 'main'."""
        branchref = GIT.GetBranchRef(cwd)
        if branchref:
            return GIT.ShortBranchName(branchref)
        return None

    @staticmethod
    def GetRemoteBranches(cwd):
        return GIT.Capture(['branch', '-r'], cwd=cwd).split()

    @staticmethod
    def FetchUpstreamTuple(cwd, branch=None):
        """Returns a tuple containing remote and remote ref,
        e.g. 'origin', 'refs/heads/main'
        """
        try:
            branch = branch or GIT.GetBranch(cwd)
        except subprocess2.CalledProcessError:
            pass
        if branch:
            upstream_branch = GIT.GetBranchConfig(cwd, branch, 'merge')
            if upstream_branch:
                remote = GIT.GetBranchConfig(cwd, branch, 'remote', '.')
                return remote, upstream_branch

        upstream_branch = GIT.GetConfig(cwd, 'rietveld.upstream-branch')
        if upstream_branch:
            remote = GIT.GetConfig(cwd, 'rietveld.upstream-remote', '.')
            return remote, upstream_branch

        # Else, try to guess the origin remote.
        remote_branches = GIT.GetRemoteBranches(cwd)
        if 'origin/main' in remote_branches:
            # Fall back on origin/main if it exits.
            return 'origin', 'refs/heads/main'

        if 'origin/master' in remote_branches:
            # Fall back on origin/master if it exits.
            return 'origin', 'refs/heads/master'

        return None, None

    @staticmethod
    def RefToRemoteRef(ref, remote):
        """Convert a checkout ref to the equivalent remote ref.

        Returns:
            A tuple of the remote ref's (common prefix, unique suffix), or None if it
            doesn't appear to refer to a remote ref (e.g. it's a commit hash).
        """
        # TODO(mmoss): This is just a brute-force mapping based of the expected
        # git config. It's a bit better than the even more brute-force
        # replace('heads', ...), but could still be smarter (like maybe actually
        # using values gleaned from the git config).
        m = re.match('^(refs/(remotes/)?)?branch-heads/', ref or '')
        if m:
            return ('refs/remotes/branch-heads/', ref.replace(m.group(0), ''))

        m = re.match('^((refs/)?remotes/)?%s/|(refs/)?heads/' % remote, ref
                     or '')
        if m:
            return ('refs/remotes/%s/' % remote, ref.replace(m.group(0), ''))

        return None

    @staticmethod
    def RemoteRefToRef(ref, remote):
        assert remote, 'A remote must be given'
        if not ref or not ref.startswith('refs/'):
            return None
        if not ref.startswith('refs/remotes/'):
            return ref
        if ref.startswith('refs/remotes/branch-heads/'):
            return 'refs' + ref[len('refs/remotes'):]
        if ref.startswith('refs/remotes/%s/' % remote):
            return 'refs/heads' + ref[len('refs/remotes/%s' % remote):]
        return None

    @staticmethod
    def GetUpstreamBranch(cwd):
        """Gets the current branch's upstream branch."""
        remote, upstream_branch = GIT.FetchUpstreamTuple(cwd)
        if remote != '.' and upstream_branch:
            remote_ref = GIT.RefToRemoteRef(upstream_branch, remote)
            if remote_ref:
                upstream_branch = ''.join(remote_ref)
        return upstream_branch

    @staticmethod
    def IsAncestor(maybe_ancestor, ref, cwd=None):
        # type: (string, string, Optional[string]) -> bool
        """Verifies if |maybe_ancestor| is an ancestor of |ref|."""
        try:
            GIT.Capture(['merge-base', '--is-ancestor', maybe_ancestor, ref],
                        cwd=cwd)
            return True
        except subprocess2.CalledProcessError:
            return False

    @staticmethod
    def GetOldContents(cwd, filename, branch=None):
        if not branch:
            branch = GIT.GetUpstreamBranch(cwd)
        if platform.system() == 'Windows':
            # git show <sha>:<path> wants a posix path.
            filename = filename.replace('\\', '/')
        command = ['show', '%s:%s' % (branch, filename)]
        try:
            return GIT.Capture(command, cwd=cwd, strip_out=False)
        except subprocess2.CalledProcessError:
            return ''

    @staticmethod
    def GenerateDiff(cwd,
                     branch=None,
                     branch_head='HEAD',
                     full_move=False,
                     files=None):
        """Diffs against the upstream branch or optionally another branch.

        full_move means that move or copy operations should completely recreate the
        files, usually in the prospect to apply the patch for a try job."""
        if not branch:
            branch = GIT.GetUpstreamBranch(cwd)
        command = [
            '-c', 'core.quotePath=false', 'diff', '-p', '--no-color',
            '--no-prefix', '--no-ext-diff', branch + "..." + branch_head
        ]
        if full_move:
            command.append('--no-renames')
        else:
            command.append('-C')
        # TODO(maruel): --binary support.
        if files:
            command.append('--')
            command.extend(files)
        diff = GIT.Capture(command, cwd=cwd, strip_out=False).splitlines(True)
        for i in range(len(diff)):
            # In the case of added files, replace /dev/null with the path to the
            # file being added.
            if diff[i].startswith('--- /dev/null'):
                diff[i] = '--- %s' % diff[i + 1][4:]
        return ''.join(diff)

    @staticmethod
    def GetAllFiles(cwd):
        """Returns the list of all files under revision control."""
        command = ['-c', 'core.quotePath=false', 'ls-files', '--', '.']
        return GIT.Capture(command, cwd=cwd).splitlines(False)

    @staticmethod
    def GetSubmoduleCommits(cwd, submodules):
        # type: (string, List[string]) => Mapping[string][string]
        """Returns a mapping of staged or committed new commits for submodules."""
        if not submodules:
            return {}
        result = subprocess2.check_output(['git', 'ls-files', '-s', '--'] +
                                          submodules,
                                          cwd=cwd).decode('utf-8')
        commit_hashes = {}
        for r in result.splitlines():
            # ['<mode>', '<commit_hash>', '<stage_number>', '<path>'].
            record = r.strip().split(maxsplit=3)  # path can contain spaces.
            assert record[0] == '160000', 'file is not a gitlink: %s' % record
            commit_hashes[record[3]] = record[1]
        return commit_hashes

    @staticmethod
    def GetCheckoutRoot(cwd):
        """Returns the top level directory of a git checkout as an absolute path.
        """
        root = GIT.Capture(['rev-parse', '--show-cdup'], cwd=cwd)
        return os.path.abspath(os.path.join(cwd, root))

    @staticmethod
    def IsInsideWorkTree(cwd):
        try:
            return GIT.Capture(['rev-parse', '--is-inside-work-tree'], cwd=cwd)
        except (OSError, subprocess2.CalledProcessError):
            return False

    @staticmethod
    def IsVersioned(cwd, relative_dir):
        # type: (str, str) -> int
        """Checks whether the given |relative_dir| is part of cwd's repo."""
        output = GIT.Capture(['ls-tree', 'HEAD', '--', relative_dir], cwd=cwd)
        if not output:
            return VERSIONED_NO
        if output.startswith('160000'):
            return VERSIONED_SUBMODULE
        return VERSIONED_DIR

    @staticmethod
    def ListSubmodules(repo_root):
        # type: (str) -> Collection[str]
        """Returns the list of submodule paths for the given repo.

        Path separators will be adjusted for the current OS.
        """
        if not os.path.exists(os.path.join(repo_root, '.gitmodules')):
            return []
        config_output = GIT.Capture(
            ['config', '--file', '.gitmodules', '--get-regexp', 'path'],
            cwd=repo_root)
        return [
            line.split()[-1].replace('/', os.path.sep)
            for line in config_output.splitlines()
        ]

    @staticmethod
    def CleanupDir(cwd, relative_dir):
        """Cleans up untracked file inside |relative_dir|."""
        return bool(GIT.Capture(['clean', '-df', relative_dir], cwd=cwd))

    @staticmethod
    def ResolveCommit(cwd, rev):
        cache_key = None
        # We do this instead of rev-parse --verify rev^{commit}, since on
        # Windows git can be either an executable or batch script, each of which
        # requires escaping the caret (^) a different way.
        if gclient_utils.IsFullGitSha(rev):
            # Only cache full SHAs
            cache_key = hash(cwd + rev)
            if val := GIT.rev_parse_cache.get(cache_key):
                return val

            # git-rev parse --verify FULL_GIT_SHA always succeeds, even if we
            # don't have FULL_GIT_SHA locally. Removing the last character
            # forces git to check if FULL_GIT_SHA refers to an object in the
            # local database.
            rev = rev[:-1]
        res = GIT.Capture(['rev-parse', '--quiet', '--verify', rev], cwd=cwd)
        if cache_key:
            # We don't expect concurrent execution, so we don't lock anything.
            GIT.rev_parse_cache[cache_key] = res

        return res

    @staticmethod
    def IsValidRevision(cwd, rev, sha_only=False):
        """Verifies the revision is a proper git revision.

        sha_only: Fail unless rev is a sha hash.
        """
        try:
            sha = GIT.ResolveCommit(cwd, rev)
        except subprocess2.CalledProcessError:
            return None

        if sha_only:
            return sha == rev.lower()
        return True


class DIFF(object):

    @staticmethod
    def GetAllFiles(cwd):
        """Return all files under the repo at cwd.

        If .gitmodules exists in cwd, use it to determine which folders are
        submodules and don't recurse into them. Submodule paths are returned.
        """
        # `git config --file` works outside of a git workspace.
        submodules = GIT.ListSubmodules(cwd)
        if not submodules:
            return [
                str(p.relative_to(cwd)) for p in pathlib.Path(cwd).rglob("*")
                if p.is_file()
            ]

        full_path_submodules = {os.path.join(cwd, s) for s in submodules}

        def should_recurse(dirpath, dirname):
            full_path = os.path.join(dirpath, dirname)
            return full_path not in full_path_submodules

        paths = list(full_path_submodules)
        for dirpath, dirnames, filenames in os.walk(cwd):
            paths.extend([os.path.join(dirpath, f) for f in filenames])
            dirnames[:] = [d for d in dirnames if should_recurse(dirpath, d)]

        return [os.path.relpath(p, cwd) for p in paths]