# You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# depot_tools/scm.py
#
# 445 lines
# 15 KiB
# Python

# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""SCM-specific utility classes."""
import distutils.version
import glob
import io
import os
import platform
import re
import sys
import gclient_utils
import subprocess2
def ValidateEmail(email):
  """Returns True if |email| looks like a plausible e-mail address.

  The check is purely syntactic: a local part made of letters, digits and
  '._%-+', an '@', a domain made of letters, digits and '._%-', a literal dot
  and a final label of 2 to 6 ASCII letters.

  Args:
    email: The string to check.

  Returns:
    True when the whole string matches the pattern, False otherwise.
  """
  # The dot in front of the final label must be escaped; a bare '.' matches
  # any character and would let addresses such as 'user@example_com' through.
  return re.match(
      r"^[a-zA-Z0-9._%\-+]+@[a-zA-Z0-9._%-]+\.[a-zA-Z]{2,6}$",
      email) is not None
def GetCasedPath(path):
  """Returns |path| with the on-disk letter case of each component (Windows).

  On any other platform, or when |path| does not exist, |path| is returned
  unchanged.
  """
  if not sys.platform.startswith('win') or not os.path.exists(path):
    return path

  components = os.path.abspath(path).split('\\')
  # Component 0 is the drive letter and is left alone.
  for index in range(1, len(components)):
    parent = '\\'.join(components[:index])
    candidate = '\\'.join(components[:index + 1])
    # glob.glob only returns the real case of the last path item, so the path
    # is rebuilt one component at a time and the cased name is sliced back out
    # of the first match.
    first_match = glob.glob(candidate + '*')[0]
    components[index] = first_match[len(parent) + 1:len(candidate)]
  return '\\'.join(components)
def GenFakeDiff(filename):
  """Builds a diff that presents every line of |filename| as an addition.

  The file is read through gclient_utils.FileRead and the returned text is an
  'Index:' header followed by a single hunk in which each line of the file is
  prefixed with '+'.
  """
  added_lines = gclient_utils.FileRead(filename, 'rb').splitlines(True)
  # We need to use / since patch on unix will fail otherwise.
  diff_name = filename.replace(os.sep, '/')
  header = [
      "Index: %s\n" % diff_name,
      '=' * 67 + '\n',
      # Note: Should we use /dev/null instead?
      "--- %s\n" % diff_name,
      "+++ %s\n" % diff_name,
      "@@ -0,0 +1,%d @@\n" % len(added_lines),
  ]
  return ''.join(header + ['+' + line for line in added_lines])
def determine_scm(root):
  """Detects which SCM manages |root|.

  Returns 'git' when |root| contains a '.git' directory or when
  'git rev-parse --show-cdup' succeeds in |root|; returns None otherwise.
  """
  if not os.path.isdir(os.path.join(root, '.git')):
    # No .git directory directly under |root|: let git decide. A failing
    # command, or being unable to launch git at all, means "not git".
    probe = ['git', 'rev-parse', '--show-cdup']
    try:
      subprocess2.check_call(
          probe,
          stdout=subprocess2.DEVNULL,
          stderr=subprocess2.DEVNULL,
          cwd=root)
    except (OSError, subprocess2.CalledProcessError):
      return None
  return 'git'
def only_int(val):
  """Converts |val| to an int, returning 0 when it is not a plain number.

  Args:
    val: A string (anything offering isdigit() and accepted by int()).

  Returns:
    int(val) when |val| consists solely of digits, 0 otherwise (including for
    the empty string and for signed values such as '-1').
  """
  if not val.isdigit():
    return 0
  try:
    return int(val)
  except ValueError:
    # isdigit() also accepts characters such as superscript digits that int()
    # cannot parse; treat those as "not a number" as well.
    return 0
class GIT(object):
  """Namespace of helpers that wrap the git command line.

  Every method is a static or class method; the class holds no per-instance
  state. Commands are run through GIT.Capture.
  """

  # Git version cached by AssertVersion() after its first call.
  current_version = None

  @staticmethod
  def ApplyEnvVars(kwargs):
    """Removes 'env' from |kwargs| and returns the environment to run git in.

    Args:
      kwargs: Keyword-argument dict; its 'env' entry, if any, is popped.

    Returns:
      A new dict based on the popped 'env' (or on os.environ when it is
      missing or empty) with GIT_ASKPASS, SSH_ASKPASS and GIT_PAGER defaulted.
    """
    # Work on a copy so the defaults below never leak into the caller's dict.
    env = dict(kwargs.pop('env', None) or os.environ)
    # Don't prompt for passwords; just fail quickly and noisily.
    # By default, git will use an interactive terminal prompt when a username/
    # password is needed.  That shouldn't happen in the chromium workflow,
    # and if it does, then gclient may hide the prompt in the midst of a flood
    # of terminal spew.  The only indication that something has gone wrong
    # will be when gclient hangs unresponsively.  Instead, we disable the
    # password prompt and simply allow git to fail noisily.  The error
    # message produced by git will be copied to gclient's output.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    # 'cat' is a magical git string that disables pagers on all platforms.
    env.setdefault('GIT_PAGER', 'cat')
    return env

  @staticmethod
  def Capture(args, cwd=None, strip_out=True, **kwargs):
    """Runs 'git <args>' and returns its stdout as text.

    Args:
      args: List of arguments placed after 'git'.
      cwd: Directory to run the command in.
      strip_out: When true, surrounding whitespace is stripped from the output.
      **kwargs: Forwarded to subprocess2.check_output; 'env' is first processed
        by ApplyEnvVars.

    Returns:
      The output decoded as UTF-8, undecodable bytes being replaced.
    """
    env = GIT.ApplyEnvVars(kwargs)
    output = subprocess2.check_output(
        ['git'] + args, cwd=cwd, stderr=subprocess2.PIPE, env=env, **kwargs)
    output = output.decode('utf-8', 'replace')
    return output.strip() if strip_out else output

  @staticmethod
  def CaptureStatus(cwd, upstream_branch):
    """Returns the name-status diff against |upstream_branch|.

    Args:
      cwd: Directory to run git in.
      upstream_branch: Branch to diff against; when None it is looked up with
        GetUpstreamBranch.

    Returns:
      A list of (status, file) tuples.

    Raises:
      gclient_utils.Error: if no upstream branch can be determined or a status
        line cannot be parsed.
    """
    if upstream_branch is None:
      upstream_branch = GIT.GetUpstreamBranch(cwd)
      if upstream_branch is None:
        raise gclient_utils.Error('Cannot determine upstream branch')
    command = ['-c', 'core.quotePath=false', 'diff',
               '--name-status', '--no-renames', '-r',
               '%s...' % upstream_branch]
    status = GIT.Capture(command, cwd)
    results = []
    if status:
      for statusline in status.splitlines():
        # 3-way merges can cause the status can be 'MMM' instead of 'M'. This
        # can happen when the user has 2 local branches and he diffs between
        # these 2 branches instead diffing to upstream.
        # The whole run of letters is captured as one group; a repeated
        # one-character group would only keep the last letter.
        m = re.match(r'^(\w+)\t(.+)$', statusline)
        if not m:
          raise gclient_utils.Error(
              'status currently unsupported: %s' % statusline)
        # Only grab the first letter.
        # NOTE(review): the padding after the status letter may have been
        # collapsed when this file was extracted - confirm the expected width
        # against callers.
        results.append(('%s ' % m.group(1)[0], m.group(2)))
    return results

  @staticmethod
  def GetConfig(cwd, key, default=None):
    """Returns the output of 'git config <key>', or |default| if it fails."""
    try:
      return GIT.Capture(['config', key], cwd=cwd)
    except subprocess2.CalledProcessError:
      return default

  @staticmethod
  def GetBranchConfig(cwd, branch, key, default=None):
    """Returns the 'branch.<branch>.<key>' config value, or |default|."""
    assert branch, 'A branch must be given'
    key = 'branch.%s.%s' % (branch, key)
    return GIT.GetConfig(cwd, key, default)

  @staticmethod
  def SetConfig(cwd, key, value=None):
    """Sets config |key| to |value|, or unsets it when |value| is None."""
    if value is None:
      args = ['config', '--unset', key]
    else:
      args = ['config', key, value]
    GIT.Capture(args, cwd=cwd)

  @staticmethod
  def SetBranchConfig(cwd, branch, key, value=None):
    """Sets (or unsets, when |value| is None) 'branch.<branch>.<key>'."""
    assert branch, 'A branch must be given'
    key = 'branch.%s.%s' % (branch, key)
    GIT.SetConfig(cwd, key, value)

  @staticmethod
  def IsWorkTreeDirty(cwd):
    """Returns True when 'git status -s' prints anything."""
    return GIT.Capture(['status', '-s'], cwd=cwd) != ''

  @staticmethod
  def GetEmail(cwd):
    """Retrieves the user email address if known, '' otherwise."""
    return GIT.GetConfig(cwd, 'user.email', '')

  @staticmethod
  def ShortBranchName(branch):
    """Converts a name like 'refs/heads/foo' to just 'foo'."""
    return branch.replace('refs/heads/', '')

  @staticmethod
  def GetBranchRef(cwd):
    """Returns the full branch reference, e.g. 'refs/heads/main'.

    Returns None when 'git symbolic-ref HEAD' fails.
    """
    try:
      return GIT.Capture(['symbolic-ref', 'HEAD'], cwd=cwd)
    except subprocess2.CalledProcessError:
      return None

  @staticmethod
  def GetRemoteHeadRef(cwd, url, remote):
    """Returns the full default remote branch reference, e.g.
    'refs/remotes/origin/main'.

    The local checkout in |cwd| is asked first, then the server at |url|; when
    neither gives an answer 'refs/remotes/<remote>/main' is returned.
    """
    if os.path.exists(cwd):
      try:
        # Try using local git copy first
        ref = 'refs/remotes/%s/HEAD' % remote
        return GIT.Capture(['symbolic-ref', ref], cwd=cwd)
      except subprocess2.CalledProcessError:
        pass

    try:
      # Fetch information from git server
      resp = GIT.Capture(['ls-remote', '--symref', url, 'HEAD'])
      regex = r'^ref: (.*)\tHEAD$'
      for line in resp.split('\n'):
        m = re.match(regex, line)
        if m:
          remote_ref = GIT.RefToRemoteRef(m.group(1), remote)
          # RefToRemoteRef returns None for refs it cannot map; skip those
          # instead of failing on ''.join(None).
          if remote_ref:
            return ''.join(remote_ref)
    except subprocess2.CalledProcessError:
      pass
    # Return default branch
    return 'refs/remotes/%s/main' % remote

  @staticmethod
  def GetBranch(cwd):
    """Returns the short branch name, e.g. 'main', or None."""
    branchref = GIT.GetBranchRef(cwd)
    if branchref:
      return GIT.ShortBranchName(branchref)
    return None

  @staticmethod
  def GetRemoteBranches(cwd):
    """Returns the whitespace-separated tokens printed by 'git branch -r'."""
    return GIT.Capture(['branch', '-r'], cwd=cwd).split()

  @staticmethod
  def FetchUpstreamTuple(cwd, branch=None):
    """Returns a tuple containing remote and remote ref,
       e.g. 'origin', 'refs/heads/main'

    Looks, in order, at the branch's own 'merge'/'remote' config, at the
    'rietveld.upstream-*' config and finally at whether origin/main or
    origin/master exists. Returns (None, None) when nothing is found.
    """
    try:
      branch = branch or GIT.GetBranch(cwd)
    except subprocess2.CalledProcessError:
      pass
    if branch:
      upstream_branch = GIT.GetBranchConfig(cwd, branch, 'merge')
      if upstream_branch:
        remote = GIT.GetBranchConfig(cwd, branch, 'remote', '.')
        return remote, upstream_branch

    upstream_branch = GIT.GetConfig(cwd, 'rietveld.upstream-branch')
    if upstream_branch:
      remote = GIT.GetConfig(cwd, 'rietveld.upstream-remote', '.')
      return remote, upstream_branch

    # Else, try to guess the origin remote.
    remote_branches = GIT.GetRemoteBranches(cwd)
    if 'origin/main' in remote_branches:
      # Fall back on origin/main if it exists.
      return 'origin', 'refs/heads/main'
    elif 'origin/master' in remote_branches:
      # Fall back on origin/master if it exists.
      return 'origin', 'refs/heads/master'
    return None, None

  @staticmethod
  def RefToRemoteRef(ref, remote):
    """Convert a checkout ref to the equivalent remote ref.

    Returns:
      A tuple of the remote ref's (common prefix, unique suffix), or None if it
      doesn't appear to refer to a remote ref (e.g. it's a commit hash).
    """
    # TODO(mmoss): This is just a brute-force mapping based of the expected git
    # config. It's a bit better than the even more brute-force replace('heads',
    # ...), but could still be smarter (like maybe actually using values gleaned
    # from the git config).
    ref = ref or ''
    m = re.match('^(refs/(remotes/)?)?branch-heads/', ref)
    if m:
      # Drop only the matched leading prefix; the same text may legitimately
      # appear again later in the ref name.
      return ('refs/remotes/branch-heads/', ref[m.end():])

    # The remote name is escaped so that it is matched literally.
    m = re.match(
        '^((refs/)?remotes/)?%s/|(refs/)?heads/' % re.escape(str(remote)), ref)
    if m:
      return ('refs/remotes/%s/' % remote, ref[m.end():])

    return None

  @staticmethod
  def RemoteRefToRef(ref, remote):
    """Converts a remote-tracking ref back to the ref it tracks.

    Returns:
      |ref| itself when it starts with 'refs/' but not 'refs/remotes/';
      'refs/branch-heads/...' or 'refs/heads/...' for refs under
      'refs/remotes/branch-heads/' or 'refs/remotes/<remote>/'; None otherwise.
    """
    assert remote, 'A remote must be given'
    if not ref or not ref.startswith('refs/'):
      return None
    if not ref.startswith('refs/remotes/'):
      return ref
    if ref.startswith('refs/remotes/branch-heads/'):
      return 'refs' + ref[len('refs/remotes'):]
    if ref.startswith('refs/remotes/%s/' % remote):
      return 'refs/heads' + ref[len('refs/remotes/%s' % remote):]
    return None

  @staticmethod
  def GetUpstreamBranch(cwd):
    """Gets the current branch's upstream branch."""
    remote, upstream_branch = GIT.FetchUpstreamTuple(cwd)
    if remote != '.' and upstream_branch:
      remote_ref = GIT.RefToRemoteRef(upstream_branch, remote)
      if remote_ref:
        upstream_branch = ''.join(remote_ref)
    return upstream_branch

  @staticmethod
  def IsAncestor(cwd, maybe_ancestor, ref):
    """Verifies if |maybe_ancestor| is an ancestor of |ref|."""
    try:
      GIT.Capture(['merge-base', '--is-ancestor', maybe_ancestor, ref], cwd=cwd)
      return True
    except subprocess2.CalledProcessError:
      return False

  @staticmethod
  def GetOldContents(cwd, filename, branch=None):
    """Returns the content of |filename| as of |branch|.

    |branch| defaults to the upstream branch. Returns '' when 'git show' fails.
    """
    if not branch:
      branch = GIT.GetUpstreamBranch(cwd)
    if platform.system() == 'Windows':
      # git show <sha>:<path> wants a posix path.
      filename = filename.replace('\\', '/')
    command = ['show', '%s:%s' % (branch, filename)]
    try:
      return GIT.Capture(command, cwd=cwd, strip_out=False)
    except subprocess2.CalledProcessError:
      return ''

  @staticmethod
  def GenerateDiff(cwd, branch=None, branch_head='HEAD', full_move=False,
                   files=None):
    """Diffs against the upstream branch or optionally another branch.

    full_move means that move or copy operations should completely recreate the
    files, usually in the prospect to apply the patch for a try job.

    Args:
      cwd: Directory to run git in.
      branch: Base of the diff; defaults to the upstream branch.
      branch_head: Tip of the diff.
      full_move: Pass --no-renames instead of -C.
      files: Optional list of paths the diff is limited to.

    Returns:
      The diff text.
    """
    if not branch:
      branch = GIT.GetUpstreamBranch(cwd)
    command = ['-c', 'core.quotePath=false', 'diff',
               '-p', '--no-color', '--no-prefix', '--no-ext-diff',
               branch + "..." + branch_head]
    if full_move:
      command.append('--no-renames')
    else:
      command.append('-C')
    # TODO(maruel): --binary support.
    if files:
      command.append('--')
      command.extend(files)
    diff = GIT.Capture(command, cwd=cwd, strip_out=False).splitlines(True)
    last_index = len(diff) - 1
    for i, line in enumerate(diff):
      # In the case of added files, replace /dev/null with the path to the
      # file being added. The path is read from the following line, so the
      # very last line is never rewritten.
      if i < last_index and line.startswith('--- /dev/null'):
        diff[i] = '--- %s' % diff[i + 1][4:]
    return ''.join(diff)

  @staticmethod
  def GetDifferentFiles(cwd, branch=None, branch_head='HEAD'):
    """Returns the list of modified files between two branches."""
    if not branch:
      branch = GIT.GetUpstreamBranch(cwd)
    command = ['-c', 'core.quotePath=false', 'diff',
               '--name-only', branch + "..." + branch_head]
    return GIT.Capture(command, cwd=cwd).splitlines(False)

  @staticmethod
  def GetAllFiles(cwd):
    """Returns the list of all files under revision control."""
    command = ['-c', 'core.quotePath=false', 'ls-files', '--', '.']
    return GIT.Capture(command, cwd=cwd).splitlines(False)

  @staticmethod
  def GetPatchName(cwd):
    """Constructs a name for this patch: '<branch>#<short sha of HEAD>'."""
    short_sha = GIT.Capture(['rev-parse', '--short=4', 'HEAD'], cwd=cwd)
    return "%s#%s" % (GIT.GetBranch(cwd), short_sha)

  @staticmethod
  def GetCheckoutRoot(cwd):
    """Returns the top level directory of a git checkout as an absolute path.
    """
    root = GIT.Capture(['rev-parse', '--show-cdup'], cwd=cwd)
    return os.path.abspath(os.path.join(cwd, root))

  @staticmethod
  def GetGitDir(cwd):
    """Returns the git directory of the checkout at |cwd| as an absolute path.
    """
    git_dir = GIT.Capture(['rev-parse', '--git-dir'], cwd=cwd)
    # git may print a path relative to |cwd| (e.g. '.git'); anchor it there
    # rather than at the process's working directory. An absolute |git_dir|
    # is left untouched by os.path.join.
    return os.path.abspath(os.path.join(cwd, git_dir))

  @staticmethod
  def IsInsideWorkTree(cwd):
    """Returns True when |cwd| is inside a git work tree, False otherwise."""
    try:
      # git prints 'true' or 'false'; both are truthy strings, so compare
      # explicitly instead of returning the raw output.
      return GIT.Capture(
          ['rev-parse', '--is-inside-work-tree'], cwd=cwd) == 'true'
    except (OSError, subprocess2.CalledProcessError):
      return False

  @staticmethod
  def IsDirectoryVersioned(cwd, relative_dir):
    """Checks whether the given |relative_dir| is part of cwd's repo."""
    return bool(GIT.Capture(['ls-tree', 'HEAD', relative_dir], cwd=cwd))

  @staticmethod
  def CleanupDir(cwd, relative_dir):
    """Cleans up untracked file inside |relative_dir|.

    Returns True when 'git clean' printed anything.
    """
    return bool(GIT.Capture(['clean', '-df', relative_dir], cwd=cwd))

  @staticmethod
  def ResolveCommit(cwd, rev):
    """Returns what 'git rev-parse --verify' prints for |rev|, or None."""
    # We do this instead of rev-parse --verify rev^{commit}, since on Windows
    # git can be either an executable or batch script, each of which requires
    # escaping the caret (^) a different way.
    if gclient_utils.IsFullGitSha(rev):
      # git-rev parse --verify FULL_GIT_SHA always succeeds, even if we don't
      # have FULL_GIT_SHA locally. Removing the last character forces git to
      # check if FULL_GIT_SHA refers to an object in the local database.
      rev = rev[:-1]
    try:
      return GIT.Capture(['rev-parse', '--quiet', '--verify', rev], cwd=cwd)
    except subprocess2.CalledProcessError:
      return None

  @staticmethod
  def IsValidRevision(cwd, rev, sha_only=False):
    """Verifies the revision is a proper git revision.

    sha_only: Fail unless rev is a sha hash.
    """
    sha = GIT.ResolveCommit(cwd, rev)
    if sha is None:
      return False
    if sha_only:
      return sha == rev.lower()
    return True

  @classmethod
  def AssertVersion(cls, min_version):
    """Asserts git's version is at least min_version.

    The version reported by 'git --version' is parsed once and cached on the
    class.

    Returns:
      A (is_recent_enough, current_version) tuple.
    """
    if cls.current_version is None:
      current_version = cls.Capture(['--version'], '.')
      matched = re.search(r'git version (.+)', current_version)
      cls.current_version = distutils.version.LooseVersion(matched.group(1))
    min_version = distutils.version.LooseVersion(min_version)
    return (min_version <= cls.current_version, cls.current_version)