From f2707a6d934df5a74bf1a4343e3a15dfeaf64f08 Mon Sep 17 00:00:00 2001 From: "dnj@chromium.org" Date: Mon, 7 Jul 2014 17:08:48 +0000 Subject: [PATCH] Consolidated 'git' refish parsing into a class Created the 'GitRefish' class to centralize 'git' refish parsing and consistent usage by 'gclient' 'git' code. BUG=373504 TEST=localtest R=agable@chromium.org, iannucci@chromium.org Review URL: https://codereview.chromium.org/328843005 git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@281553 0039d316-1c4b-4281-b951-d872f2087c98 --- gclient.py | 2 +- gclient_scm.py | 202 +++++++++++++++++++++++++++++++------- tests/gclient_scm_test.py | 140 +++++++++++++++++++++++++- tests/gclient_test.py | 2 +- 4 files changed, 307 insertions(+), 39 deletions(-) diff --git a/gclient.py b/gclient.py index a2d159a2c..275dad3b9 100755 --- a/gclient.py +++ b/gclient.py @@ -1112,7 +1112,7 @@ solutions = [ if dep.managed and dep.url: scm = gclient_scm.CreateSCM( dep.url, self.root_dir, dep.name, self.outbuf) - actual_url = scm.GetActualRemoteURL(self._options) + actual_url = scm.GetActualRemoteURL() if actual_url and not scm.DoesRemoteURLMatch(self._options): raise gclient_utils.Error(''' Your .gclient file seems to be broken. The requested URL is different from what diff --git a/gclient_scm.py b/gclient_scm.py index a87032e74..5947c99f9 100644 --- a/gclient_scm.py +++ b/gclient_scm.py @@ -6,6 +6,7 @@ from __future__ import print_function +import collections import errno import logging import os @@ -159,9 +160,8 @@ class SCMWrapper(object): return getattr(self, command)(options, args, file_list) - def GetActualRemoteURL(self, options): + def GetActualRemoteURL(self): """Attempt to determine the remote URL for this SCMWrapper.""" - # Git if os.path.exists(os.path.join(self.checkout_path, '.git')): actual_remote_url = shlex.split(scm.GIT.Capture( ['config', '--local', '--get-regexp', r'remote.*.url'], @@ -189,7 +189,7 @@ class SCMWrapper(object): # A checkout which doesn't exist can't be broken. return True - actual_remote_url = self.GetActualRemoteURL(options) + actual_remote_url = self.GetActualRemoteURL() if actual_remote_url: return (gclient_utils.SplitUrlRevision(actual_remote_url)[0].rstrip('/') == gclient_utils.SplitUrlRevision(self.url)[0].rstrip('/')) @@ -230,6 +230,139 @@ class SCMWrapper(object): shutil.move(self.checkout_path, dest_path) +_GitRefishBase = collections.namedtuple('GitRef', ( + # The initial string that the parsed Refish came from + 'source', + # Is this ref a branch? + 'is_branch', + # The name of this ref when accessed through the local repository + 'local_ref', + # The name of the remote that this refish resides on + 'remote', + # The path of this refish on the remote (e.g., 'master') + 'remote_ref', + # The remote-qualified path to this refish (e.g., 'origin/master') + 'remote_refspec', + # If a branch, the expected name of the upstream branch that it should + # track; otherwise, 'None' + 'upstream_branch', +)) + + +class GitRefish(_GitRefishBase): + # Disable 'no __init__' warning | pylint: disable=W0232 + """Implements refish parsing and properties. + + This class is designed to concentrate and codify the assumptions made by + 'gclient' code about refs and revisions. + """ + + _DEFAULT_REMOTE = 'origin' + + _GIT_SHA1_RE = re.compile('[0-9a-f]{40}', re.IGNORECASE) + _GIT_SHA1_SHORT_RE = re.compile('[0-9a-f]{4,40}', re.IGNORECASE) + + def __str__(self): + return self.source + + @classmethod + def Parse(cls, ref, remote=None, other_remotes=None): + """Parses a ref into a 'GitRefish' instance. + + Args: + ref: (str) The refish (branch, tag, hash, etc.) to parse + remote: (None/str) If supplied, the name of the primary remote. In + addtion to being recognized as a remote string, the primary remote + will also be used (as needed) when generating the remote refspec. If + omitted, the default remote name ('origin') will be used. + other_remotes: (None/iterable) If supplied, a list of strings to + recognize as remotes in addition to 'remote'. + """ + assert ref == ref.strip() + + # Use default set of remotes + remote = remote or cls._DEFAULT_REMOTE + remotes = {remote} + if other_remotes: + remotes.update(other_remotes) + + # Treat 'ref' as a '/'-delimited set of items; analyze their contents + ref_split = local_ref = remote_ref = tuple(ref.split('/')) + is_branch = True + if len(ref_split) > 1: + if ref_split[0] == 'refs': + if len(ref_split) >= 3 and ref_split[1] == 'heads': + # refs/heads/foo/bar + # + # Local Ref: foo/bar + # Remote Ref: foo/bar + local_ref = remote_ref = ref_split[2:] + elif len(ref_split) >= 4 and ref_split[1] == 'remotes': + # refs/remotes//foo/bar + # This is a bad assumption, and this logic can be improved, but it + # is consistent with traditional 'gclient' logic. + # + # Local Ref: refs/remotes//foo/bar + # Remote: + # Remote Ref: foo/bar + remote = ref_split[2] + remote_ref = ref_split[3:] + elif len(ref_split) >= 2 and ref_split[0] in remotes: + # origin/master + # + # Local Ref: refs/remotes/origin/master + # Remote Ref: master + remote = ref_split[0] + remote_ref = ref_split[1:] + local_ref = ('refs', 'remotes') + ref_split + + # (Default) The refish has multiple paths. Assume at least that it's a + # branch name. + # + # foo/bar + # refs/foo/bar + # + # Local Ref: foo/bar + # Remote ref: foo/bar + else: + # It could be a hash, a short-hash, or a tag + is_branch = False + + # Determine how the ref should be referenced remotely + if is_branch: + # foo/bar/baz => origin/foo/bar/baz + remote_refspec = (remote,) + remote_ref + else: + # Refer to the hash/tag directly. This is actually not allowed in + # 'fetch', although it oftentimes works regardless. + # + # => + remote_refspec = (ref,) + + # Calculate the upstream branch + if is_branch: + # Convert '/refs/heads/...' to 'refs/remotes/REMOTE/...' + if ref_split[:2] == ('refs', 'heads'): + upstream_branch = ('refs', 'remotes', remote) + ref_split[2:] + else: + upstream_branch = ref_split + else: + upstream_branch = None + + def compose(ref_tuple): + return '/'.join(ref_tuple) if ref_tuple else ref_tuple + + return cls( + source=ref, + is_branch=is_branch, + local_ref=compose(local_ref), + remote=remote, + remote_ref=compose(remote_ref), + remote_refspec=compose(remote_refspec), + upstream_branch=compose(upstream_branch), + ) + + class GitWrapper(SCMWrapper): """Wrapper for Git""" name = 'git' @@ -259,6 +392,10 @@ class GitWrapper(SCMWrapper): except OSError: return False + def ParseRefish(self, value, **kwargs): + kwargs.setdefault('remote', self.remote) + return GitRefish.Parse(value, **kwargs) + def GetCheckoutRoot(self): return scm.GIT.GetCheckoutRoot(self.checkout_path) @@ -293,7 +430,7 @@ class GitWrapper(SCMWrapper): cwd=self.checkout_path, filter_fn=GitDiffFilterer(self.relpath).Filter, print_func=self.Print) - def _FetchAndReset(self, revision, file_list, options): + def _FetchAndReset(self, refish, file_list, options): """Equivalent to git fetch; git reset.""" quiet = [] if not options.verbose: @@ -301,7 +438,7 @@ class GitWrapper(SCMWrapper): self._UpdateBranchHeads(options, fetch=False) self._Fetch(options, prune=True, quiet=options.verbose) - self._Run(['reset', '--hard', revision] + quiet, options) + self._Run(['reset', '--hard', refish.local_ref] + quiet, options) if file_list is not None: files = self._Capture(['ls-files']).splitlines() file_list.extend([os.path.join(self.checkout_path, f) for f in files]) @@ -329,7 +466,7 @@ class GitWrapper(SCMWrapper): self._CheckMinVersion("1.6.6") # If a dependency is not pinned, track the default remote branch. - default_rev = 'refs/remotes/%s/master' % self.remote + default_rev = 'refs/remotes/%s/master' % (self.remote,) url, deps_revision = gclient_utils.SplitUrlRevision(self.url) rev_str = "" revision = deps_revision @@ -366,16 +503,7 @@ class GitWrapper(SCMWrapper): verbose = ['--verbose'] printed_path = True - if revision.startswith('refs/'): - rev_type = "branch" - elif revision.startswith(self.remote + '/'): - # Rewrite remote refs to their local equivalents. - revision = 'refs/remotes/' + revision - rev_type = "branch" - else: - # hash is also a tag, only make a distinction at checkout - rev_type = "hash" - + refish = self.ParseRefish(revision) mirror = self._GetMirror(url, options) if mirror: url = mirror.mirror_path @@ -386,10 +514,10 @@ class GitWrapper(SCMWrapper): if mirror: self._UpdateMirror(mirror, options) try: - self._Clone(revision, url, options) + self._Clone(refish.local_ref, url, options) except subprocess2.CalledProcessError: self._DeleteOrMove(options.force) - self._Clone(revision, url, options) + self._Clone(refish.local_ref, url, options) if deps_revision and deps_revision.startswith('branch-heads/'): deps_branch = deps_revision.replace('branch-heads/', '') self._Capture(['branch', deps_branch, deps_revision]) @@ -430,7 +558,7 @@ class GitWrapper(SCMWrapper): self._CheckClean(rev_str) # Switch over to the new upstream self._Run(['remote', 'set-url', self.remote, url], options) - self._FetchAndReset(revision, file_list, options) + self._FetchAndReset(refish, file_list, options) return_early = True if return_early: @@ -473,7 +601,8 @@ class GitWrapper(SCMWrapper): else: raise gclient_utils.Error('Invalid Upstream: %s' % upstream_branch) - if not scm.GIT.IsValidRevision(self.checkout_path, revision, sha_only=True): + if not scm.GIT.IsValidRevision(self.checkout_path, refish.local_ref, + sha_only=True): # Update the remotes first so we have all the refs. remote_output = scm.GIT.Capture(['remote'] + verbose + ['update'], cwd=self.checkout_path) @@ -493,7 +622,7 @@ class GitWrapper(SCMWrapper): # case 0 self._CheckClean(rev_str) self._CheckDetachedHead(rev_str, options) - if self._Capture(['rev-list', '-n', '1', 'HEAD']) == revision: + if self._Capture(['rev-list', '-n', '1', 'HEAD']) == refish.local_ref: self.Print('Up-to-date; skipping checkout.') else: # 'git checkout' may need to overwrite existing untracked files. Allow @@ -511,8 +640,8 @@ class GitWrapper(SCMWrapper): if scm.GIT.IsGitSvn(self.checkout_path) and upstream_branch is not None: # Our git-svn branch (upstream_branch) is our upstream self._AttemptRebase(upstream_branch, files, options, - newbase=revision, printed_path=printed_path, - merge=options.merge) + newbase=refish.local_ref, + printed_path=printed_path, merge=options.merge) printed_path = True else: # Can't find a merge-base since we don't know our upstream. That makes @@ -520,19 +649,19 @@ class GitWrapper(SCMWrapper): # assume origin is our upstream since that's what the old behavior was. upstream_branch = self.remote if options.revision or deps_revision: - upstream_branch = revision + upstream_branch = refish.local_ref self._AttemptRebase(upstream_branch, files, options, printed_path=printed_path, merge=options.merge) printed_path = True - elif rev_type == 'hash': + elif not refish.is_branch: # case 2 self._AttemptRebase(upstream_branch, files, options, - newbase=revision, printed_path=printed_path, + newbase=refish.local_ref, printed_path=printed_path, merge=options.merge) printed_path = True - elif revision.replace('heads', 'remotes/' + self.remote) != upstream_branch: + elif refish.upstream_branch != upstream_branch: # case 4 - new_base = revision.replace('heads', 'remotes/' + self.remote) + new_base = refish.upstream_branch if not printed_path: self.Print('_____ %s%s' % (self.relpath, rev_str), timestamp=False) switch_error = ("Switching upstream branch from %s to %s\n" @@ -663,9 +792,8 @@ class GitWrapper(SCMWrapper): _, deps_revision = gclient_utils.SplitUrlRevision(self.url) if not deps_revision: deps_revision = default_rev - if deps_revision.startswith('refs/heads/'): - deps_revision = deps_revision.replace('refs/heads/', self.remote + '/') - deps_revision = self.GetUsableRev(deps_revision, options) + refish = self.ParseRefish(deps_revision) + deps_revision = self.GetUsableRev(refish.remote_refspec, options) if file_list is not None: files = self._Capture(['diff', deps_revision, '--name-only']).split() @@ -842,14 +970,16 @@ class GitWrapper(SCMWrapper): self.Print('_____ removing non-empty tmp dir %s' % tmp_dir) gclient_utils.rmtree(tmp_dir) self._UpdateBranchHeads(options, fetch=True) - self._Checkout(options, revision.replace('refs/heads/', ''), quiet=True) + + refish = self.ParseRefish(revision) + self._Checkout(options, refish.local_ref, quiet=True) if self._GetCurrentBranch() is None: # Squelch git's very verbose detached HEAD warning and use our own self.Print( - ('Checked out %s to a detached HEAD. Before making any commits\n' - 'in this repo, you should use \'git checkout \' to switch to\n' - 'an existing branch or use \'git checkout %s -b \' to\n' - 'create a new branch for your work.') % (revision, self.remote)) + "Checked out %s to a detached HEAD. Before making any commits\n" + "in this repo, you should use 'git checkout ' to switch to\n" + "an existing branch or use 'git checkout %s -b ' to\n" + "create a new branch for your work." % (revision, self.remote)) def _AskForData(self, prompt, options): if options.jobs > 1: diff --git a/tests/gclient_scm_test.py b/tests/gclient_scm_test.py index e33ee0232..fca43b8af 100755 --- a/tests/gclient_scm_test.py +++ b/tests/gclient_scm_test.py @@ -1278,7 +1278,6 @@ class ManagedGitWrapperTestCaseMox(BaseTestCase): cwd=self.base_path).AndRaise(error) gclient_scm.GitWrapper._Fetch(options) gclient_scm.scm.GIT.Capture(['svn', 'fetch'], cwd=self.base_path) - gclient_scm.GitWrapper._Fetch(options) self.mox.StubOutWithMock(gclient_scm.scm.GIT, 'IsGitSvn', True) gclient_scm.scm.GIT.IsGitSvn(cwd=self.base_path).MultipleTimes( @@ -1289,6 +1288,8 @@ class ManagedGitWrapperTestCaseMox(BaseTestCase): ).AndReturn(True) gclient_scm.scm.GIT.IsValidRevision(cwd=self.base_path, rev=too_big ).MultipleTimes(2).AndReturn(False) + # pylint: disable=E1120 + gclient_scm.GitWrapper._Fetch(options) gclient_scm.os.path.isdir(self.base_path).AndReturn(False) gclient_scm.os.path.isdir(self.base_path).MultipleTimes().AndReturn(True) @@ -1580,6 +1581,143 @@ class UnmanagedGitWrapperTestCase(BaseGitWrapperTestCase): self.checkstdout('________ unmanaged solution; skipping .\n') +class GitRefishTestCase(unittest.TestCase): + + @staticmethod + def parse(revision, **kwargs): + kwargs.setdefault('remote', 'origin') + kwargs.setdefault('other_remotes', ('server', 'backup')) + return gclient_scm.GitRefish.Parse(revision, **kwargs) + + def testParse(self): + LONG_HASH = '0c745b5ff533cf50a8731e168908644a9d9be4cf' + SHORT_HASH = '0c745b5' + TARGETS = ( + ( + 'refs/heads/master', + gclient_scm.GitRefish( + source='refs/heads/master', + is_branch=True, + local_ref='master', + remote='origin', + remote_ref='master', + remote_refspec='origin/master', + upstream_branch='refs/remotes/origin/master', + ), + ), + + ( + 'refs/special/magic', + gclient_scm.GitRefish( + source='refs/special/magic', + is_branch=True, + local_ref='refs/special/magic', + remote='origin', + remote_ref='refs/special/magic', + remote_refspec='origin/refs/special/magic', + upstream_branch='refs/special/magic', + ) + ), + + ( + 'origin/foo/bar', + gclient_scm.GitRefish( + source='origin/foo/bar', + is_branch=True, + local_ref='refs/remotes/origin/foo/bar', + remote='origin', + remote_ref='foo/bar', + remote_refspec='origin/foo/bar', + upstream_branch='origin/foo/bar', + ) + ), + + ( + 'server/foo/bar', + gclient_scm.GitRefish( + source='server/foo/bar', + is_branch=True, + local_ref='refs/remotes/server/foo/bar', + remote='server', + remote_ref='foo/bar', + remote_refspec='server/foo/bar', + upstream_branch='server/foo/bar', + ), + ), + + ( + 'refs/remotes/foo/bar/baz', + gclient_scm.GitRefish( + source='refs/remotes/foo/bar/baz', + is_branch=True, + local_ref='refs/remotes/foo/bar/baz', + remote='foo', + remote_ref='bar/baz', + remote_refspec='foo/bar/baz', + upstream_branch='refs/remotes/foo/bar/baz', + ) + ), + + ( + 'foo/bar', + gclient_scm.GitRefish( + source='foo/bar', + is_branch=True, + local_ref='foo/bar', + remote='origin', + remote_ref='foo/bar', + remote_refspec='origin/foo/bar', + upstream_branch='foo/bar', + ), + ), + + ( + LONG_HASH, + gclient_scm.GitRefish( + source=LONG_HASH, + is_branch=False, + local_ref=LONG_HASH, + remote='origin', + remote_ref=LONG_HASH, + remote_refspec=LONG_HASH, + upstream_branch=None, + ), + ), + + # Short hash (consider it a hash) + ( + SHORT_HASH, + gclient_scm.GitRefish( + source=SHORT_HASH, + is_branch=False, + local_ref=SHORT_HASH, + remote='origin', + remote_ref=SHORT_HASH, + remote_refspec=SHORT_HASH, + upstream_branch=None, + ), + ), + + # Unqualified branches are currently parsed as hash/tag + ( + 'master', + gclient_scm.GitRefish( + source='master', + is_branch=False, + local_ref='master', + remote='origin', + remote_ref='master', + remote_refspec='master', + upstream_branch=None, + ) + ), + ) + + for value, refish in TARGETS: + parsed_refish = self.parse(value) + self.assertEqual(parsed_refish, refish) + + if __name__ == '__main__': level = logging.DEBUG if '-v' in sys.argv else logging.FATAL logging.basicConfig( diff --git a/tests/gclient_test.py b/tests/gclient_test.py index aff9174e1..87b94e659 100755 --- a/tests/gclient_test.py +++ b/tests/gclient_test.py @@ -48,7 +48,7 @@ class SCMMock(object): def DoesRemoteURLMatch(self, _): return True - def GetActualRemoteURL(self, _): + def GetActualRemoteURL(self): return self.url