diff --git a/gclient.py b/gclient.py index 16347a70d..d39d5824b 100755 --- a/gclient.py +++ b/gclient.py @@ -295,7 +295,19 @@ class DependencySettings(object): # These are not mutable: self._parent = parent self._deps_file = deps_file - self._url = url + + # Post process the url to remove trailing slashes. + if isinstance(url, str): + # urls are sometime incorrectly written as proto://host/path/@rev. + # Replace it to proto://host/path@rev. + self._url = url.replace('/@', '@') + elif isinstance(url, (None.__class__)): + self._url = url + else: + raise gclient_utils.Error( + ('dependency url must be either string or None, ' + 'instead of %s') % url.__class__.__name__) + # The condition as string (or None). Useful to keep e.g. for flatten. self._condition = condition # 'managed' determines whether or not this dependency is synced/updated @@ -321,16 +333,6 @@ class DependencySettings(object): self._custom_deps = custom_deps or {} self._custom_hooks = custom_hooks or [] - # Post process the url to remove trailing slashes. - if isinstance(self.url, str): - # urls are sometime incorrectly written as proto://host/path/@rev. - # Replace it to proto://host/path@rev. - self.set_url(self.url.replace('/@', '@')) - elif not isinstance(self.url, (None.__class__)): - raise gclient_utils.Error( - ('dependency url must be either string or None, ' - 'instead of %s') % self.url.__class__.__name__) - # Make any deps_file path platform-appropriate. if self._deps_file: for sep in ['/', '\\']: @@ -485,6 +487,9 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): # custom_deps, if any. self._should_sync = True + self._known_dependency_diff = None + self._dependency_index_state = None + self._OverrideUrl() # This is inherited from WorkItem. We want the URL to be a resource. if self.url and isinstance(self.url, str): @@ -588,6 +593,14 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): ]) return s + @property + def known_dependency_diff(self): + return self._known_dependency_diff + + @property + def dependency_index_state(self): + return self._dependency_index_state + @property def requirements(self): """Calculate the list of requirements.""" @@ -931,6 +944,12 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): if self.git_dependencies_state == gclient_eval.SUBMODULES: deps.update(self.ParseGitSubmodules()) + if self.git_dependencies_state != gclient_eval.DEPS: + # Git submodules are used - get their state. + self._known_dependency_diff = self.CreateSCM().GetSubmoduleDiff() + self._dependency_index_state = self.CreateSCM( + ).GetSubmoduleStateFromIndex() + deps_to_add = self._deps_to_objects( self._postprocess_deps(deps, rel_prefix), self._use_relative_paths) @@ -1158,6 +1177,20 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): try: start = time.time() sync_status = metrics_utils.SYNC_STATUS_FAILURE + if self.parent and self.parent.known_dependency_diff is not None: + if self._use_relative_paths: + path = self.name + else: + path = self.name[len(self.parent.name) + 1:] + current_revision = None + if path in self.parent.dependency_index_state: + current_revision = self.parent.dependency_index_state[ + path] + if path in self.parent.known_dependency_diff: + current_revision = self.parent.known_dependency_diff[ + path][1] + self._used_scm.current_revision = current_revision + self._got_revision = self._used_scm.RunCommand( command, options, args, file_list) latest_commit = self._got_revision @@ -2122,6 +2155,21 @@ it or fix the checkout. removed_cipd_entries = [] read_entries = self._ReadEntries() + # Add known dependency state + queue = list(self.dependencies) + while len(queue) > 0: + dep = queue.pop() + queue.extend(dep.dependencies) + if not dep._known_dependency_diff: + continue + + for k, v in dep._known_dependency_diff.items(): + path = f'{dep.name}/{k}' + if path in read_entries: + continue + read_entries[path] = f'https://unknown@{v[1]}' + + # We process entries sorted in reverse to ensure a child dir is # always deleted before its parent dir. # This is especially important for submodules with pinned revisions diff --git a/gclient_scm.py b/gclient_scm.py index 635590619..684abf19a 100644 --- a/gclient_scm.py +++ b/gclient_scm.py @@ -229,6 +229,7 @@ class GitWrapper(SCMWrapper): filter_kwargs['predicate'] = self.out_cb self.filter = gclient_utils.GitFilter(**filter_kwargs) self._running_under_rosetta = None + self.current_revision = None def GetCheckoutRoot(self): return scm.GIT.GetCheckoutRoot(self.checkout_path) @@ -250,6 +251,69 @@ class GitWrapper(SCMWrapper): ['-c', 'core.quotePath=false', 'diff', '--name-only', base]) )).split() + def GetSubmoduleStateFromIndex(self): + """Returns a map where keys are submodule names and values are commit + hashes. It reads data from the Git index, so only committed values are + present.""" + out = self._Capture(['ls-files', '-s']) + result = {} + for l in out.split('\n'): + if not l.startswith('160000'): + # Not a submodule + continue + (_, commit, _, filepath) = l.split(maxsplit=3) + result[filepath] = commit + return result + + def GetSubmoduleDiff(self): + """Returns a map where keys are submodule names and values are tuples of + (old_commit_hash, new_commit_hash). old_commit_hash matches the Git + index, whereas new_commit_hash matches currently checked out commit + hash.""" + out = self._Capture([ + 'diff', '--no-prefix', '--no-ext-diff', '--no-color', + '--ignore-submodules=dirty', '--submodule=short' + ]) + NO_COMMIT = 40 * '0' + committed_submodule = None + checked_submodule = None + filepath = None + state = 0 + diff = {} + # Parsing git diff uses simple state machine. States: + # 0 - start state + # 1 - diff file/line detected, ready to process content + # 2 - gitlink detected, ready to process gitlink past and current + # content. + # 3 - past gitlink content detected. It contains a commit hash that's in + # git index. + # 4 - new gitlink content detected. It contains currently checked + # commit. At this point, we have all information needed, and we can + # reset state to 0. + for l in out.split('\n'): + if l.startswith('diff --git'): + # New file detected, reset state. + state = 1 + elif state == 1 and l.startswith('index') and l.endswith('160000'): + # We detected gitlink + state = 2 + elif state == 2 and l.startswith('+++ '): + # This line contains filename + filepath = l[4:] + state = 3 + elif state == 3 and l.startswith('-Subproject commit '): + # This line contains what commit hash Git index expects + # (ls-files). + committed_submodule = l.split(' ')[-1] + state = 4 + elif state == 4 and l.startswith('+Subproject commit '): + # This line contains currently checked out commit for this submodule. + checked_submodule = l.split(' ')[-1] + if NO_COMMIT not in (committed_submodule, checked_submodule): + diff[filepath] = (committed_submodule, checked_submodule) + state = 0 + return diff + def diff(self, options, _args, _file_list): _, revision = gclient_utils.SplitUrlRevision(self.url) if not revision: @@ -638,7 +702,6 @@ class GitWrapper(SCMWrapper): raise gclient_utils.Error("Unsupported argument(s): %s" % ",".join(args)) - current_revision = None url, deps_revision = gclient_utils.SplitUrlRevision(self.url) revision = deps_revision managed = True @@ -714,11 +777,11 @@ class GitWrapper(SCMWrapper): self._UpdateMirrorIfNotContains(mirror, options, rev_type, revision) try: - current_revision = self._Clone(revision, url, options) + self.current_revision = self._Clone(revision, url, options) except subprocess2.CalledProcessError as e: logging.warning('Clone failed due to: %s', e) self._DeleteOrMove(options.force) - current_revision = self._Clone(revision, url, options) + self.current_revision = self._Clone(revision, url, options) if file_list is not None: files = self._Capture( ['-c', 'core.quotePath=false', 'ls-files']).splitlines() @@ -742,6 +805,17 @@ class GitWrapper(SCMWrapper): self.relpath) return self._Capture(['rev-parse', '--verify', 'HEAD']) + # Special case for rev_type = hash. If we use submodules, we can check + # information already. + if rev_type == 'hash': + if self.current_revision == revision: + if verbose: + self.Print('Using submodule information to skip check') + if options.reset or options.force: + self._Scrub('HEAD', options) + + return revision + self._maybe_break_locks(options) if mirror: diff --git a/tests/gclient_scm_test.py b/tests/gclient_scm_test.py index 5cee7364d..0eea289fa 100755 --- a/tests/gclient_scm_test.py +++ b/tests/gclient_scm_test.py @@ -139,6 +139,7 @@ Personalized from :3 M 100644 :4 a M 100644 :5 b +M 160000 1111111111111111111111111111111111111111 submodule blob mark :7 @@ -267,7 +268,7 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase): scm.revert(options, self.args, file_list) self.assertEqual(file_list, []) self.assertEqual(scm.revinfo(options, self.args, None), - 'a7142dc9f0009350b96a11f372b6ea658592aa95') + '4091c7d010ca99d0f2dd416d4b70b758ae432187') sys.stdout.close() def testRevertModified(self): @@ -287,7 +288,7 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase): scm.diff(options, self.args, file_list) self.assertEqual(file_list, []) self.assertEqual(scm.revinfo(options, self.args, None), - 'a7142dc9f0009350b96a11f372b6ea658592aa95') + '4091c7d010ca99d0f2dd416d4b70b758ae432187') sys.stdout.close() def testRevertNew(self): @@ -309,7 +310,7 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase): scm.diff(options, self.args, file_list) self.assertEqual(file_list, []) self.assertEqual(scm.revinfo(options, self.args, None), - 'a7142dc9f0009350b96a11f372b6ea658592aa95') + '4091c7d010ca99d0f2dd416d4b70b758ae432187') sys.stdout.close() def testStatusRef(self): @@ -389,14 +390,16 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase): if not self.enabled: return options = self.Options() - expected_file_list = [join(self.base_path, x) for x in ['a', 'b']] + expected_file_list = [ + join(self.base_path, x) for x in ['a', 'b', 'submodule'] + ] scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath) file_list = [] scm.update(options, (), file_list) self.assertEqual(file_list, expected_file_list) self.assertEqual(scm.revinfo(options, (), None), - 'a7142dc9f0009350b96a11f372b6ea658592aa95') + '4091c7d010ca99d0f2dd416d4b70b758ae432187') self.assertEqual( scm._Capture(['config', '--get', 'diff.ignoreSubmodules']), 'dirty') self.assertEqual( @@ -418,12 +421,13 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase): rev = scm.revinfo(options, (), None) file_list = [] scm.update(options, (), file_list) - self.assertEqual(file_list, - [join(self.base_path, x) for x in ['a', 'b', 'c']]) + self.assertEqual( + file_list, + [join(self.base_path, x) for x in ['a', 'b', 'c', 'submodule']]) # The actual commit that is created is unstable, so we verify its tree # and parents instead. self.assertEqual(scm._Capture(['rev-parse', 'HEAD:']), - 'd2e35c10ac24d6c621e14a1fcadceb533155627d') + '3a3ba72731fa208d37b06598a129ba93970325df') parent = 'HEAD^' if sys.platform != 'win32' else 'HEAD^^' self.assertEqual(scm._Capture(['rev-parse', parent + '1']), rev) self.assertEqual(scm._Capture(['rev-parse', parent + '2']), @@ -445,12 +449,13 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase): '(y)es / (q)uit / (s)kip : ', 'y') file_list = [] scm.update(options, (), file_list) - self.assertEqual(file_list, - [join(self.base_path, x) for x in ['a', 'b', 'c']]) + self.assertEqual( + file_list, + [join(self.base_path, x) for x in ['a', 'b', 'c', 'submodule']]) # The actual commit that is created is unstable, so we verify its tree # and parent instead. self.assertEqual(scm._Capture(['rev-parse', 'HEAD:']), - 'd2e35c10ac24d6c621e14a1fcadceb533155627d') + '3a3ba72731fa208d37b06598a129ba93970325df') parent = 'HEAD^' if sys.platform != 'win32' else 'HEAD^^' self.assertEqual(scm._Capture(['rev-parse', parent + '1']), scm._Capture(['rev-parse', 'origin/main'])) @@ -801,14 +806,15 @@ class UnmanagedGitWrapperTestCase(BaseGitWrapperTestCase): self.relpath = '.' self.base_path = join(self.root_dir, self.relpath) url_with_commit_ref = origin_root_dir +\ - '@a7142dc9f0009350b96a11f372b6ea658592aa95' + '@4091c7d010ca99d0f2dd416d4b70b758ae432187' scm = gclient_scm.GitWrapper(url_with_commit_ref, self.root_dir, self.relpath) expected_file_list = [ join(self.base_path, "a"), - join(self.base_path, "b") + join(self.base_path, "b"), + join(self.base_path, "submodule"), ] file_list = [] options.revision = 'unmanaged' @@ -816,11 +822,11 @@ class UnmanagedGitWrapperTestCase(BaseGitWrapperTestCase): self.assertEqual(file_list, expected_file_list) self.assertEqual(scm.revinfo(options, (), None), - 'a7142dc9f0009350b96a11f372b6ea658592aa95') + '4091c7d010ca99d0f2dd416d4b70b758ae432187') # indicates detached HEAD self.assertEqual(self.getCurrentBranch(), None) self.checkInStdout( - 'Checked out a7142dc9f0009350b96a11f372b6ea658592aa95 to a detached HEAD' + 'Checked out 4091c7d010ca99d0f2dd416d4b70b758ae432187 to a detached HEAD' ) def testUpdateCloneOnBranch(self): @@ -1602,6 +1608,57 @@ class CheckDiffTest(fake_repos.FakeReposTestBase): self.assertFalse(scm.check_diff(self.githash('repo_1', 2))) +class Submodules(BaseGitWrapperTestCase): + submodule_hash = '1111111111111111111111111111111111111111' + + def testGetSubmoduleClean(self): + scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath) + options = self.Options() + scm.update(options, None, []) + self.assertEqual(scm.GetSubmoduleStateFromIndex(), + {'submodule': self.submodule_hash}) + self.assertEqual(scm.GetSubmoduleDiff(), {}) + + def testGetSubmoduleModified(self): + scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath) + options = self.Options() + scm.update(options, None, []) + + # Create submodule diff + submodule_dir = os.path.join(self.root_dir, 'submodule') + subprocess2.check_output(['git', '-C', submodule_dir, 'init']) + subprocess2.check_output([ + 'git', '-C', submodule_dir, 'commit', '-m', 'foo', '--allow-empty' + ]) + new_rev = subprocess2.check_output( + ['git', '-C', submodule_dir, 'rev-parse', + 'HEAD']).decode('utf-8').strip() + + # And file diff + with open(os.path.join(self.root_dir, 'a'), 'w') as f: + f.write('foo') + + self.assertEqual(scm.GetSubmoduleStateFromIndex(), + {'submodule': self.submodule_hash}) + + self.assertEqual(scm.GetSubmoduleDiff(), + {'submodule': (self.submodule_hash, new_rev)}) + + def testGetSubmoduleDeleted(self): + scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath) + options = self.Options() + scm.update(options, None, []) + subprocess2.check_output( + ['git', '-C', self.root_dir, 'rm', 'submodule']) + + # When git removes submodule, it's autmatically staged and content is + # unavailable. Therefore, the index shouldn't have any entries and diff + # should be empty. + self.assertEqual(scm.GetSubmoduleStateFromIndex(), {}) + + self.assertEqual(scm.GetSubmoduleDiff(), {}) + + if 'unittest.util' in __import__('sys').modules: # Show full diff in self.assertEqual. __import__('sys').modules['unittest.util']._MAX_LENGTH = 999999999