Reland "[gclient] Read submodule status information"

This reverts commit c0366630f7.

Original change's description:
> Revert "[gclient] Read submodule status information"
>
> This reverts commit 39501ba8c9.
>
> Reason for revert: broke submodule update builder: gclient revinfo
>
>
>
> Original change's description:
> > [gclient] Read submodule status information
> >
> > This allow us to skip sync if we know the state is correct.
> >
> > R=gavinmak@google.com
> >
> > Bug: 40283612, 40942309
> > Change-Id: I30fd5bfb9ca8ab0f7dcce567e2a5cb4aebdc7b2f
> > Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5480172
> > Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>
> > Reviewed-by: Gavin Mak <gavinmak@google.com>
>
> Bug: 40283612, 40942309
> Change-Id: Iaf478c65eb6f18b19afc3fdda2996190bdc58613
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5506653
> Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
> Auto-Submit: Josip Sokcevic <sokcevic@chromium.org>
> Commit-Queue: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>

Bug: 40283612, 40942309
Change-Id: I707b0eb1d3850c415e474df8cd43f561054017cb
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5506833
Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>
Reviewed-by: Gavin Mak <gavinmak@google.com>
changes/33/5506833/2
Josip Sokcevic 10 months ago committed by LUCI CQ
parent 7ababdfe02
commit 452cb7b6cb

@ -295,7 +295,19 @@ class DependencySettings(object):
# These are not mutable:
self._parent = parent
self._deps_file = deps_file
self._url = url
# Post process the url to remove trailing slashes.
if isinstance(url, str):
# urls are sometime incorrectly written as proto://host/path/@rev.
# Replace it to proto://host/path@rev.
self._url = url.replace('/@', '@')
elif isinstance(url, (None.__class__)):
self._url = url
else:
raise gclient_utils.Error(
('dependency url must be either string or None, '
'instead of %s') % url.__class__.__name__)
# The condition as string (or None). Useful to keep e.g. for flatten.
self._condition = condition
# 'managed' determines whether or not this dependency is synced/updated
@ -321,16 +333,6 @@ class DependencySettings(object):
self._custom_deps = custom_deps or {}
self._custom_hooks = custom_hooks or []
# Post process the url to remove trailing slashes.
if isinstance(self.url, str):
# urls are sometime incorrectly written as proto://host/path/@rev.
# Replace it to proto://host/path@rev.
self.set_url(self.url.replace('/@', '@'))
elif not isinstance(self.url, (None.__class__)):
raise gclient_utils.Error(
('dependency url must be either string or None, '
'instead of %s') % self.url.__class__.__name__)
# Make any deps_file path platform-appropriate.
if self._deps_file:
for sep in ['/', '\\']:
@ -485,6 +487,9 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
# custom_deps, if any.
self._should_sync = True
self._known_dependency_diff = None
self._dependency_index_state = None
self._OverrideUrl()
# This is inherited from WorkItem. We want the URL to be a resource.
if self.url and isinstance(self.url, str):
@ -588,6 +593,14 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
])
return s
@property
def known_dependency_diff(self):
return self._known_dependency_diff
@property
def dependency_index_state(self):
return self._dependency_index_state
@property
def requirements(self):
"""Calculate the list of requirements."""
@ -931,6 +944,12 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
if self.git_dependencies_state == gclient_eval.SUBMODULES:
deps.update(self.ParseGitSubmodules())
if self.git_dependencies_state != gclient_eval.DEPS:
# Git submodules are used - get their state.
self._known_dependency_diff = self.CreateSCM().GetSubmoduleDiff()
self._dependency_index_state = self.CreateSCM(
).GetSubmoduleStateFromIndex()
deps_to_add = self._deps_to_objects(
self._postprocess_deps(deps, rel_prefix), self._use_relative_paths)
@ -1158,6 +1177,20 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
try:
start = time.time()
sync_status = metrics_utils.SYNC_STATUS_FAILURE
if self.parent and self.parent.known_dependency_diff is not None:
if self._use_relative_paths:
path = self.name
else:
path = self.name[len(self.parent.name) + 1:]
current_revision = None
if path in self.parent.dependency_index_state:
current_revision = self.parent.dependency_index_state[
path]
if path in self.parent.known_dependency_diff:
current_revision = self.parent.known_dependency_diff[
path][1]
self._used_scm.current_revision = current_revision
self._got_revision = self._used_scm.RunCommand(
command, options, args, file_list)
latest_commit = self._got_revision
@ -2122,6 +2155,21 @@ it or fix the checkout.
removed_cipd_entries = []
read_entries = self._ReadEntries()
# Add known dependency state
queue = list(self.dependencies)
while len(queue) > 0:
dep = queue.pop()
queue.extend(dep.dependencies)
if not dep._known_dependency_diff:
continue
for k, v in dep._known_dependency_diff.items():
path = f'{dep.name}/{k}'
if path in read_entries:
continue
read_entries[path] = f'https://unknown@{v[1]}'
# We process entries sorted in reverse to ensure a child dir is
# always deleted before its parent dir.
# This is especially important for submodules with pinned revisions

@ -229,6 +229,7 @@ class GitWrapper(SCMWrapper):
filter_kwargs['predicate'] = self.out_cb
self.filter = gclient_utils.GitFilter(**filter_kwargs)
self._running_under_rosetta = None
self.current_revision = None
def GetCheckoutRoot(self):
return scm.GIT.GetCheckoutRoot(self.checkout_path)
@ -250,6 +251,69 @@ class GitWrapper(SCMWrapper):
['-c', 'core.quotePath=false', 'diff', '--name-only', base])
)).split()
def GetSubmoduleStateFromIndex(self):
"""Returns a map where keys are submodule names and values are commit
hashes. It reads data from the Git index, so only committed values are
present."""
out = self._Capture(['ls-files', '-s'])
result = {}
for l in out.split('\n'):
if not l.startswith('160000'):
# Not a submodule
continue
(_, commit, _, filepath) = l.split(maxsplit=3)
result[filepath] = commit
return result
def GetSubmoduleDiff(self):
"""Returns a map where keys are submodule names and values are tuples of
(old_commit_hash, new_commit_hash). old_commit_hash matches the Git
index, whereas new_commit_hash matches currently checked out commit
hash."""
out = self._Capture([
'diff', '--no-prefix', '--no-ext-diff', '--no-color',
'--ignore-submodules=dirty', '--submodule=short'
])
NO_COMMIT = 40 * '0'
committed_submodule = None
checked_submodule = None
filepath = None
state = 0
diff = {}
# Parsing git diff uses simple state machine. States:
# 0 - start state
# 1 - diff file/line detected, ready to process content
# 2 - gitlink detected, ready to process gitlink past and current
# content.
# 3 - past gitlink content detected. It contains a commit hash that's in
# git index.
# 4 - new gitlink content detected. It contains currently checked
# commit. At this point, we have all information needed, and we can
# reset state to 0.
for l in out.split('\n'):
if l.startswith('diff --git'):
# New file detected, reset state.
state = 1
elif state == 1 and l.startswith('index') and l.endswith('160000'):
# We detected gitlink
state = 2
elif state == 2 and l.startswith('+++ '):
# This line contains filename
filepath = l[4:]
state = 3
elif state == 3 and l.startswith('-Subproject commit '):
# This line contains what commit hash Git index expects
# (ls-files).
committed_submodule = l.split(' ')[-1]
state = 4
elif state == 4 and l.startswith('+Subproject commit '):
# This line contains currently checked out commit for this submodule.
checked_submodule = l.split(' ')[-1]
if NO_COMMIT not in (committed_submodule, checked_submodule):
diff[filepath] = (committed_submodule, checked_submodule)
state = 0
return diff
def diff(self, options, _args, _file_list):
_, revision = gclient_utils.SplitUrlRevision(self.url)
if not revision:
@ -638,7 +702,6 @@ class GitWrapper(SCMWrapper):
raise gclient_utils.Error("Unsupported argument(s): %s" %
",".join(args))
current_revision = None
url, deps_revision = gclient_utils.SplitUrlRevision(self.url)
revision = deps_revision
managed = True
@ -714,11 +777,11 @@ class GitWrapper(SCMWrapper):
self._UpdateMirrorIfNotContains(mirror, options, rev_type,
revision)
try:
current_revision = self._Clone(revision, url, options)
self.current_revision = self._Clone(revision, url, options)
except subprocess2.CalledProcessError as e:
logging.warning('Clone failed due to: %s', e)
self._DeleteOrMove(options.force)
current_revision = self._Clone(revision, url, options)
self.current_revision = self._Clone(revision, url, options)
if file_list is not None:
files = self._Capture(
['-c', 'core.quotePath=false', 'ls-files']).splitlines()
@ -742,6 +805,17 @@ class GitWrapper(SCMWrapper):
self.relpath)
return self._Capture(['rev-parse', '--verify', 'HEAD'])
# Special case for rev_type = hash. If we use submodules, we can check
# information already.
if rev_type == 'hash':
if self.current_revision == revision:
if verbose:
self.Print('Using submodule information to skip check')
if options.reset or options.force:
self._Scrub('HEAD', options)
return revision
self._maybe_break_locks(options)
if mirror:

@ -139,6 +139,7 @@ Personalized
from :3
M 100644 :4 a
M 100644 :5 b
M 160000 1111111111111111111111111111111111111111 submodule
blob
mark :7
@ -267,7 +268,7 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
scm.revert(options, self.args, file_list)
self.assertEqual(file_list, [])
self.assertEqual(scm.revinfo(options, self.args, None),
'a7142dc9f0009350b96a11f372b6ea658592aa95')
'4091c7d010ca99d0f2dd416d4b70b758ae432187')
sys.stdout.close()
def testRevertModified(self):
@ -287,7 +288,7 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
scm.diff(options, self.args, file_list)
self.assertEqual(file_list, [])
self.assertEqual(scm.revinfo(options, self.args, None),
'a7142dc9f0009350b96a11f372b6ea658592aa95')
'4091c7d010ca99d0f2dd416d4b70b758ae432187')
sys.stdout.close()
def testRevertNew(self):
@ -309,7 +310,7 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
scm.diff(options, self.args, file_list)
self.assertEqual(file_list, [])
self.assertEqual(scm.revinfo(options, self.args, None),
'a7142dc9f0009350b96a11f372b6ea658592aa95')
'4091c7d010ca99d0f2dd416d4b70b758ae432187')
sys.stdout.close()
def testStatusRef(self):
@ -389,14 +390,16 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
if not self.enabled:
return
options = self.Options()
expected_file_list = [join(self.base_path, x) for x in ['a', 'b']]
expected_file_list = [
join(self.base_path, x) for x in ['a', 'b', 'submodule']
]
scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath)
file_list = []
scm.update(options, (), file_list)
self.assertEqual(file_list, expected_file_list)
self.assertEqual(scm.revinfo(options, (), None),
'a7142dc9f0009350b96a11f372b6ea658592aa95')
'4091c7d010ca99d0f2dd416d4b70b758ae432187')
self.assertEqual(
scm._Capture(['config', '--get', 'diff.ignoreSubmodules']), 'dirty')
self.assertEqual(
@ -418,12 +421,13 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
rev = scm.revinfo(options, (), None)
file_list = []
scm.update(options, (), file_list)
self.assertEqual(file_list,
[join(self.base_path, x) for x in ['a', 'b', 'c']])
self.assertEqual(
file_list,
[join(self.base_path, x) for x in ['a', 'b', 'c', 'submodule']])
# The actual commit that is created is unstable, so we verify its tree
# and parents instead.
self.assertEqual(scm._Capture(['rev-parse', 'HEAD:']),
'd2e35c10ac24d6c621e14a1fcadceb533155627d')
'3a3ba72731fa208d37b06598a129ba93970325df')
parent = 'HEAD^' if sys.platform != 'win32' else 'HEAD^^'
self.assertEqual(scm._Capture(['rev-parse', parent + '1']), rev)
self.assertEqual(scm._Capture(['rev-parse', parent + '2']),
@ -445,12 +449,13 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
'(y)es / (q)uit / (s)kip : ', 'y')
file_list = []
scm.update(options, (), file_list)
self.assertEqual(file_list,
[join(self.base_path, x) for x in ['a', 'b', 'c']])
self.assertEqual(
file_list,
[join(self.base_path, x) for x in ['a', 'b', 'c', 'submodule']])
# The actual commit that is created is unstable, so we verify its tree
# and parent instead.
self.assertEqual(scm._Capture(['rev-parse', 'HEAD:']),
'd2e35c10ac24d6c621e14a1fcadceb533155627d')
'3a3ba72731fa208d37b06598a129ba93970325df')
parent = 'HEAD^' if sys.platform != 'win32' else 'HEAD^^'
self.assertEqual(scm._Capture(['rev-parse', parent + '1']),
scm._Capture(['rev-parse', 'origin/main']))
@ -801,14 +806,15 @@ class UnmanagedGitWrapperTestCase(BaseGitWrapperTestCase):
self.relpath = '.'
self.base_path = join(self.root_dir, self.relpath)
url_with_commit_ref = origin_root_dir +\
'@a7142dc9f0009350b96a11f372b6ea658592aa95'
'@4091c7d010ca99d0f2dd416d4b70b758ae432187'
scm = gclient_scm.GitWrapper(url_with_commit_ref, self.root_dir,
self.relpath)
expected_file_list = [
join(self.base_path, "a"),
join(self.base_path, "b")
join(self.base_path, "b"),
join(self.base_path, "submodule"),
]
file_list = []
options.revision = 'unmanaged'
@ -816,11 +822,11 @@ class UnmanagedGitWrapperTestCase(BaseGitWrapperTestCase):
self.assertEqual(file_list, expected_file_list)
self.assertEqual(scm.revinfo(options, (), None),
'a7142dc9f0009350b96a11f372b6ea658592aa95')
'4091c7d010ca99d0f2dd416d4b70b758ae432187')
# indicates detached HEAD
self.assertEqual(self.getCurrentBranch(), None)
self.checkInStdout(
'Checked out a7142dc9f0009350b96a11f372b6ea658592aa95 to a detached HEAD'
'Checked out 4091c7d010ca99d0f2dd416d4b70b758ae432187 to a detached HEAD'
)
def testUpdateCloneOnBranch(self):
@ -1602,6 +1608,57 @@ class CheckDiffTest(fake_repos.FakeReposTestBase):
self.assertFalse(scm.check_diff(self.githash('repo_1', 2)))
class Submodules(BaseGitWrapperTestCase):
submodule_hash = '1111111111111111111111111111111111111111'
def testGetSubmoduleClean(self):
scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath)
options = self.Options()
scm.update(options, None, [])
self.assertEqual(scm.GetSubmoduleStateFromIndex(),
{'submodule': self.submodule_hash})
self.assertEqual(scm.GetSubmoduleDiff(), {})
def testGetSubmoduleModified(self):
scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath)
options = self.Options()
scm.update(options, None, [])
# Create submodule diff
submodule_dir = os.path.join(self.root_dir, 'submodule')
subprocess2.check_output(['git', '-C', submodule_dir, 'init'])
subprocess2.check_output([
'git', '-C', submodule_dir, 'commit', '-m', 'foo', '--allow-empty'
])
new_rev = subprocess2.check_output(
['git', '-C', submodule_dir, 'rev-parse',
'HEAD']).decode('utf-8').strip()
# And file diff
with open(os.path.join(self.root_dir, 'a'), 'w') as f:
f.write('foo')
self.assertEqual(scm.GetSubmoduleStateFromIndex(),
{'submodule': self.submodule_hash})
self.assertEqual(scm.GetSubmoduleDiff(),
{'submodule': (self.submodule_hash, new_rev)})
def testGetSubmoduleDeleted(self):
scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath)
options = self.Options()
scm.update(options, None, [])
subprocess2.check_output(
['git', '-C', self.root_dir, 'rm', 'submodule'])
# When git removes submodule, it's autmatically staged and content is
# unavailable. Therefore, the index shouldn't have any entries and diff
# should be empty.
self.assertEqual(scm.GetSubmoduleStateFromIndex(), {})
self.assertEqual(scm.GetSubmoduleDiff(), {})
if 'unittest.util' in __import__('sys').modules:
# Show full diff in self.assertEqual.
__import__('sys').modules['unittest.util']._MAX_LENGTH = 999999999

Loading…
Cancel
Save