Update gclient to use git config caching

This change updates all the modules used by gclient to use `scm.GIT` for git config calls over directly invoking the subprocess.

This change currently doesn't modify git_cache since the config reads and writes within it are done on bare repository. A follow-up CL will update git_cache.

A follow-up CL will also update git_cl and git_map_branches since they have shown performance improvements too: https://crrev.com/c/4697786.

Benchmarking
============
With chromium/src as the baseline super project, this change reduces about 380 git config calls out of 507 total calls on cache hits during no-op. The below numbers are benchmarked with `update_depot_tools` turned off.

Windows Benchmark
=================
Baseline (gpaste/6360045736951808): ~1min 12 sec.
With Caching (gpaste/6480065209040896): ~1min 3sec.
~12.5% decrease in gclient sync noop runtime.

Linux Benchmark
===============
Baseline (gpaste/4730436763254784): ~3.739 sec.
With Caching (gpaste/4849870978940928): ~3.534 sec.
~5.5% decrease in gclient sync noop runtime.

Bug: 1501984
Change-Id: Ib48df2d26a0c742a9b555a1e2ed6366221c7db17
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5252498
Commit-Queue: Aravind Vasudevan <aravindvasudev@google.com>
Reviewed-by: Josip Sokcevic <sokcevic@chromium.org>
changes/98/5252498/16
Aravind Vasudevan 1 year ago committed by LUCI CQ
parent 0784439733
commit 3edda8d185

@ -134,11 +134,8 @@ class SCMWrapper(object):
@staticmethod
def _get_first_remote_url(checkout_path):
log = scm.GIT.Capture(
['config', '--local', '--get-regexp', r'remote.*.url'],
cwd=checkout_path)
# Get the second token of the first line of the log.
return log.splitlines()[0].split(' ', 1)[1]
log = scm.GIT.YieldConfigRegexp(checkout_path, r'remote.*.url')
return next(log)[1]
def GetCacheMirror(self):
if getattr(self, 'cache_dir', None):
@ -592,26 +589,35 @@ class GitWrapper(SCMWrapper):
def wrapper(*args):
return_val = f(*args)
if os.path.exists(os.path.join(args[0].checkout_path, '.git')):
# If diff.ignoreSubmodules is not already set, set it to `all`.
config = subprocess2.capture(['git', 'config', '-l'],
cwd=args[0].checkout_path).decode(
'utf-8').strip().splitlines()
if 'diff.ignoresubmodules=dirty' not in config:
subprocess2.capture(
['git', 'config', 'diff.ignoreSubmodules', 'dirty'],
cwd=args[0].checkout_path)
if 'fetch.recursesubmodules=off' not in config:
subprocess2.capture(
['git', 'config', 'fetch.recurseSubmodules', 'off'],
cwd=args[0].checkout_path)
if 'push.recursesubmodules=off' not in config:
# The config updates to the project are stored in this list
# and updated consecutively after the reads. The updates
# are done this way because `scm.GIT.GetConfig` caches
# the config file and `scm.GIT.SetConfig` evicts the cache.
# This ensures we don't interleave reads and writes causing
# the cache to set and unset consecutively.
config_updates = []
if scm.GIT.GetConfig(args[0].checkout_path,
'diff.ignoresubmodules') != 'dirty':
# If diff.ignoreSubmodules is not already set, set it to `all`.
config_updates.append(('diff.ignoreSubmodules', 'dirty'))
if scm.GIT.GetConfig(args[0].checkout_path,
'fetch.recursesubmodules') != 'off':
config_updates.append(('fetch.recurseSubmodules', 'off'))
if scm.GIT.GetConfig(args[0].checkout_path,
'push.recursesubmodules') != 'off':
# The default is off, but if user sets submodules.recurse to
# on, this becomes on too. We never want to push submodules
# for gclient managed repositories. Push, even if a no-op,
# will increase `git cl upload` latency.
subprocess2.capture(
['git', 'config', 'push.recurseSubmodules', 'off'],
cwd=args[0].checkout_path)
config_updates.append(('push.recurseSubmodules', 'off'))
for update in config_updates:
scm.GIT.SetConfig(args[0].checkout_path, update[0],
update[1])
return return_val
return wrapper
@ -739,7 +745,8 @@ class GitWrapper(SCMWrapper):
# See if the url has changed (the unittests use git://foo for the url,
# let that through).
current_url = self._Capture(['config', 'remote.%s.url' % self.remote])
current_url = scm.GIT.GetConfig(self.checkout_path,
f'remote.{self.remote}.url')
return_early = False
# TODO(maruel): Delete url != 'git://foo' since it's just to make the
# unit test pass. (and update the comment above)
@ -750,12 +757,9 @@ class GitWrapper(SCMWrapper):
strp_current_url = current_url[:-4] if current_url.endswith(
'.git') else current_url
if (strp_current_url.rstrip('/') != strp_url.rstrip('/')
and url != 'git://foo' and
subprocess2.capture([
'git', 'config',
'remote.%s.gclient-auto-fix-url' % self.remote
],
cwd=self.checkout_path).strip() != 'False'):
and url != 'git://foo' and scm.GIT.GetConfigBool(
self.checkout_path,
f'remote.{self.remote}.gclient-auto-fix-url')):
self.Print('_____ switching %s from %s to new upstream %s' %
(self.relpath, current_url, url))
if not (options.force or options.reset):
@ -1553,34 +1557,30 @@ class GitWrapper(SCMWrapper):
if requested."""
if options.force or options.reset:
try:
self._Run(
['config', '--unset-all',
'remote.%s.fetch' % self.remote], options)
self._Run([
'config',
'remote.%s.fetch' % self.remote,
'+refs/heads/*:refs/remotes/%s/*' % self.remote
], options)
scm.GIT.SetConfig(self.checkout_path,
f'remote.{self.remote}.fetch',
modify_all=True)
scm.GIT.SetConfig(
self.checkout_path, f'remote.{self.remote}.fetch',
f'+refs/heads/*:refs/remotes/{self.remote}/*')
except subprocess2.CalledProcessError as e:
# If exit code was 5, it means we attempted to unset a config
# that didn't exist. Ignore it.
if e.returncode != 5:
raise
if hasattr(options, 'with_branch_heads') and options.with_branch_heads:
config_cmd = [
'config',
'remote.%s.fetch' % self.remote,
scm.GIT.SetConfig(
self.checkout_path,
f'remote.{self.remote}.fetch',
'+refs/branch-heads/*:refs/remotes/branch-heads/*',
'^\\+refs/branch-heads/\\*:.*$'
]
self._Run(config_cmd, options)
value_pattern='^\\+refs/branch-heads/\\*:.*$',
modify_all=True)
if hasattr(options, 'with_tags') and options.with_tags:
config_cmd = [
'config',
'remote.%s.fetch' % self.remote, '+refs/tags/*:refs/tags/*',
'^\\+refs/tags/\\*:.*$'
]
self._Run(config_cmd, options)
scm.GIT.SetConfig(self.checkout_path,
f'remote.{self.remote}.fetch',
'+refs/tags/*:refs/tags/*',
value_pattern='^\\+refs/tags/\\*:.*$',
modify_all=True)
def _AutoFetchRef(self, options, revision, depth=None):
"""Attempts to fetch |revision| if not available in local repo.

@ -28,6 +28,7 @@ import auth
import gclient_utils
import metrics
import metrics_utils
import scm
import subprocess2
import http.cookiejar
@ -216,12 +217,10 @@ class CookiesAuthenticator(Authenticator):
def get_gitcookies_path(cls):
if os.getenv('GIT_COOKIES_PATH'):
return os.getenv('GIT_COOKIES_PATH')
try:
path = subprocess2.check_output(
['git', 'config', '--path', 'http.cookiefile'])
return path.decode('utf-8', 'ignore').strip()
except subprocess2.CalledProcessError:
return os.path.expanduser(os.path.join('~', '.gitcookies'))
return scm.GIT.GetConfig(
os.getcwd(), 'http.cookiefile',
os.path.expanduser(os.path.join('~', '.gitcookies')))
@classmethod
def _get_gitcookies(cls):

@ -38,6 +38,7 @@ import signal
import tempfile
import textwrap
import scm
import subprocess2
from io import BytesIO
@ -349,8 +350,10 @@ def branch_config_map(option):
"""Return {branch: <|option| value>} for all branches."""
try:
reg = re.compile(r'^branch\.(.*)\.%s$' % option)
lines = get_config_regexp(reg.pattern)
return {reg.match(k).group(1): v for k, v in (l.split() for l in lines)}
return {
reg.match(k).group(1): v
for k, v in get_config_regexp(reg.pattern)
}
except subprocess2.CalledProcessError:
return {}
@ -384,11 +387,7 @@ def branches(use_limit=True, *args):
def get_config(option, default=None):
try:
return run('config', '--get', option) or default
except subprocess2.CalledProcessError:
return default
return scm.GIT.GetConfig(os.getcwd(), option, default)
def get_config_int(option, default=0):
assert isinstance(default, int)
@ -399,19 +398,11 @@ def get_config_int(option, default=0):
def get_config_list(option):
try:
return run('config', '--get-all', option).split()
except subprocess2.CalledProcessError:
return []
return scm.GIT.GetConfigList(os.getcwd(), option)
def get_config_regexp(pattern):
if IS_WIN: # pragma: no cover
# this madness is because we call git.bat which calls git.exe which
# calls bash.exe (or something to that effect). Each layer divides the
# number of ^'s by 2.
pattern = pattern.replace('^', '^' * 8)
return run('config', '--get-regexp', pattern).splitlines()
return scm.GIT.YieldConfigRegexp(os.getcwd(), pattern)
def is_fsmonitor_enabled():
@ -455,7 +446,7 @@ def del_branch_config(branch, option, scope='local'):
def del_config(option, scope='local'):
try:
run('config', '--' + scope, '--unset', option)
scm.GIT.SetConfig(os.getcwd(), option, scope=scope)
except subprocess2.CalledProcessError:
pass
@ -897,7 +888,7 @@ def set_branch_config(branch, option, value, scope='local'):
def set_config(option, value, scope='local'):
run('config', '--' + scope, option, value)
scm.GIT.SetConfig(os.getcwd(), option, value, scope=scope)
def get_dirty_files():
@ -1116,10 +1107,7 @@ def tree(treeref, recurse=False):
def get_remote_url(remote='origin'):
try:
return run('config', 'remote.%s.url' % remote)
except subprocess2.CalledProcessError:
return None
return scm.GIT.GetConfig(os.getcwd(), 'remote.%s.url' % remote)
def upstream(branch):

@ -48,8 +48,7 @@ def fetch_remotes(branch_tree):
tag_set = git.tags()
fetchspec_map = {}
all_fetchspec_configs = git.get_config_regexp(r'^remote\..*\.fetch')
for fetchspec_config in all_fetchspec_configs:
key, _, fetchspec = fetchspec_config.partition(' ')
for key, fetchspec in all_fetchspec_configs:
dest_spec = fetchspec.partition(':')[2]
remote_name = key.split('.')[1]
fetchspec_map[dest_spec] = remote_name

@ -53,11 +53,11 @@ class BasicTests(unittest.TestCase):
@mock.patch('gclient_scm.scm.GIT.Capture')
def testGetFirstRemoteUrl(self, mockCapture):
REMOTE_STRINGS = [
('remote.origin.url E:\\foo\\bar', 'E:\\foo\\bar'),
('remote.origin.url /b/foo/bar', '/b/foo/bar'),
('remote.origin.url https://foo/bar', 'https://foo/bar'),
('remote.origin.url E:\\Fo Bar\\bax', 'E:\\Fo Bar\\bax'),
('remote.origin.url git://what/"do', 'git://what/"do')
('remote.origin.url = E:\\foo\\bar', 'E:\\foo\\bar'),
('remote.origin.url = /b/foo/bar', '/b/foo/bar'),
('remote.origin.url = https://foo/bar', 'https://foo/bar'),
('remote.origin.url = E:\\Fo Bar\\bax', 'E:\\Fo Bar\\bax'),
('remote.origin.url = git://what/"do', 'git://what/"do')
]
FAKE_PATH = '/fake/path'
mockCapture.side_effect = [question for question, _ in REMOTE_STRINGS]
@ -65,10 +65,11 @@ class BasicTests(unittest.TestCase):
for _, answer in REMOTE_STRINGS:
self.assertEqual(
gclient_scm.SCMWrapper._get_first_remote_url(FAKE_PATH), answer)
gclient_scm.scm.GIT._clear_config(FAKE_PATH)
expected_calls = [
mock.call(['config', '--local', '--get-regexp', r'remote.*.url'],
cwd=FAKE_PATH) for _ in REMOTE_STRINGS
mock.call(['config', '--list'], cwd=FAKE_PATH, strip_out=False)
for _ in REMOTE_STRINGS
]
self.assertEqual(mockCapture.mock_calls, expected_calls)

@ -117,12 +117,16 @@ class CookiesAuthenticatorTest(unittest.TestCase):
os.path.expanduser(os.path.join('~', '.gitcookies')),
gerrit_util.CookiesAuthenticator().get_gitcookies_path())
subprocess2.check_output.side_effect = [b'http.cookiefile']
subprocess2.check_output.side_effect = [
b'http.cookiefile = http.cookiefile'
]
self.assertEqual(
'http.cookiefile',
gerrit_util.CookiesAuthenticator().get_gitcookies_path())
subprocess2.check_output.assert_called_with(
['git', 'config', '--path', 'http.cookiefile'])
subprocess2.check_output.assert_called_with(['git', 'config', '--list'],
cwd=os.getcwd(),
env=mock.ANY,
stderr=mock.ANY)
os.getenv.return_value = 'git-cookies-path'
self.assertEqual(

@ -3309,7 +3309,6 @@ class TestGitCl(unittest.TestCase):
mock.patch('git_cl._GitCookiesChecker.get_hosts_with_creds',
lambda _, include_netrc=False: []).start()
self.calls = [
((['git', 'config', '--path', 'http.cookiefile'], ), CERR1),
((['git', 'config', '--global', 'http.cookiefile'], ), CERR1),
(('os.path.exists', os.path.join('~', NETRC_FILENAME)), True),
(('ask_for_data', 'Press Enter to setup .gitcookies, '
@ -3332,7 +3331,6 @@ class TestGitCl(unittest.TestCase):
custom_cookie_path = ('C:\\.gitcookies' if sys.platform == 'win32' else
'/custom/.gitcookies')
self.calls = [
((['git', 'config', '--path', 'http.cookiefile'], ), CERR1),
((['git', 'config', '--global',
'http.cookiefile'], ), custom_cookie_path),
(('os.path.exists', custom_cookie_path), False),

@ -296,7 +296,8 @@ class GitReadOnlyFunctionsTest(git_test_utils.GitRepoReadOnlyTestBase,
def testDormant(self):
self.assertFalse(self.repo.run(self.gc.is_dormant, 'main'))
self.repo.git('config', 'branch.main.dormant', 'true')
self.gc.scm.GIT.SetConfig(self.repo.repo_path, 'branch.main.dormant',
'true')
self.assertTrue(self.repo.run(self.gc.is_dormant, 'main'))
def testBlame(self):
@ -457,6 +458,7 @@ class GitMutableFunctionsTest(git_test_utils.GitRepoReadWriteTestBase,
[])
self.repo.git('config', '--add', 'happy.derpies', 'cat')
self.gc.scm.GIT._clear_config(self.repo.repo_path)
self.assertEqual(
self.repo.run(self.gc.get_config_list, 'happy.derpies'),
['food', 'cat'])
@ -464,8 +466,7 @@ class GitMutableFunctionsTest(git_test_utils.GitRepoReadWriteTestBase,
self.assertEqual('cat',
self.repo.run(self.gc.get_config, 'dude.bob', 'cat'))
self.repo.run(self.gc.set_config, 'dude.bob', 'dog')
self.gc.scm.GIT.SetConfig(self.repo.repo_path, 'dude.bob', 'dog')
self.assertEqual('dog',
self.repo.run(self.gc.get_config, 'dude.bob', 'cat'))
@ -481,15 +482,19 @@ class GitMutableFunctionsTest(git_test_utils.GitRepoReadWriteTestBase,
self.repo.git('config', 'depot-tools.upstream', 'catfood')
self.gc.scm.GIT._clear_config(self.repo.repo_path)
self.assertEqual('catfood', self.repo.run(self.gc.root))
self.repo.git('config', '--add', 'core.fsmonitor', 'true')
self.gc.scm.GIT._clear_config(self.repo.repo_path)
self.assertEqual(True, self.repo.run(self.gc.is_fsmonitor_enabled))
self.repo.git('config', '--add', 'core.fsmonitor', 't')
self.gc.scm.GIT._clear_config(self.repo.repo_path)
self.assertEqual(False, self.repo.run(self.gc.is_fsmonitor_enabled))
self.repo.git('config', '--add', 'core.fsmonitor', 'false')
self.gc.scm.GIT._clear_config(self.repo.repo_path)
self.assertEqual(False, self.repo.run(self.gc.is_fsmonitor_enabled))
def testRoot(self):
@ -634,8 +639,8 @@ class GitMutableStructuredTest(git_test_utils.GitRepoReadWriteTestBase,
_, rslt = self.repo.capture_stdio(list, self.gc.branches())
self.assertIn('too many branches (39/20)', rslt)
self.repo.git('config', 'depot-tools.branch-limit', '100')
self.gc.scm.GIT.SetConfig(self.repo.repo_path,
'depot-tools.branch-limit', '100')
# should not raise
# This check fails with git 2.4 (see crbug.com/487172)
@ -654,6 +659,7 @@ class GitMutableStructuredTest(git_test_utils.GitRepoReadWriteTestBase,
self.repo.run(self.gc.get_or_create_merge_base, 'branch_L',
'branch_K'))
self.gc.scm.GIT._clear_config(self.repo.repo_path)
self.assertEqual(
self.repo['B'],
self.repo.run(self.gc.get_config, 'branch.branch_K.base'))
@ -674,6 +680,7 @@ class GitMutableStructuredTest(git_test_utils.GitRepoReadWriteTestBase,
self.repo.run(self.gc.manual_merge_base, 'branch_K', self.repo['I'],
'branch_G')
self.gc.scm.GIT._clear_config(self.repo.repo_path)
self.assertEqual(
self.repo['I'],
self.repo.run(self.gc.get_or_create_merge_base, 'branch_K',

Loading…
Cancel
Save