From 3edda8d185b5b46d43ea19ad3e2d1de91b6f8ba0 Mon Sep 17 00:00:00 2001 From: Aravind Vasudevan Date: Wed, 7 Feb 2024 21:50:24 +0000 Subject: [PATCH] Update gclient to use git config caching This change updates all the modules used by gclient to use `scm.GIT` for git config calls over directly invoking the subprocess. This change currently doesn't modify git_cache since the config reads and writes within it are done on bare repository. A follow-up CL will update git_cache. A follow-up CL will also update git_cl and git_map_branches since they have shown performance improvements too: https://crrev.com/c/4697786. Benchmarking ============ With chromium/src as the baseline super project, this change reduces about 380 git config calls out of 507 total calls on cache hits during no-op. The below numbers are benchmarked with `update_depot_tools` turned off. Windows Benchmark ================= Baseline (gpaste/6360045736951808): ~1min 12 sec. With Caching (gpaste/6480065209040896): ~1min 3sec. ~12.5% decrease in gclient sync noop runtime. Linux Benchmark =============== Baseline (gpaste/4730436763254784): ~3.739 sec. With Caching (gpaste/4849870978940928): ~3.534 sec. ~5.5% decrease in gclient sync noop runtime. Bug: 1501984 Change-Id: Ib48df2d26a0c742a9b555a1e2ed6366221c7db17 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5252498 Commit-Queue: Aravind Vasudevan Reviewed-by: Josip Sokcevic --- gclient_scm.py | 96 +++++++++++++++++++-------------------- gerrit_util.py | 11 ++--- git_common.py | 34 +++++--------- git_rebase_update.py | 3 +- tests/gclient_scm_test.py | 15 +++--- tests/gerrit_util_test.py | 10 ++-- tests/git_cl_test.py | 2 - tests/git_common_test.py | 17 +++++-- 8 files changed, 92 insertions(+), 96 deletions(-) diff --git a/gclient_scm.py b/gclient_scm.py index e6a11de98..9ac32672e 100644 --- a/gclient_scm.py +++ b/gclient_scm.py @@ -134,11 +134,8 @@ class SCMWrapper(object): @staticmethod def _get_first_remote_url(checkout_path): - log = scm.GIT.Capture( - ['config', '--local', '--get-regexp', r'remote.*.url'], - cwd=checkout_path) - # Get the second token of the first line of the log. - return log.splitlines()[0].split(' ', 1)[1] + log = scm.GIT.YieldConfigRegexp(checkout_path, r'remote.*.url') + return next(log)[1] def GetCacheMirror(self): if getattr(self, 'cache_dir', None): @@ -592,26 +589,35 @@ class GitWrapper(SCMWrapper): def wrapper(*args): return_val = f(*args) if os.path.exists(os.path.join(args[0].checkout_path, '.git')): - # If diff.ignoreSubmodules is not already set, set it to `all`. - config = subprocess2.capture(['git', 'config', '-l'], - cwd=args[0].checkout_path).decode( - 'utf-8').strip().splitlines() - if 'diff.ignoresubmodules=dirty' not in config: - subprocess2.capture( - ['git', 'config', 'diff.ignoreSubmodules', 'dirty'], - cwd=args[0].checkout_path) - if 'fetch.recursesubmodules=off' not in config: - subprocess2.capture( - ['git', 'config', 'fetch.recurseSubmodules', 'off'], - cwd=args[0].checkout_path) - if 'push.recursesubmodules=off' not in config: + # The config updates to the project are stored in this list + # and updated consecutively after the reads. The updates + # are done this way because `scm.GIT.GetConfig` caches + # the config file and `scm.GIT.SetConfig` evicts the cache. + # This ensures we don't interleave reads and writes causing + # the cache to set and unset consecutively. + config_updates = [] + + if scm.GIT.GetConfig(args[0].checkout_path, + 'diff.ignoresubmodules') != 'dirty': + # If diff.ignoreSubmodules is not already set, set it to `all`. + config_updates.append(('diff.ignoreSubmodules', 'dirty')) + + if scm.GIT.GetConfig(args[0].checkout_path, + 'fetch.recursesubmodules') != 'off': + config_updates.append(('fetch.recurseSubmodules', 'off')) + + if scm.GIT.GetConfig(args[0].checkout_path, + 'push.recursesubmodules') != 'off': # The default is off, but if user sets submodules.recurse to # on, this becomes on too. We never want to push submodules # for gclient managed repositories. Push, even if a no-op, # will increase `git cl upload` latency. - subprocess2.capture( - ['git', 'config', 'push.recurseSubmodules', 'off'], - cwd=args[0].checkout_path) + config_updates.append(('push.recurseSubmodules', 'off')) + + for update in config_updates: + scm.GIT.SetConfig(args[0].checkout_path, update[0], + update[1]) + return return_val return wrapper @@ -739,7 +745,8 @@ class GitWrapper(SCMWrapper): # See if the url has changed (the unittests use git://foo for the url, # let that through). - current_url = self._Capture(['config', 'remote.%s.url' % self.remote]) + current_url = scm.GIT.GetConfig(self.checkout_path, + f'remote.{self.remote}.url') return_early = False # TODO(maruel): Delete url != 'git://foo' since it's just to make the # unit test pass. (and update the comment above) @@ -750,12 +757,9 @@ class GitWrapper(SCMWrapper): strp_current_url = current_url[:-4] if current_url.endswith( '.git') else current_url if (strp_current_url.rstrip('/') != strp_url.rstrip('/') - and url != 'git://foo' and - subprocess2.capture([ - 'git', 'config', - 'remote.%s.gclient-auto-fix-url' % self.remote - ], - cwd=self.checkout_path).strip() != 'False'): + and url != 'git://foo' and scm.GIT.GetConfigBool( + self.checkout_path, + f'remote.{self.remote}.gclient-auto-fix-url')): self.Print('_____ switching %s from %s to new upstream %s' % (self.relpath, current_url, url)) if not (options.force or options.reset): @@ -1553,34 +1557,30 @@ class GitWrapper(SCMWrapper): if requested.""" if options.force or options.reset: try: - self._Run( - ['config', '--unset-all', - 'remote.%s.fetch' % self.remote], options) - self._Run([ - 'config', - 'remote.%s.fetch' % self.remote, - '+refs/heads/*:refs/remotes/%s/*' % self.remote - ], options) + scm.GIT.SetConfig(self.checkout_path, + f'remote.{self.remote}.fetch', + modify_all=True) + scm.GIT.SetConfig( + self.checkout_path, f'remote.{self.remote}.fetch', + f'+refs/heads/*:refs/remotes/{self.remote}/*') except subprocess2.CalledProcessError as e: # If exit code was 5, it means we attempted to unset a config # that didn't exist. Ignore it. if e.returncode != 5: raise if hasattr(options, 'with_branch_heads') and options.with_branch_heads: - config_cmd = [ - 'config', - 'remote.%s.fetch' % self.remote, + scm.GIT.SetConfig( + self.checkout_path, + f'remote.{self.remote}.fetch', '+refs/branch-heads/*:refs/remotes/branch-heads/*', - '^\\+refs/branch-heads/\\*:.*$' - ] - self._Run(config_cmd, options) + value_pattern='^\\+refs/branch-heads/\\*:.*$', + modify_all=True) if hasattr(options, 'with_tags') and options.with_tags: - config_cmd = [ - 'config', - 'remote.%s.fetch' % self.remote, '+refs/tags/*:refs/tags/*', - '^\\+refs/tags/\\*:.*$' - ] - self._Run(config_cmd, options) + scm.GIT.SetConfig(self.checkout_path, + f'remote.{self.remote}.fetch', + '+refs/tags/*:refs/tags/*', + value_pattern='^\\+refs/tags/\\*:.*$', + modify_all=True) def _AutoFetchRef(self, options, revision, depth=None): """Attempts to fetch |revision| if not available in local repo. diff --git a/gerrit_util.py b/gerrit_util.py index be4ee81ad..77420868b 100644 --- a/gerrit_util.py +++ b/gerrit_util.py @@ -28,6 +28,7 @@ import auth import gclient_utils import metrics import metrics_utils +import scm import subprocess2 import http.cookiejar @@ -216,12 +217,10 @@ class CookiesAuthenticator(Authenticator): def get_gitcookies_path(cls): if os.getenv('GIT_COOKIES_PATH'): return os.getenv('GIT_COOKIES_PATH') - try: - path = subprocess2.check_output( - ['git', 'config', '--path', 'http.cookiefile']) - return path.decode('utf-8', 'ignore').strip() - except subprocess2.CalledProcessError: - return os.path.expanduser(os.path.join('~', '.gitcookies')) + + return scm.GIT.GetConfig( + os.getcwd(), 'http.cookiefile', + os.path.expanduser(os.path.join('~', '.gitcookies'))) @classmethod def _get_gitcookies(cls): diff --git a/git_common.py b/git_common.py index 5cceb7bcc..b466fc8de 100644 --- a/git_common.py +++ b/git_common.py @@ -38,6 +38,7 @@ import signal import tempfile import textwrap +import scm import subprocess2 from io import BytesIO @@ -349,8 +350,10 @@ def branch_config_map(option): """Return {branch: <|option| value>} for all branches.""" try: reg = re.compile(r'^branch\.(.*)\.%s$' % option) - lines = get_config_regexp(reg.pattern) - return {reg.match(k).group(1): v for k, v in (l.split() for l in lines)} + return { + reg.match(k).group(1): v + for k, v in get_config_regexp(reg.pattern) + } except subprocess2.CalledProcessError: return {} @@ -384,11 +387,7 @@ def branches(use_limit=True, *args): def get_config(option, default=None): - try: - return run('config', '--get', option) or default - except subprocess2.CalledProcessError: - return default - + return scm.GIT.GetConfig(os.getcwd(), option, default) def get_config_int(option, default=0): assert isinstance(default, int) @@ -399,19 +398,11 @@ def get_config_int(option, default=0): def get_config_list(option): - try: - return run('config', '--get-all', option).split() - except subprocess2.CalledProcessError: - return [] + return scm.GIT.GetConfigList(os.getcwd(), option) def get_config_regexp(pattern): - if IS_WIN: # pragma: no cover - # this madness is because we call git.bat which calls git.exe which - # calls bash.exe (or something to that effect). Each layer divides the - # number of ^'s by 2. - pattern = pattern.replace('^', '^' * 8) - return run('config', '--get-regexp', pattern).splitlines() + return scm.GIT.YieldConfigRegexp(os.getcwd(), pattern) def is_fsmonitor_enabled(): @@ -455,7 +446,7 @@ def del_branch_config(branch, option, scope='local'): def del_config(option, scope='local'): try: - run('config', '--' + scope, '--unset', option) + scm.GIT.SetConfig(os.getcwd(), option, scope=scope) except subprocess2.CalledProcessError: pass @@ -897,7 +888,7 @@ def set_branch_config(branch, option, value, scope='local'): def set_config(option, value, scope='local'): - run('config', '--' + scope, option, value) + scm.GIT.SetConfig(os.getcwd(), option, value, scope=scope) def get_dirty_files(): @@ -1116,10 +1107,7 @@ def tree(treeref, recurse=False): def get_remote_url(remote='origin'): - try: - return run('config', 'remote.%s.url' % remote) - except subprocess2.CalledProcessError: - return None + return scm.GIT.GetConfig(os.getcwd(), 'remote.%s.url' % remote) def upstream(branch): diff --git a/git_rebase_update.py b/git_rebase_update.py index 8b1fb8d44..ec5c1260a 100755 --- a/git_rebase_update.py +++ b/git_rebase_update.py @@ -48,8 +48,7 @@ def fetch_remotes(branch_tree): tag_set = git.tags() fetchspec_map = {} all_fetchspec_configs = git.get_config_regexp(r'^remote\..*\.fetch') - for fetchspec_config in all_fetchspec_configs: - key, _, fetchspec = fetchspec_config.partition(' ') + for key, fetchspec in all_fetchspec_configs: dest_spec = fetchspec.partition(':')[2] remote_name = key.split('.')[1] fetchspec_map[dest_spec] = remote_name diff --git a/tests/gclient_scm_test.py b/tests/gclient_scm_test.py index f3efcd5d0..9d31575a9 100755 --- a/tests/gclient_scm_test.py +++ b/tests/gclient_scm_test.py @@ -53,11 +53,11 @@ class BasicTests(unittest.TestCase): @mock.patch('gclient_scm.scm.GIT.Capture') def testGetFirstRemoteUrl(self, mockCapture): REMOTE_STRINGS = [ - ('remote.origin.url E:\\foo\\bar', 'E:\\foo\\bar'), - ('remote.origin.url /b/foo/bar', '/b/foo/bar'), - ('remote.origin.url https://foo/bar', 'https://foo/bar'), - ('remote.origin.url E:\\Fo Bar\\bax', 'E:\\Fo Bar\\bax'), - ('remote.origin.url git://what/"do', 'git://what/"do') + ('remote.origin.url = E:\\foo\\bar', 'E:\\foo\\bar'), + ('remote.origin.url = /b/foo/bar', '/b/foo/bar'), + ('remote.origin.url = https://foo/bar', 'https://foo/bar'), + ('remote.origin.url = E:\\Fo Bar\\bax', 'E:\\Fo Bar\\bax'), + ('remote.origin.url = git://what/"do', 'git://what/"do') ] FAKE_PATH = '/fake/path' mockCapture.side_effect = [question for question, _ in REMOTE_STRINGS] @@ -65,10 +65,11 @@ class BasicTests(unittest.TestCase): for _, answer in REMOTE_STRINGS: self.assertEqual( gclient_scm.SCMWrapper._get_first_remote_url(FAKE_PATH), answer) + gclient_scm.scm.GIT._clear_config(FAKE_PATH) expected_calls = [ - mock.call(['config', '--local', '--get-regexp', r'remote.*.url'], - cwd=FAKE_PATH) for _ in REMOTE_STRINGS + mock.call(['config', '--list'], cwd=FAKE_PATH, strip_out=False) + for _ in REMOTE_STRINGS ] self.assertEqual(mockCapture.mock_calls, expected_calls) diff --git a/tests/gerrit_util_test.py b/tests/gerrit_util_test.py index 706d8c375..9904e79ee 100755 --- a/tests/gerrit_util_test.py +++ b/tests/gerrit_util_test.py @@ -117,12 +117,16 @@ class CookiesAuthenticatorTest(unittest.TestCase): os.path.expanduser(os.path.join('~', '.gitcookies')), gerrit_util.CookiesAuthenticator().get_gitcookies_path()) - subprocess2.check_output.side_effect = [b'http.cookiefile'] + subprocess2.check_output.side_effect = [ + b'http.cookiefile = http.cookiefile' + ] self.assertEqual( 'http.cookiefile', gerrit_util.CookiesAuthenticator().get_gitcookies_path()) - subprocess2.check_output.assert_called_with( - ['git', 'config', '--path', 'http.cookiefile']) + subprocess2.check_output.assert_called_with(['git', 'config', '--list'], + cwd=os.getcwd(), + env=mock.ANY, + stderr=mock.ANY) os.getenv.return_value = 'git-cookies-path' self.assertEqual( diff --git a/tests/git_cl_test.py b/tests/git_cl_test.py index b8fafe2e8..25e1ff5d5 100755 --- a/tests/git_cl_test.py +++ b/tests/git_cl_test.py @@ -3309,7 +3309,6 @@ class TestGitCl(unittest.TestCase): mock.patch('git_cl._GitCookiesChecker.get_hosts_with_creds', lambda _, include_netrc=False: []).start() self.calls = [ - ((['git', 'config', '--path', 'http.cookiefile'], ), CERR1), ((['git', 'config', '--global', 'http.cookiefile'], ), CERR1), (('os.path.exists', os.path.join('~', NETRC_FILENAME)), True), (('ask_for_data', 'Press Enter to setup .gitcookies, ' @@ -3332,7 +3331,6 @@ class TestGitCl(unittest.TestCase): custom_cookie_path = ('C:\\.gitcookies' if sys.platform == 'win32' else '/custom/.gitcookies') self.calls = [ - ((['git', 'config', '--path', 'http.cookiefile'], ), CERR1), ((['git', 'config', '--global', 'http.cookiefile'], ), custom_cookie_path), (('os.path.exists', custom_cookie_path), False), diff --git a/tests/git_common_test.py b/tests/git_common_test.py index 939071a19..6aac4cbaa 100755 --- a/tests/git_common_test.py +++ b/tests/git_common_test.py @@ -296,7 +296,8 @@ class GitReadOnlyFunctionsTest(git_test_utils.GitRepoReadOnlyTestBase, def testDormant(self): self.assertFalse(self.repo.run(self.gc.is_dormant, 'main')) - self.repo.git('config', 'branch.main.dormant', 'true') + self.gc.scm.GIT.SetConfig(self.repo.repo_path, 'branch.main.dormant', + 'true') self.assertTrue(self.repo.run(self.gc.is_dormant, 'main')) def testBlame(self): @@ -457,6 +458,7 @@ class GitMutableFunctionsTest(git_test_utils.GitRepoReadWriteTestBase, []) self.repo.git('config', '--add', 'happy.derpies', 'cat') + self.gc.scm.GIT._clear_config(self.repo.repo_path) self.assertEqual( self.repo.run(self.gc.get_config_list, 'happy.derpies'), ['food', 'cat']) @@ -464,8 +466,7 @@ class GitMutableFunctionsTest(git_test_utils.GitRepoReadWriteTestBase, self.assertEqual('cat', self.repo.run(self.gc.get_config, 'dude.bob', 'cat')) - self.repo.run(self.gc.set_config, 'dude.bob', 'dog') - + self.gc.scm.GIT.SetConfig(self.repo.repo_path, 'dude.bob', 'dog') self.assertEqual('dog', self.repo.run(self.gc.get_config, 'dude.bob', 'cat')) @@ -481,15 +482,19 @@ class GitMutableFunctionsTest(git_test_utils.GitRepoReadWriteTestBase, self.repo.git('config', 'depot-tools.upstream', 'catfood') + self.gc.scm.GIT._clear_config(self.repo.repo_path) self.assertEqual('catfood', self.repo.run(self.gc.root)) self.repo.git('config', '--add', 'core.fsmonitor', 'true') + self.gc.scm.GIT._clear_config(self.repo.repo_path) self.assertEqual(True, self.repo.run(self.gc.is_fsmonitor_enabled)) self.repo.git('config', '--add', 'core.fsmonitor', 't') + self.gc.scm.GIT._clear_config(self.repo.repo_path) self.assertEqual(False, self.repo.run(self.gc.is_fsmonitor_enabled)) self.repo.git('config', '--add', 'core.fsmonitor', 'false') + self.gc.scm.GIT._clear_config(self.repo.repo_path) self.assertEqual(False, self.repo.run(self.gc.is_fsmonitor_enabled)) def testRoot(self): @@ -634,8 +639,8 @@ class GitMutableStructuredTest(git_test_utils.GitRepoReadWriteTestBase, _, rslt = self.repo.capture_stdio(list, self.gc.branches()) self.assertIn('too many branches (39/20)', rslt) - - self.repo.git('config', 'depot-tools.branch-limit', '100') + self.gc.scm.GIT.SetConfig(self.repo.repo_path, + 'depot-tools.branch-limit', '100') # should not raise # This check fails with git 2.4 (see crbug.com/487172) @@ -654,6 +659,7 @@ class GitMutableStructuredTest(git_test_utils.GitRepoReadWriteTestBase, self.repo.run(self.gc.get_or_create_merge_base, 'branch_L', 'branch_K')) + self.gc.scm.GIT._clear_config(self.repo.repo_path) self.assertEqual( self.repo['B'], self.repo.run(self.gc.get_config, 'branch.branch_K.base')) @@ -674,6 +680,7 @@ class GitMutableStructuredTest(git_test_utils.GitRepoReadWriteTestBase, self.repo.run(self.gc.manual_merge_base, 'branch_K', self.repo['I'], 'branch_G') + self.gc.scm.GIT._clear_config(self.repo.repo_path) self.assertEqual( self.repo['I'], self.repo.run(self.gc.get_or_create_merge_base, 'branch_K',