From c41f72cf44357ea9bed1296259e14404bb8e799f Mon Sep 17 00:00:00 2001 From: danakj Date: Tue, 5 Nov 2019 17:12:01 +0000 Subject: [PATCH] Allow the bot_update api to not fetch tags from the git server. There are 3 layers modified, starting from the bottom up: 1. git_cache.py populate Now takes a --no-fetch-tags option. If specified, the cache will not fetch updated tags from the server by passing --no-tags to git fetch. This prevents the server from sending all the tag refs. In chromium this prevents significant time bottlenecks dealing with 10k+ tags. 2. bot_update.py bot_update has to deal with multiple git repos, it has the root repo that is checked out through git-cache, as well as repos synched via DEPS and gclient. The script now takes a --no_fetch_tags option. If specified, the git-cache populate call will include --no-fetch-tags. Otherwise, it won't. So this controls (for chromium) if fetches to the src.git server are passed --no-tags. 3. bot_update/api.py This is the entry point for recipes and also has to deal with multiple git repos. The behaviour at this point is not changed from the default. A |no_fetch_tags| parameter is added to ensure_checkout() that defaults to False. This CL is a refactor with no intended behavior change. The next step will be to change the chromium build repo to pass True for ensure_checkout(no_fetch_tags) on chromium trybots. This represents solution #2 in https://docs.google.com/document/d/1hDIunJjjfpmr50y3YnZ4o3aq1NZF4gJa1TS0p7AIL64/edit# Bug: 1019824 Change-Id: I935430603299daa9e301a95a5184c0ce486fd912 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1894352 Reviewed-by: Aaron Gable Reviewed-by: Erik Chen Commit-Queue: danakj Auto-Submit: danakj --- git_cache.py | 42 +++++++++++++---- recipes/README.recipes.md | 12 +++-- recipes/recipe_modules/bot_update/api.py | 45 ++++++++++++++----- .../bot_update/resources/bot_update.py | 29 +++++++++--- .../bot_update/tests/ensure_checkout.py | 1 + tests/bot_update_coverage_test.py | 25 +++++++++++ tests/git_cache_test.py | 30 +++++++++++++ 7 files changed, 154 insertions(+), 30 deletions(-) diff --git a/git_cache.py b/git_cache.py index 772dbea12..4a1ec03a0 100755 --- a/git_cache.py +++ b/git_cache.py @@ -536,15 +536,18 @@ class Mirror(object): 'but failed. Continuing with non-optimized repository.' % len(pack_files)) - def _fetch(self, rundir, verbose, depth, reset_fetch_config): + def _fetch(self, rundir, verbose, depth, no_fetch_tags, reset_fetch_config): self.config(rundir, reset_fetch_config) v = [] d = [] + t = [] if verbose: v = ['-v', '--progress'] if depth: d = ['--depth', str(depth)] - fetch_cmd = ['fetch'] + v + d + ['origin'] + if no_fetch_tags: + t = ['--no-tags'] + fetch_cmd = ['fetch'] + v + d + t + ['origin'] fetch_specs = subprocess.check_output( [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'], cwd=rundir).strip().splitlines() @@ -585,8 +588,14 @@ class Mirror(object): raise ClobberNeeded() # Corrupted cache. logging.warn('Fetch of %s failed' % spec) - def populate(self, depth=None, shallow=False, bootstrap=False, - verbose=False, ignore_lock=False, lock_timeout=0, + def populate(self, + depth=None, + no_fetch_tags=False, + shallow=False, + bootstrap=False, + verbose=False, + ignore_lock=False, + lock_timeout=0, reset_fetch_config=False): assert self.GetCachePath() if shallow and not depth: @@ -599,13 +608,15 @@ class Mirror(object): try: self._ensure_bootstrapped(depth, bootstrap) - self._fetch(self.mirror_path, verbose, depth, reset_fetch_config) + self._fetch(self.mirror_path, verbose, depth, no_fetch_tags, + reset_fetch_config) except ClobberNeeded: # This is a major failure, we need to clean and force a bootstrap. gclient_utils.rmtree(self.mirror_path) self.print(GIT_CACHE_CORRUPT_MESSAGE) self._ensure_bootstrapped(depth, bootstrap, force=True) - self._fetch(self.mirror_path, verbose, depth, reset_fetch_config) + self._fetch(self.mirror_path, verbose, depth, no_fetch_tags, + reset_fetch_config) finally: if not ignore_lock: lockfile.unlock() @@ -769,6 +780,11 @@ def CMDpopulate(parser, args): """Ensure that the cache has all up-to-date objects for the given repo.""" parser.add_option('--depth', type='int', help='Only cache DEPTH commits of history') + parser.add_option( + '--no-fetch-tags', + action='store_true', + help=('Don\'t fetch tags from the server. This can speed up ' + 'fetch considerably when there are many tags.')) parser.add_option('--shallow', '-s', action='store_true', help='Only cache 10000 commits of history') parser.add_option('--ref', action='append', @@ -794,6 +810,7 @@ def CMDpopulate(parser, args): if options.break_locks: mirror.unlock() kwargs = { + 'no_fetch_tags': options.no_fetch_tags, 'verbose': options.verbose, 'shallow': options.shallow, 'bootstrap': not options.no_bootstrap, @@ -813,6 +830,11 @@ def CMDfetch(parser, args): parser.add_option('--no_bootstrap', '--no-bootstrap', action='store_true', help='Don\'t (re)bootstrap from Google Storage') + parser.add_option( + '--no-fetch-tags', + action='store_true', + help=('Don\'t fetch tags from the server. This can speed up ' + 'fetch considerably when there are many tags.')) options, args = parser.parse_args(args) # Figure out which remotes to fetch. This mimics the behavior of regular @@ -844,7 +866,9 @@ def CMDfetch(parser, args): if git_dir.startswith(cachepath): mirror = Mirror.FromPath(git_dir) mirror.populate( - bootstrap=not options.no_bootstrap, lock_timeout=options.timeout) + bootstrap=not options.no_bootstrap, + no_fetch_tags=options.no_fetch_tags, + lock_timeout=options.timeout) return 0 for remote in remotes: remote_url = subprocess.check_output( @@ -854,7 +878,9 @@ def CMDfetch(parser, args): mirror.print = lambda *args: None print('Updating git cache...') mirror.populate( - bootstrap=not options.no_bootstrap, lock_timeout=options.timeout) + bootstrap=not options.no_bootstrap, + no_fetch_tags=options.no_fetch_tags, + lock_timeout=options.timeout) subprocess.check_call([Mirror.git_exe, 'fetch', remote]) return 0 diff --git a/recipes/README.recipes.md b/recipes/README.recipes.md index 8f1337a25..09b00b091 100644 --- a/recipes/README.recipes.md +++ b/recipes/README.recipes.md @@ -56,16 +56,20 @@ Recipe module to ensure a checkout is consistent on a bot. Wrapper for easy calling of bot_update. -— **def [deapply\_patch](/recipes/recipe_modules/bot_update/api.py#478)(self, bot_update_step):** +— **def [deapply\_patch](/recipes/recipe_modules/bot_update/api.py#498)(self, bot_update_step):** Deapplies a patch, taking care of DEPS and solution revisions properly. -— **def [ensure\_checkout](/recipes/recipe_modules/bot_update/api.py#68)(self, gclient_config=None, suffix=None, patch=True, update_presentation=True, patch_root=None, with_branch_heads=False, with_tags=False, refs=None, patch_oauth2=None, oauth2_json=None, use_site_config_creds=None, clobber=False, root_solution_revision=None, rietveld=None, issue=None, patchset=None, gerrit_no_reset=False, gerrit_no_rebase_patch_ref=False, disable_syntax_validation=False, manifest_name=None, patch_refs=None, ignore_input_commit=False, set_output_commit=False, step_test_data=None, \*\*kwargs):** +— **def [ensure\_checkout](/recipes/recipe_modules/bot_update/api.py#68)(self, gclient_config=None, suffix=None, patch=True, update_presentation=True, patch_root=None, with_branch_heads=False, with_tags=False, no_fetch_tags=False, refs=None, patch_oauth2=None, oauth2_json=None, use_site_config_creds=None, clobber=False, root_solution_revision=None, rietveld=None, issue=None, patchset=None, gerrit_no_reset=False, gerrit_no_rebase_patch_ref=False, disable_syntax_validation=False, manifest_name=None, patch_refs=None, ignore_input_commit=False, set_output_commit=False, step_test_data=None, \*\*kwargs):** Args: gclient_config: The gclient configuration to use when running bot_update. If omitted, the current gclient configuration is used. + no_fetch_tags: When true, the root git repo being checked out will not + fetch any tags referenced from the references being fetched. When a repo + has many references, it can become a performance bottleneck, so avoid + tags if the checkout will not need them present. disable_syntax_validation: (legacy) Disables syntax validation for DEPS. Needed as migration paths for recipes dealing with older revisions, such as bisect. @@ -83,7 +87,7 @@ Args: step_test_data: a null function that returns test bot_update.py output. Use test_api.output_json to generate test data. -— **def [get\_project\_revision\_properties](/recipes/recipe_modules/bot_update/api.py#455)(self, project_name, gclient_config=None):** +— **def [get\_project\_revision\_properties](/recipes/recipe_modules/bot_update/api.py#475)(self, project_name, gclient_config=None):** Returns all property names used for storing the checked-out revision of a given project. @@ -101,7 +105,7 @@ Returns (list of str): All properties that'll hold the checked-out revision   **@property**
— **def [last\_returned\_properties](/recipes/recipe_modules/bot_update/api.py#36)(self):** -— **def [resolve\_fixed\_revision](/recipes/recipe_modules/bot_update/api.py#405)(self, bot_update_json, name):** +— **def [resolve\_fixed\_revision](/recipes/recipe_modules/bot_update/api.py#425)(self, bot_update_json, name):** Set a fixed revision for a single dependency using project revision properties. diff --git a/recipes/recipe_modules/bot_update/api.py b/recipes/recipe_modules/bot_update/api.py index 200da9642..38acec94b 100644 --- a/recipes/recipe_modules/bot_update/api.py +++ b/recipes/recipe_modules/bot_update/api.py @@ -35,7 +35,7 @@ class BotUpdateApi(recipe_api.RecipeApi): @property def last_returned_properties(self): - return self._last_returned_properties + return self._last_returned_properties def _get_commit_repo_path(self, commit, gclient_config): """Returns local path to the repo that the commit is associated with. @@ -65,23 +65,41 @@ class BotUpdateApi(recipe_api.RecipeApi): return repo_path - def ensure_checkout(self, gclient_config=None, suffix=None, - patch=True, update_presentation=True, + def ensure_checkout(self, + gclient_config=None, + suffix=None, + patch=True, + update_presentation=True, patch_root=None, - with_branch_heads=False, with_tags=False, refs=None, - patch_oauth2=None, oauth2_json=None, - use_site_config_creds=None, clobber=False, - root_solution_revision=None, rietveld=None, issue=None, - patchset=None, gerrit_no_reset=False, + with_branch_heads=False, + with_tags=False, + no_fetch_tags=False, + refs=None, + patch_oauth2=None, + oauth2_json=None, + use_site_config_creds=None, + clobber=False, + root_solution_revision=None, + rietveld=None, + issue=None, + patchset=None, + gerrit_no_reset=False, gerrit_no_rebase_patch_ref=False, - disable_syntax_validation=False, manifest_name=None, - patch_refs=None, ignore_input_commit=False, - set_output_commit=False, step_test_data=None, + disable_syntax_validation=False, + manifest_name=None, + patch_refs=None, + ignore_input_commit=False, + set_output_commit=False, + step_test_data=None, **kwargs): """ Args: gclient_config: The gclient configuration to use when running bot_update. If omitted, the current gclient configuration is used. + no_fetch_tags: When true, the root git repo being checked out will not + fetch any tags referenced from the references being fetched. When a repo + has many references, it can become a performance bottleneck, so avoid + tags if the checkout will not need them present. disable_syntax_validation: (legacy) Disables syntax validation for DEPS. Needed as migration paths for recipes dealing with older revisions, such as bisect. @@ -238,6 +256,8 @@ class BotUpdateApi(recipe_api.RecipeApi): cmd.append('--with_tags') if gerrit_no_reset: cmd.append('--gerrit_no_reset') + if no_fetch_tags: + cmd.append('--no_fetch_tags') if gerrit_no_rebase_patch_ref: cmd.append('--gerrit_no_rebase_patch_ref') if disable_syntax_validation or cfg.disable_syntax_validation: @@ -490,4 +510,7 @@ class BotUpdateApi(recipe_api.RecipeApi): bot_update_json['properties'][rev_property]) self._resolve_fixed_revisions(bot_update_json) + # TODO(crbug.com/1019824): Pass |no_fetch_tags=True| here to prevent + # fetching 10k+ tags for chromium. This operation shouldn't need tags, it is + # removing a previously applied patch. self.ensure_checkout(patch=False, update_presentation=False) diff --git a/recipes/recipe_modules/bot_update/resources/bot_update.py b/recipes/recipe_modules/bot_update/resources/bot_update.py index b1d1fb854..10659d2e9 100755 --- a/recipes/recipe_modules/bot_update/resources/bot_update.py +++ b/recipes/recipe_modules/bot_update/resources/bot_update.py @@ -638,12 +638,14 @@ def _maybe_break_locks(checkout_path, tries=3): -def git_checkouts(solutions, revisions, refs, git_cache_dir, cleanup_dir): +def git_checkouts(solutions, revisions, refs, no_fetch_tags, git_cache_dir, + cleanup_dir): build_dir = os.getcwd() first_solution = True for sln in solutions: sln_dir = path.join(build_dir, sln['name']) - _git_checkout(sln, sln_dir, revisions, refs, git_cache_dir, cleanup_dir) + _git_checkout(sln, sln_dir, revisions, refs, no_fetch_tags, git_cache_dir, + cleanup_dir) if first_solution: git_ref = git('log', '--format=%H', '--max-count=1', cwd=path.join(build_dir, sln['name']) @@ -652,11 +654,14 @@ def git_checkouts(solutions, revisions, refs, git_cache_dir, cleanup_dir): return git_ref -def _git_checkout(sln, sln_dir, revisions, refs, git_cache_dir, cleanup_dir): +def _git_checkout(sln, sln_dir, revisions, refs, no_fetch_tags, git_cache_dir, + cleanup_dir): name = sln['name'] url = sln['url'] populate_cmd = (['cache', 'populate', '--ignore_locks', '-v', '--cache-dir', git_cache_dir, url, '--reset-fetch-config']) + if no_fetch_tags: + populate_cmd.extend(['--no-fetch-tags']) for ref in refs: populate_cmd.extend(['--ref', ref]) @@ -823,15 +828,16 @@ def emit_json(out_file, did_run, gclient_output=None, **kwargs): def ensure_checkout(solutions, revisions, first_sln, target_os, target_os_only, - target_cpu, patch_root, patch_refs, - gerrit_rebase_patch_ref, refs, git_cache_dir, - cleanup_dir, gerrit_reset, disable_syntax_validation): + target_cpu, patch_root, patch_refs, gerrit_rebase_patch_ref, + no_fetch_tags, refs, git_cache_dir, cleanup_dir, + gerrit_reset, disable_syntax_validation): # Get a checkout of each solution, without DEPS or hooks. # Calling git directly because there is no way to run Gclient without # invoking DEPS. print('Fetching Git checkout') - git_checkouts(solutions, revisions, refs, git_cache_dir, cleanup_dir) + git_checkouts(solutions, revisions, refs, no_fetch_tags, git_cache_dir, + cleanup_dir) # Ensure our build/ directory is set up with the correct .gclient file. gclient_configure(solutions, target_os, target_os_only, target_cpu, @@ -941,6 +947,12 @@ def parse_args(): 'Can prepend root@ to specify which repository, ' 'where root is either a filesystem path or git https ' 'url. To specify Tip of Tree, set rev to HEAD. ') + parse.add_option( + '--no_fetch_tags', + action='store_true', + help=('Don\'t fetch tags from the server for the git checkout. ' + 'This can speed up fetch considerably when ' + 'there are many tags.')) # TODO(machenbach): Remove the flag when all uses have been removed. parse.add_option('--output_manifest', action='store_true', help=('Deprecated.')) @@ -1072,6 +1084,9 @@ def checkout(options, git_slns, specs, revisions, step_text): patch_refs=options.patch_refs, gerrit_rebase_patch_ref=not options.gerrit_no_rebase_patch_ref, + # Control how the fetch step will occur. + no_fetch_tags=options.no_fetch_tags, + # Finally, extra configurations cleanup dir location. refs=options.refs, git_cache_dir=options.git_cache_dir, diff --git a/recipes/recipe_modules/bot_update/tests/ensure_checkout.py b/recipes/recipe_modules/bot_update/tests/ensure_checkout.py index c9043a57a..6615eaf8c 100644 --- a/recipes/recipe_modules/bot_update/tests/ensure_checkout.py +++ b/recipes/recipe_modules/bot_update/tests/ensure_checkout.py @@ -14,6 +14,7 @@ DEPS = [ def RunSteps(api): api.gclient.set_config('depot_tools') api.bot_update.ensure_checkout() + api.bot_update.ensure_checkout(no_fetch_tags=True) def GenTests(api): diff --git a/tests/bot_update_coverage_test.py b/tests/bot_update_coverage_test.py index 7e6eb3931..ecac91a75 100755 --- a/tests/bot_update_coverage_test.py +++ b/tests/bot_update_coverage_test.py @@ -151,6 +151,7 @@ class BotUpdateUnittests(unittest.TestCase): 'patch_root': None, 'patch_refs': [], 'gerrit_rebase_patch_ref': None, + 'no_fetch_tags': False, 'refs': [], 'git_cache_dir': '', 'cleanup_dir': None, @@ -208,6 +209,30 @@ class BotUpdateUnittests(unittest.TestCase): self.assertEqual(args[idx_third_revision+1], 'src/v8@deadbeef') return self.call.records + def testTagsByDefault(self): + bot_update.ensure_checkout(**self.params) + found = False + for record in self.call.records: + args = record[0] + if args[:3] == ('git', 'cache', 'populate'): + self.assertFalse('--no-fetch-tags' in args) + found = True + self.assertTrue(found) + return self.call.records + + def testNoTags(self): + params = self.params + params['no_fetch_tags'] = True + bot_update.ensure_checkout(**params) + found = False + for record in self.call.records: + args = record[0] + if args[:3] == ('git', 'cache', 'populate'): + self.assertTrue('--no-fetch-tags' in args) + found = True + self.assertTrue(found) + return self.call.records + def testApplyPatchOnGclient(self): ref = 'refs/changes/12/345/6' repo = 'https://chromium.googlesource.com/v8/v8' diff --git a/tests/git_cache_test.py b/tests/git_cache_test.py index 394555f26..5009aeca5 100755 --- a/tests/git_cache_test.py +++ b/tests/git_cache_test.py @@ -84,6 +84,36 @@ class GitCacheTest(unittest.TestCase): mirror.populate(reset_fetch_config=True) + def _makeGitRepoWithTag(self): + self.git(['init', '-q']) + with open(os.path.join(self.origin_dir, 'foo'), 'w') as f: + f.write('touched\n') + self.git(['add', 'foo']) + self.git(['commit', '-m', 'foo']) + self.git(['tag', 'TAG']) + self.git(['pack-refs']) + + def testPopulateFetchTagsByDefault(self): + self._makeGitRepoWithTag() + + # Default behaviour includes tags. + mirror = git_cache.Mirror(self.origin_dir) + mirror.populate() + + cache_dir = os.path.join(self.cache_dir, + mirror.UrlToCacheDir(self.origin_dir)) + self.assertTrue(os.path.exists(cache_dir + '/refs/tags/TAG')) + + def testPopulateFetchWithoutTags(self): + self._makeGitRepoWithTag() + + # Ask to not include tags. + mirror = git_cache.Mirror(self.origin_dir) + mirror.populate(no_fetch_tags=True) + + cache_dir = os.path.join(self.cache_dir, + mirror.UrlToCacheDir(self.origin_dir)) + self.assertFalse(os.path.exists(cache_dir + '/refs/tags/TAG')) def testPopulateResetFetchConfigEmptyFetchConfig(self): self.git(['init', '-q'])