Allow the bot_update api to not fetch tags from the git server.

There are 3 layers modified, starting from the bottom up:

1. git_cache.py populate

Now takes a --no-fetch-tags option. If specified, the cache will not
fetch updated tags from the server by passing --no-tags to git fetch.
This prevents the server from sending all the tag refs. In chromium
this prevents significant time bottlenecks dealing with 10k+ tags.

2. bot_update.py

bot_update has to deal with multiple git repos, it has the root repo
that is checked out through git-cache, as well as repos synched via
DEPS and gclient.

The script now takes a --no_fetch_tags option. If specified,
the git-cache populate call will include --no-fetch-tags. Otherwise, it
won't. So this controls (for chromium) if fetches to the src.git server
are passed --no-tags.

3. bot_update/api.py

This is the entry point for recipes and also has to deal with multiple
git repos. The behaviour at this point is not changed from the default.
A |no_fetch_tags| parameter is added to ensure_checkout() that defaults
to False.


This CL is a refactor with no intended behavior change.

The next step will be to change the chromium build repo to pass True
for ensure_checkout(no_fetch_tags) on chromium trybots.

This represents solution #2 in
https://docs.google.com/document/d/1hDIunJjjfpmr50y3YnZ4o3aq1NZF4gJa1TS0p7AIL64/edit#

Bug: 1019824
Change-Id: I935430603299daa9e301a95a5184c0ce486fd912
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1894352
Reviewed-by: Aaron Gable <agable@chromium.org>
Reviewed-by: Erik Chen <erikchen@chromium.org>
Commit-Queue: danakj <danakj@chromium.org>
Auto-Submit: danakj <danakj@chromium.org>
changes/52/1894352/14
danakj 6 years ago committed by Commit Bot
parent f6a2232b48
commit c41f72cf44

@ -536,15 +536,18 @@ class Mirror(object):
'but failed. Continuing with non-optimized repository.'
% len(pack_files))
def _fetch(self, rundir, verbose, depth, reset_fetch_config):
def _fetch(self, rundir, verbose, depth, no_fetch_tags, reset_fetch_config):
self.config(rundir, reset_fetch_config)
v = []
d = []
t = []
if verbose:
v = ['-v', '--progress']
if depth:
d = ['--depth', str(depth)]
fetch_cmd = ['fetch'] + v + d + ['origin']
if no_fetch_tags:
t = ['--no-tags']
fetch_cmd = ['fetch'] + v + d + t + ['origin']
fetch_specs = subprocess.check_output(
[self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
cwd=rundir).strip().splitlines()
@ -585,8 +588,14 @@ class Mirror(object):
raise ClobberNeeded() # Corrupted cache.
logging.warn('Fetch of %s failed' % spec)
def populate(self, depth=None, shallow=False, bootstrap=False,
verbose=False, ignore_lock=False, lock_timeout=0,
def populate(self,
depth=None,
no_fetch_tags=False,
shallow=False,
bootstrap=False,
verbose=False,
ignore_lock=False,
lock_timeout=0,
reset_fetch_config=False):
assert self.GetCachePath()
if shallow and not depth:
@ -599,13 +608,15 @@ class Mirror(object):
try:
self._ensure_bootstrapped(depth, bootstrap)
self._fetch(self.mirror_path, verbose, depth, reset_fetch_config)
self._fetch(self.mirror_path, verbose, depth, no_fetch_tags,
reset_fetch_config)
except ClobberNeeded:
# This is a major failure, we need to clean and force a bootstrap.
gclient_utils.rmtree(self.mirror_path)
self.print(GIT_CACHE_CORRUPT_MESSAGE)
self._ensure_bootstrapped(depth, bootstrap, force=True)
self._fetch(self.mirror_path, verbose, depth, reset_fetch_config)
self._fetch(self.mirror_path, verbose, depth, no_fetch_tags,
reset_fetch_config)
finally:
if not ignore_lock:
lockfile.unlock()
@ -769,6 +780,11 @@ def CMDpopulate(parser, args):
"""Ensure that the cache has all up-to-date objects for the given repo."""
parser.add_option('--depth', type='int',
help='Only cache DEPTH commits of history')
parser.add_option(
'--no-fetch-tags',
action='store_true',
help=('Don\'t fetch tags from the server. This can speed up '
'fetch considerably when there are many tags.'))
parser.add_option('--shallow', '-s', action='store_true',
help='Only cache 10000 commits of history')
parser.add_option('--ref', action='append',
@ -794,6 +810,7 @@ def CMDpopulate(parser, args):
if options.break_locks:
mirror.unlock()
kwargs = {
'no_fetch_tags': options.no_fetch_tags,
'verbose': options.verbose,
'shallow': options.shallow,
'bootstrap': not options.no_bootstrap,
@ -813,6 +830,11 @@ def CMDfetch(parser, args):
parser.add_option('--no_bootstrap', '--no-bootstrap',
action='store_true',
help='Don\'t (re)bootstrap from Google Storage')
parser.add_option(
'--no-fetch-tags',
action='store_true',
help=('Don\'t fetch tags from the server. This can speed up '
'fetch considerably when there are many tags.'))
options, args = parser.parse_args(args)
# Figure out which remotes to fetch. This mimics the behavior of regular
@ -844,7 +866,9 @@ def CMDfetch(parser, args):
if git_dir.startswith(cachepath):
mirror = Mirror.FromPath(git_dir)
mirror.populate(
bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
bootstrap=not options.no_bootstrap,
no_fetch_tags=options.no_fetch_tags,
lock_timeout=options.timeout)
return 0
for remote in remotes:
remote_url = subprocess.check_output(
@ -854,7 +878,9 @@ def CMDfetch(parser, args):
mirror.print = lambda *args: None
print('Updating git cache...')
mirror.populate(
bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
bootstrap=not options.no_bootstrap,
no_fetch_tags=options.no_fetch_tags,
lock_timeout=options.timeout)
subprocess.check_call([Mirror.git_exe, 'fetch', remote])
return 0

@ -56,16 +56,20 @@ Recipe module to ensure a checkout is consistent on a bot.
Wrapper for easy calling of bot_update.
&mdash; **def [deapply\_patch](/recipes/recipe_modules/bot_update/api.py#478)(self, bot_update_step):**
&mdash; **def [deapply\_patch](/recipes/recipe_modules/bot_update/api.py#498)(self, bot_update_step):**
Deapplies a patch, taking care of DEPS and solution revisions properly.
&mdash; **def [ensure\_checkout](/recipes/recipe_modules/bot_update/api.py#68)(self, gclient_config=None, suffix=None, patch=True, update_presentation=True, patch_root=None, with_branch_heads=False, with_tags=False, refs=None, patch_oauth2=None, oauth2_json=None, use_site_config_creds=None, clobber=False, root_solution_revision=None, rietveld=None, issue=None, patchset=None, gerrit_no_reset=False, gerrit_no_rebase_patch_ref=False, disable_syntax_validation=False, manifest_name=None, patch_refs=None, ignore_input_commit=False, set_output_commit=False, step_test_data=None, \*\*kwargs):**
&mdash; **def [ensure\_checkout](/recipes/recipe_modules/bot_update/api.py#68)(self, gclient_config=None, suffix=None, patch=True, update_presentation=True, patch_root=None, with_branch_heads=False, with_tags=False, no_fetch_tags=False, refs=None, patch_oauth2=None, oauth2_json=None, use_site_config_creds=None, clobber=False, root_solution_revision=None, rietveld=None, issue=None, patchset=None, gerrit_no_reset=False, gerrit_no_rebase_patch_ref=False, disable_syntax_validation=False, manifest_name=None, patch_refs=None, ignore_input_commit=False, set_output_commit=False, step_test_data=None, \*\*kwargs):**
Args:
gclient_config: The gclient configuration to use when running bot_update.
If omitted, the current gclient configuration is used.
no_fetch_tags: When true, the root git repo being checked out will not
fetch any tags referenced from the references being fetched. When a repo
has many references, it can become a performance bottleneck, so avoid
tags if the checkout will not need them present.
disable_syntax_validation: (legacy) Disables syntax validation for DEPS.
Needed as migration paths for recipes dealing with older revisions,
such as bisect.
@ -83,7 +87,7 @@ Args:
step_test_data: a null function that returns test bot_update.py output.
Use test_api.output_json to generate test data.
&mdash; **def [get\_project\_revision\_properties](/recipes/recipe_modules/bot_update/api.py#455)(self, project_name, gclient_config=None):**
&mdash; **def [get\_project\_revision\_properties](/recipes/recipe_modules/bot_update/api.py#475)(self, project_name, gclient_config=None):**
Returns all property names used for storing the checked-out revision of
a given project.
@ -101,7 +105,7 @@ Returns (list of str): All properties that'll hold the checked-out revision
&emsp; **@property**<br>&mdash; **def [last\_returned\_properties](/recipes/recipe_modules/bot_update/api.py#36)(self):**
&mdash; **def [resolve\_fixed\_revision](/recipes/recipe_modules/bot_update/api.py#405)(self, bot_update_json, name):**
&mdash; **def [resolve\_fixed\_revision](/recipes/recipe_modules/bot_update/api.py#425)(self, bot_update_json, name):**
Set a fixed revision for a single dependency using project revision
properties.

@ -35,7 +35,7 @@ class BotUpdateApi(recipe_api.RecipeApi):
@property
def last_returned_properties(self):
return self._last_returned_properties
return self._last_returned_properties
def _get_commit_repo_path(self, commit, gclient_config):
"""Returns local path to the repo that the commit is associated with.
@ -65,23 +65,41 @@ class BotUpdateApi(recipe_api.RecipeApi):
return repo_path
def ensure_checkout(self, gclient_config=None, suffix=None,
patch=True, update_presentation=True,
def ensure_checkout(self,
gclient_config=None,
suffix=None,
patch=True,
update_presentation=True,
patch_root=None,
with_branch_heads=False, with_tags=False, refs=None,
patch_oauth2=None, oauth2_json=None,
use_site_config_creds=None, clobber=False,
root_solution_revision=None, rietveld=None, issue=None,
patchset=None, gerrit_no_reset=False,
with_branch_heads=False,
with_tags=False,
no_fetch_tags=False,
refs=None,
patch_oauth2=None,
oauth2_json=None,
use_site_config_creds=None,
clobber=False,
root_solution_revision=None,
rietveld=None,
issue=None,
patchset=None,
gerrit_no_reset=False,
gerrit_no_rebase_patch_ref=False,
disable_syntax_validation=False, manifest_name=None,
patch_refs=None, ignore_input_commit=False,
set_output_commit=False, step_test_data=None,
disable_syntax_validation=False,
manifest_name=None,
patch_refs=None,
ignore_input_commit=False,
set_output_commit=False,
step_test_data=None,
**kwargs):
"""
Args:
gclient_config: The gclient configuration to use when running bot_update.
If omitted, the current gclient configuration is used.
no_fetch_tags: When true, the root git repo being checked out will not
fetch any tags referenced from the references being fetched. When a repo
has many references, it can become a performance bottleneck, so avoid
tags if the checkout will not need them present.
disable_syntax_validation: (legacy) Disables syntax validation for DEPS.
Needed as migration paths for recipes dealing with older revisions,
such as bisect.
@ -238,6 +256,8 @@ class BotUpdateApi(recipe_api.RecipeApi):
cmd.append('--with_tags')
if gerrit_no_reset:
cmd.append('--gerrit_no_reset')
if no_fetch_tags:
cmd.append('--no_fetch_tags')
if gerrit_no_rebase_patch_ref:
cmd.append('--gerrit_no_rebase_patch_ref')
if disable_syntax_validation or cfg.disable_syntax_validation:
@ -490,4 +510,7 @@ class BotUpdateApi(recipe_api.RecipeApi):
bot_update_json['properties'][rev_property])
self._resolve_fixed_revisions(bot_update_json)
# TODO(crbug.com/1019824): Pass |no_fetch_tags=True| here to prevent
# fetching 10k+ tags for chromium. This operation shouldn't need tags, it is
# removing a previously applied patch.
self.ensure_checkout(patch=False, update_presentation=False)

@ -638,12 +638,14 @@ def _maybe_break_locks(checkout_path, tries=3):
def git_checkouts(solutions, revisions, refs, git_cache_dir, cleanup_dir):
def git_checkouts(solutions, revisions, refs, no_fetch_tags, git_cache_dir,
cleanup_dir):
build_dir = os.getcwd()
first_solution = True
for sln in solutions:
sln_dir = path.join(build_dir, sln['name'])
_git_checkout(sln, sln_dir, revisions, refs, git_cache_dir, cleanup_dir)
_git_checkout(sln, sln_dir, revisions, refs, no_fetch_tags, git_cache_dir,
cleanup_dir)
if first_solution:
git_ref = git('log', '--format=%H', '--max-count=1',
cwd=path.join(build_dir, sln['name'])
@ -652,11 +654,14 @@ def git_checkouts(solutions, revisions, refs, git_cache_dir, cleanup_dir):
return git_ref
def _git_checkout(sln, sln_dir, revisions, refs, git_cache_dir, cleanup_dir):
def _git_checkout(sln, sln_dir, revisions, refs, no_fetch_tags, git_cache_dir,
cleanup_dir):
name = sln['name']
url = sln['url']
populate_cmd = (['cache', 'populate', '--ignore_locks', '-v',
'--cache-dir', git_cache_dir, url, '--reset-fetch-config'])
if no_fetch_tags:
populate_cmd.extend(['--no-fetch-tags'])
for ref in refs:
populate_cmd.extend(['--ref', ref])
@ -823,15 +828,16 @@ def emit_json(out_file, did_run, gclient_output=None, **kwargs):
def ensure_checkout(solutions, revisions, first_sln, target_os, target_os_only,
target_cpu, patch_root, patch_refs,
gerrit_rebase_patch_ref, refs, git_cache_dir,
cleanup_dir, gerrit_reset, disable_syntax_validation):
target_cpu, patch_root, patch_refs, gerrit_rebase_patch_ref,
no_fetch_tags, refs, git_cache_dir, cleanup_dir,
gerrit_reset, disable_syntax_validation):
# Get a checkout of each solution, without DEPS or hooks.
# Calling git directly because there is no way to run Gclient without
# invoking DEPS.
print('Fetching Git checkout')
git_checkouts(solutions, revisions, refs, git_cache_dir, cleanup_dir)
git_checkouts(solutions, revisions, refs, no_fetch_tags, git_cache_dir,
cleanup_dir)
# Ensure our build/ directory is set up with the correct .gclient file.
gclient_configure(solutions, target_os, target_os_only, target_cpu,
@ -941,6 +947,12 @@ def parse_args():
'Can prepend root@<rev> to specify which repository, '
'where root is either a filesystem path or git https '
'url. To specify Tip of Tree, set rev to HEAD. ')
parse.add_option(
'--no_fetch_tags',
action='store_true',
help=('Don\'t fetch tags from the server for the git checkout. '
'This can speed up fetch considerably when '
'there are many tags.'))
# TODO(machenbach): Remove the flag when all uses have been removed.
parse.add_option('--output_manifest', action='store_true',
help=('Deprecated.'))
@ -1072,6 +1084,9 @@ def checkout(options, git_slns, specs, revisions, step_text):
patch_refs=options.patch_refs,
gerrit_rebase_patch_ref=not options.gerrit_no_rebase_patch_ref,
# Control how the fetch step will occur.
no_fetch_tags=options.no_fetch_tags,
# Finally, extra configurations cleanup dir location.
refs=options.refs,
git_cache_dir=options.git_cache_dir,

@ -14,6 +14,7 @@ DEPS = [
def RunSteps(api):
api.gclient.set_config('depot_tools')
api.bot_update.ensure_checkout()
api.bot_update.ensure_checkout(no_fetch_tags=True)
def GenTests(api):

@ -151,6 +151,7 @@ class BotUpdateUnittests(unittest.TestCase):
'patch_root': None,
'patch_refs': [],
'gerrit_rebase_patch_ref': None,
'no_fetch_tags': False,
'refs': [],
'git_cache_dir': '',
'cleanup_dir': None,
@ -208,6 +209,30 @@ class BotUpdateUnittests(unittest.TestCase):
self.assertEqual(args[idx_third_revision+1], 'src/v8@deadbeef')
return self.call.records
def testTagsByDefault(self):
bot_update.ensure_checkout(**self.params)
found = False
for record in self.call.records:
args = record[0]
if args[:3] == ('git', 'cache', 'populate'):
self.assertFalse('--no-fetch-tags' in args)
found = True
self.assertTrue(found)
return self.call.records
def testNoTags(self):
params = self.params
params['no_fetch_tags'] = True
bot_update.ensure_checkout(**params)
found = False
for record in self.call.records:
args = record[0]
if args[:3] == ('git', 'cache', 'populate'):
self.assertTrue('--no-fetch-tags' in args)
found = True
self.assertTrue(found)
return self.call.records
def testApplyPatchOnGclient(self):
ref = 'refs/changes/12/345/6'
repo = 'https://chromium.googlesource.com/v8/v8'

@ -84,6 +84,36 @@ class GitCacheTest(unittest.TestCase):
mirror.populate(reset_fetch_config=True)
def _makeGitRepoWithTag(self):
self.git(['init', '-q'])
with open(os.path.join(self.origin_dir, 'foo'), 'w') as f:
f.write('touched\n')
self.git(['add', 'foo'])
self.git(['commit', '-m', 'foo'])
self.git(['tag', 'TAG'])
self.git(['pack-refs'])
def testPopulateFetchTagsByDefault(self):
self._makeGitRepoWithTag()
# Default behaviour includes tags.
mirror = git_cache.Mirror(self.origin_dir)
mirror.populate()
cache_dir = os.path.join(self.cache_dir,
mirror.UrlToCacheDir(self.origin_dir))
self.assertTrue(os.path.exists(cache_dir + '/refs/tags/TAG'))
def testPopulateFetchWithoutTags(self):
self._makeGitRepoWithTag()
# Ask to not include tags.
mirror = git_cache.Mirror(self.origin_dir)
mirror.populate(no_fetch_tags=True)
cache_dir = os.path.join(self.cache_dir,
mirror.UrlToCacheDir(self.origin_dir))
self.assertFalse(os.path.exists(cache_dir + '/refs/tags/TAG'))
def testPopulateResetFetchConfigEmptyFetchConfig(self):
self.git(['init', '-q'])

Loading…
Cancel
Save