Allow the bot_update api to not fetch tags from the git server.

There are 3 layers modified, starting from the bottom up:

1. git_cache.py populate

Now takes a --no-fetch-tags option. If specified, the cache will not
fetch updated tags from the server by passing --no-tags to git fetch.
This prevents the server from sending all the tag refs. In chromium
this prevents significant time bottlenecks dealing with 10k+ tags.

2. bot_update.py

bot_update has to deal with multiple git repos, it has the root repo
that is checked out through git-cache, as well as repos synched via
DEPS and gclient.

The script now takes a --no_fetch_tags option. If specified,
the git-cache populate call will include --no-fetch-tags. Otherwise, it
won't. So this controls (for chromium) if fetches to the src.git server
are passed --no-tags.

3. bot_update/api.py

This is the entry point for recipes and also has to deal with multiple
git repos. The behaviour at this point is not changed from the default.
A |no_fetch_tags| parameter is added to ensure_checkout() that defaults
to False.


This CL is a refactor with no intended behavior change.

The next step will be to change the chromium build repo to pass True
for ensure_checkout(no_fetch_tags) on chromium trybots.

This represents solution #2 in
https://docs.google.com/document/d/1hDIunJjjfpmr50y3YnZ4o3aq1NZF4gJa1TS0p7AIL64/edit#

Bug: 1019824
Change-Id: I935430603299daa9e301a95a5184c0ce486fd912
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1894352
Reviewed-by: Aaron Gable <agable@chromium.org>
Reviewed-by: Erik Chen <erikchen@chromium.org>
Commit-Queue: danakj <danakj@chromium.org>
Auto-Submit: danakj <danakj@chromium.org>
changes/52/1894352/14
danakj 6 years ago committed by Commit Bot
parent f6a2232b48
commit c41f72cf44

@ -536,15 +536,18 @@ class Mirror(object):
'but failed. Continuing with non-optimized repository.' 'but failed. Continuing with non-optimized repository.'
% len(pack_files)) % len(pack_files))
def _fetch(self, rundir, verbose, depth, reset_fetch_config): def _fetch(self, rundir, verbose, depth, no_fetch_tags, reset_fetch_config):
self.config(rundir, reset_fetch_config) self.config(rundir, reset_fetch_config)
v = [] v = []
d = [] d = []
t = []
if verbose: if verbose:
v = ['-v', '--progress'] v = ['-v', '--progress']
if depth: if depth:
d = ['--depth', str(depth)] d = ['--depth', str(depth)]
fetch_cmd = ['fetch'] + v + d + ['origin'] if no_fetch_tags:
t = ['--no-tags']
fetch_cmd = ['fetch'] + v + d + t + ['origin']
fetch_specs = subprocess.check_output( fetch_specs = subprocess.check_output(
[self.git_exe, 'config', '--get-all', 'remote.origin.fetch'], [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
cwd=rundir).strip().splitlines() cwd=rundir).strip().splitlines()
@ -585,8 +588,14 @@ class Mirror(object):
raise ClobberNeeded() # Corrupted cache. raise ClobberNeeded() # Corrupted cache.
logging.warn('Fetch of %s failed' % spec) logging.warn('Fetch of %s failed' % spec)
def populate(self, depth=None, shallow=False, bootstrap=False, def populate(self,
verbose=False, ignore_lock=False, lock_timeout=0, depth=None,
no_fetch_tags=False,
shallow=False,
bootstrap=False,
verbose=False,
ignore_lock=False,
lock_timeout=0,
reset_fetch_config=False): reset_fetch_config=False):
assert self.GetCachePath() assert self.GetCachePath()
if shallow and not depth: if shallow and not depth:
@ -599,13 +608,15 @@ class Mirror(object):
try: try:
self._ensure_bootstrapped(depth, bootstrap) self._ensure_bootstrapped(depth, bootstrap)
self._fetch(self.mirror_path, verbose, depth, reset_fetch_config) self._fetch(self.mirror_path, verbose, depth, no_fetch_tags,
reset_fetch_config)
except ClobberNeeded: except ClobberNeeded:
# This is a major failure, we need to clean and force a bootstrap. # This is a major failure, we need to clean and force a bootstrap.
gclient_utils.rmtree(self.mirror_path) gclient_utils.rmtree(self.mirror_path)
self.print(GIT_CACHE_CORRUPT_MESSAGE) self.print(GIT_CACHE_CORRUPT_MESSAGE)
self._ensure_bootstrapped(depth, bootstrap, force=True) self._ensure_bootstrapped(depth, bootstrap, force=True)
self._fetch(self.mirror_path, verbose, depth, reset_fetch_config) self._fetch(self.mirror_path, verbose, depth, no_fetch_tags,
reset_fetch_config)
finally: finally:
if not ignore_lock: if not ignore_lock:
lockfile.unlock() lockfile.unlock()
@ -769,6 +780,11 @@ def CMDpopulate(parser, args):
"""Ensure that the cache has all up-to-date objects for the given repo.""" """Ensure that the cache has all up-to-date objects for the given repo."""
parser.add_option('--depth', type='int', parser.add_option('--depth', type='int',
help='Only cache DEPTH commits of history') help='Only cache DEPTH commits of history')
parser.add_option(
'--no-fetch-tags',
action='store_true',
help=('Don\'t fetch tags from the server. This can speed up '
'fetch considerably when there are many tags.'))
parser.add_option('--shallow', '-s', action='store_true', parser.add_option('--shallow', '-s', action='store_true',
help='Only cache 10000 commits of history') help='Only cache 10000 commits of history')
parser.add_option('--ref', action='append', parser.add_option('--ref', action='append',
@ -794,6 +810,7 @@ def CMDpopulate(parser, args):
if options.break_locks: if options.break_locks:
mirror.unlock() mirror.unlock()
kwargs = { kwargs = {
'no_fetch_tags': options.no_fetch_tags,
'verbose': options.verbose, 'verbose': options.verbose,
'shallow': options.shallow, 'shallow': options.shallow,
'bootstrap': not options.no_bootstrap, 'bootstrap': not options.no_bootstrap,
@ -813,6 +830,11 @@ def CMDfetch(parser, args):
parser.add_option('--no_bootstrap', '--no-bootstrap', parser.add_option('--no_bootstrap', '--no-bootstrap',
action='store_true', action='store_true',
help='Don\'t (re)bootstrap from Google Storage') help='Don\'t (re)bootstrap from Google Storage')
parser.add_option(
'--no-fetch-tags',
action='store_true',
help=('Don\'t fetch tags from the server. This can speed up '
'fetch considerably when there are many tags.'))
options, args = parser.parse_args(args) options, args = parser.parse_args(args)
# Figure out which remotes to fetch. This mimics the behavior of regular # Figure out which remotes to fetch. This mimics the behavior of regular
@ -844,7 +866,9 @@ def CMDfetch(parser, args):
if git_dir.startswith(cachepath): if git_dir.startswith(cachepath):
mirror = Mirror.FromPath(git_dir) mirror = Mirror.FromPath(git_dir)
mirror.populate( mirror.populate(
bootstrap=not options.no_bootstrap, lock_timeout=options.timeout) bootstrap=not options.no_bootstrap,
no_fetch_tags=options.no_fetch_tags,
lock_timeout=options.timeout)
return 0 return 0
for remote in remotes: for remote in remotes:
remote_url = subprocess.check_output( remote_url = subprocess.check_output(
@ -854,7 +878,9 @@ def CMDfetch(parser, args):
mirror.print = lambda *args: None mirror.print = lambda *args: None
print('Updating git cache...') print('Updating git cache...')
mirror.populate( mirror.populate(
bootstrap=not options.no_bootstrap, lock_timeout=options.timeout) bootstrap=not options.no_bootstrap,
no_fetch_tags=options.no_fetch_tags,
lock_timeout=options.timeout)
subprocess.check_call([Mirror.git_exe, 'fetch', remote]) subprocess.check_call([Mirror.git_exe, 'fetch', remote])
return 0 return 0

@ -56,16 +56,20 @@ Recipe module to ensure a checkout is consistent on a bot.
Wrapper for easy calling of bot_update. Wrapper for easy calling of bot_update.
&mdash; **def [deapply\_patch](/recipes/recipe_modules/bot_update/api.py#478)(self, bot_update_step):** &mdash; **def [deapply\_patch](/recipes/recipe_modules/bot_update/api.py#498)(self, bot_update_step):**
Deapplies a patch, taking care of DEPS and solution revisions properly. Deapplies a patch, taking care of DEPS and solution revisions properly.
&mdash; **def [ensure\_checkout](/recipes/recipe_modules/bot_update/api.py#68)(self, gclient_config=None, suffix=None, patch=True, update_presentation=True, patch_root=None, with_branch_heads=False, with_tags=False, refs=None, patch_oauth2=None, oauth2_json=None, use_site_config_creds=None, clobber=False, root_solution_revision=None, rietveld=None, issue=None, patchset=None, gerrit_no_reset=False, gerrit_no_rebase_patch_ref=False, disable_syntax_validation=False, manifest_name=None, patch_refs=None, ignore_input_commit=False, set_output_commit=False, step_test_data=None, \*\*kwargs):** &mdash; **def [ensure\_checkout](/recipes/recipe_modules/bot_update/api.py#68)(self, gclient_config=None, suffix=None, patch=True, update_presentation=True, patch_root=None, with_branch_heads=False, with_tags=False, no_fetch_tags=False, refs=None, patch_oauth2=None, oauth2_json=None, use_site_config_creds=None, clobber=False, root_solution_revision=None, rietveld=None, issue=None, patchset=None, gerrit_no_reset=False, gerrit_no_rebase_patch_ref=False, disable_syntax_validation=False, manifest_name=None, patch_refs=None, ignore_input_commit=False, set_output_commit=False, step_test_data=None, \*\*kwargs):**
Args: Args:
gclient_config: The gclient configuration to use when running bot_update. gclient_config: The gclient configuration to use when running bot_update.
If omitted, the current gclient configuration is used. If omitted, the current gclient configuration is used.
no_fetch_tags: When true, the root git repo being checked out will not
fetch any tags referenced from the references being fetched. When a repo
has many references, it can become a performance bottleneck, so avoid
tags if the checkout will not need them present.
disable_syntax_validation: (legacy) Disables syntax validation for DEPS. disable_syntax_validation: (legacy) Disables syntax validation for DEPS.
Needed as migration paths for recipes dealing with older revisions, Needed as migration paths for recipes dealing with older revisions,
such as bisect. such as bisect.
@ -83,7 +87,7 @@ Args:
step_test_data: a null function that returns test bot_update.py output. step_test_data: a null function that returns test bot_update.py output.
Use test_api.output_json to generate test data. Use test_api.output_json to generate test data.
&mdash; **def [get\_project\_revision\_properties](/recipes/recipe_modules/bot_update/api.py#455)(self, project_name, gclient_config=None):** &mdash; **def [get\_project\_revision\_properties](/recipes/recipe_modules/bot_update/api.py#475)(self, project_name, gclient_config=None):**
Returns all property names used for storing the checked-out revision of Returns all property names used for storing the checked-out revision of
a given project. a given project.
@ -101,7 +105,7 @@ Returns (list of str): All properties that'll hold the checked-out revision
&emsp; **@property**<br>&mdash; **def [last\_returned\_properties](/recipes/recipe_modules/bot_update/api.py#36)(self):** &emsp; **@property**<br>&mdash; **def [last\_returned\_properties](/recipes/recipe_modules/bot_update/api.py#36)(self):**
&mdash; **def [resolve\_fixed\_revision](/recipes/recipe_modules/bot_update/api.py#405)(self, bot_update_json, name):** &mdash; **def [resolve\_fixed\_revision](/recipes/recipe_modules/bot_update/api.py#425)(self, bot_update_json, name):**
Set a fixed revision for a single dependency using project revision Set a fixed revision for a single dependency using project revision
properties. properties.

@ -35,7 +35,7 @@ class BotUpdateApi(recipe_api.RecipeApi):
@property @property
def last_returned_properties(self): def last_returned_properties(self):
return self._last_returned_properties return self._last_returned_properties
def _get_commit_repo_path(self, commit, gclient_config): def _get_commit_repo_path(self, commit, gclient_config):
"""Returns local path to the repo that the commit is associated with. """Returns local path to the repo that the commit is associated with.
@ -65,23 +65,41 @@ class BotUpdateApi(recipe_api.RecipeApi):
return repo_path return repo_path
def ensure_checkout(self, gclient_config=None, suffix=None, def ensure_checkout(self,
patch=True, update_presentation=True, gclient_config=None,
suffix=None,
patch=True,
update_presentation=True,
patch_root=None, patch_root=None,
with_branch_heads=False, with_tags=False, refs=None, with_branch_heads=False,
patch_oauth2=None, oauth2_json=None, with_tags=False,
use_site_config_creds=None, clobber=False, no_fetch_tags=False,
root_solution_revision=None, rietveld=None, issue=None, refs=None,
patchset=None, gerrit_no_reset=False, patch_oauth2=None,
oauth2_json=None,
use_site_config_creds=None,
clobber=False,
root_solution_revision=None,
rietveld=None,
issue=None,
patchset=None,
gerrit_no_reset=False,
gerrit_no_rebase_patch_ref=False, gerrit_no_rebase_patch_ref=False,
disable_syntax_validation=False, manifest_name=None, disable_syntax_validation=False,
patch_refs=None, ignore_input_commit=False, manifest_name=None,
set_output_commit=False, step_test_data=None, patch_refs=None,
ignore_input_commit=False,
set_output_commit=False,
step_test_data=None,
**kwargs): **kwargs):
""" """
Args: Args:
gclient_config: The gclient configuration to use when running bot_update. gclient_config: The gclient configuration to use when running bot_update.
If omitted, the current gclient configuration is used. If omitted, the current gclient configuration is used.
no_fetch_tags: When true, the root git repo being checked out will not
fetch any tags referenced from the references being fetched. When a repo
has many references, it can become a performance bottleneck, so avoid
tags if the checkout will not need them present.
disable_syntax_validation: (legacy) Disables syntax validation for DEPS. disable_syntax_validation: (legacy) Disables syntax validation for DEPS.
Needed as migration paths for recipes dealing with older revisions, Needed as migration paths for recipes dealing with older revisions,
such as bisect. such as bisect.
@ -238,6 +256,8 @@ class BotUpdateApi(recipe_api.RecipeApi):
cmd.append('--with_tags') cmd.append('--with_tags')
if gerrit_no_reset: if gerrit_no_reset:
cmd.append('--gerrit_no_reset') cmd.append('--gerrit_no_reset')
if no_fetch_tags:
cmd.append('--no_fetch_tags')
if gerrit_no_rebase_patch_ref: if gerrit_no_rebase_patch_ref:
cmd.append('--gerrit_no_rebase_patch_ref') cmd.append('--gerrit_no_rebase_patch_ref')
if disable_syntax_validation or cfg.disable_syntax_validation: if disable_syntax_validation or cfg.disable_syntax_validation:
@ -490,4 +510,7 @@ class BotUpdateApi(recipe_api.RecipeApi):
bot_update_json['properties'][rev_property]) bot_update_json['properties'][rev_property])
self._resolve_fixed_revisions(bot_update_json) self._resolve_fixed_revisions(bot_update_json)
# TODO(crbug.com/1019824): Pass |no_fetch_tags=True| here to prevent
# fetching 10k+ tags for chromium. This operation shouldn't need tags, it is
# removing a previously applied patch.
self.ensure_checkout(patch=False, update_presentation=False) self.ensure_checkout(patch=False, update_presentation=False)

@ -638,12 +638,14 @@ def _maybe_break_locks(checkout_path, tries=3):
def git_checkouts(solutions, revisions, refs, git_cache_dir, cleanup_dir): def git_checkouts(solutions, revisions, refs, no_fetch_tags, git_cache_dir,
cleanup_dir):
build_dir = os.getcwd() build_dir = os.getcwd()
first_solution = True first_solution = True
for sln in solutions: for sln in solutions:
sln_dir = path.join(build_dir, sln['name']) sln_dir = path.join(build_dir, sln['name'])
_git_checkout(sln, sln_dir, revisions, refs, git_cache_dir, cleanup_dir) _git_checkout(sln, sln_dir, revisions, refs, no_fetch_tags, git_cache_dir,
cleanup_dir)
if first_solution: if first_solution:
git_ref = git('log', '--format=%H', '--max-count=1', git_ref = git('log', '--format=%H', '--max-count=1',
cwd=path.join(build_dir, sln['name']) cwd=path.join(build_dir, sln['name'])
@ -652,11 +654,14 @@ def git_checkouts(solutions, revisions, refs, git_cache_dir, cleanup_dir):
return git_ref return git_ref
def _git_checkout(sln, sln_dir, revisions, refs, git_cache_dir, cleanup_dir): def _git_checkout(sln, sln_dir, revisions, refs, no_fetch_tags, git_cache_dir,
cleanup_dir):
name = sln['name'] name = sln['name']
url = sln['url'] url = sln['url']
populate_cmd = (['cache', 'populate', '--ignore_locks', '-v', populate_cmd = (['cache', 'populate', '--ignore_locks', '-v',
'--cache-dir', git_cache_dir, url, '--reset-fetch-config']) '--cache-dir', git_cache_dir, url, '--reset-fetch-config'])
if no_fetch_tags:
populate_cmd.extend(['--no-fetch-tags'])
for ref in refs: for ref in refs:
populate_cmd.extend(['--ref', ref]) populate_cmd.extend(['--ref', ref])
@ -823,15 +828,16 @@ def emit_json(out_file, did_run, gclient_output=None, **kwargs):
def ensure_checkout(solutions, revisions, first_sln, target_os, target_os_only, def ensure_checkout(solutions, revisions, first_sln, target_os, target_os_only,
target_cpu, patch_root, patch_refs, target_cpu, patch_root, patch_refs, gerrit_rebase_patch_ref,
gerrit_rebase_patch_ref, refs, git_cache_dir, no_fetch_tags, refs, git_cache_dir, cleanup_dir,
cleanup_dir, gerrit_reset, disable_syntax_validation): gerrit_reset, disable_syntax_validation):
# Get a checkout of each solution, without DEPS or hooks. # Get a checkout of each solution, without DEPS or hooks.
# Calling git directly because there is no way to run Gclient without # Calling git directly because there is no way to run Gclient without
# invoking DEPS. # invoking DEPS.
print('Fetching Git checkout') print('Fetching Git checkout')
git_checkouts(solutions, revisions, refs, git_cache_dir, cleanup_dir) git_checkouts(solutions, revisions, refs, no_fetch_tags, git_cache_dir,
cleanup_dir)
# Ensure our build/ directory is set up with the correct .gclient file. # Ensure our build/ directory is set up with the correct .gclient file.
gclient_configure(solutions, target_os, target_os_only, target_cpu, gclient_configure(solutions, target_os, target_os_only, target_cpu,
@ -941,6 +947,12 @@ def parse_args():
'Can prepend root@<rev> to specify which repository, ' 'Can prepend root@<rev> to specify which repository, '
'where root is either a filesystem path or git https ' 'where root is either a filesystem path or git https '
'url. To specify Tip of Tree, set rev to HEAD. ') 'url. To specify Tip of Tree, set rev to HEAD. ')
parse.add_option(
'--no_fetch_tags',
action='store_true',
help=('Don\'t fetch tags from the server for the git checkout. '
'This can speed up fetch considerably when '
'there are many tags.'))
# TODO(machenbach): Remove the flag when all uses have been removed. # TODO(machenbach): Remove the flag when all uses have been removed.
parse.add_option('--output_manifest', action='store_true', parse.add_option('--output_manifest', action='store_true',
help=('Deprecated.')) help=('Deprecated.'))
@ -1072,6 +1084,9 @@ def checkout(options, git_slns, specs, revisions, step_text):
patch_refs=options.patch_refs, patch_refs=options.patch_refs,
gerrit_rebase_patch_ref=not options.gerrit_no_rebase_patch_ref, gerrit_rebase_patch_ref=not options.gerrit_no_rebase_patch_ref,
# Control how the fetch step will occur.
no_fetch_tags=options.no_fetch_tags,
# Finally, extra configurations cleanup dir location. # Finally, extra configurations cleanup dir location.
refs=options.refs, refs=options.refs,
git_cache_dir=options.git_cache_dir, git_cache_dir=options.git_cache_dir,

@ -14,6 +14,7 @@ DEPS = [
def RunSteps(api): def RunSteps(api):
api.gclient.set_config('depot_tools') api.gclient.set_config('depot_tools')
api.bot_update.ensure_checkout() api.bot_update.ensure_checkout()
api.bot_update.ensure_checkout(no_fetch_tags=True)
def GenTests(api): def GenTests(api):

@ -151,6 +151,7 @@ class BotUpdateUnittests(unittest.TestCase):
'patch_root': None, 'patch_root': None,
'patch_refs': [], 'patch_refs': [],
'gerrit_rebase_patch_ref': None, 'gerrit_rebase_patch_ref': None,
'no_fetch_tags': False,
'refs': [], 'refs': [],
'git_cache_dir': '', 'git_cache_dir': '',
'cleanup_dir': None, 'cleanup_dir': None,
@ -208,6 +209,30 @@ class BotUpdateUnittests(unittest.TestCase):
self.assertEqual(args[idx_third_revision+1], 'src/v8@deadbeef') self.assertEqual(args[idx_third_revision+1], 'src/v8@deadbeef')
return self.call.records return self.call.records
def testTagsByDefault(self):
bot_update.ensure_checkout(**self.params)
found = False
for record in self.call.records:
args = record[0]
if args[:3] == ('git', 'cache', 'populate'):
self.assertFalse('--no-fetch-tags' in args)
found = True
self.assertTrue(found)
return self.call.records
def testNoTags(self):
params = self.params
params['no_fetch_tags'] = True
bot_update.ensure_checkout(**params)
found = False
for record in self.call.records:
args = record[0]
if args[:3] == ('git', 'cache', 'populate'):
self.assertTrue('--no-fetch-tags' in args)
found = True
self.assertTrue(found)
return self.call.records
def testApplyPatchOnGclient(self): def testApplyPatchOnGclient(self):
ref = 'refs/changes/12/345/6' ref = 'refs/changes/12/345/6'
repo = 'https://chromium.googlesource.com/v8/v8' repo = 'https://chromium.googlesource.com/v8/v8'

@ -84,6 +84,36 @@ class GitCacheTest(unittest.TestCase):
mirror.populate(reset_fetch_config=True) mirror.populate(reset_fetch_config=True)
def _makeGitRepoWithTag(self):
self.git(['init', '-q'])
with open(os.path.join(self.origin_dir, 'foo'), 'w') as f:
f.write('touched\n')
self.git(['add', 'foo'])
self.git(['commit', '-m', 'foo'])
self.git(['tag', 'TAG'])
self.git(['pack-refs'])
def testPopulateFetchTagsByDefault(self):
self._makeGitRepoWithTag()
# Default behaviour includes tags.
mirror = git_cache.Mirror(self.origin_dir)
mirror.populate()
cache_dir = os.path.join(self.cache_dir,
mirror.UrlToCacheDir(self.origin_dir))
self.assertTrue(os.path.exists(cache_dir + '/refs/tags/TAG'))
def testPopulateFetchWithoutTags(self):
self._makeGitRepoWithTag()
# Ask to not include tags.
mirror = git_cache.Mirror(self.origin_dir)
mirror.populate(no_fetch_tags=True)
cache_dir = os.path.join(self.cache_dir,
mirror.UrlToCacheDir(self.origin_dir))
self.assertFalse(os.path.exists(cache_dir + '/refs/tags/TAG'))
def testPopulateResetFetchConfigEmptyFetchConfig(self): def testPopulateResetFetchConfigEmptyFetchConfig(self):
self.git(['init', '-q']) self.git(['init', '-q'])

Loading…
Cancel
Save