From cd33775a7ad6848c2a9c272538bf97e7715e4c9b Mon Sep 17 00:00:00 2001 From: Andrii Shyshkalov Date: Tue, 6 Feb 2018 15:20:05 -0800 Subject: [PATCH] bot_update: optimize checkout of solution's main repo. Goal: reduce number of git fetches from remote to the minimum possible. Minimum possible is either: * 0 if desired revision is pinned (!= HEAD) and already exists in cache * 1 if desired revision is HEAD * >= 2 if revision is pinned but we talk temporarily out-of-date git server not yet having it. This CL achieves the above. No fetch: https://ci.chromium.org/swarming/task/39dad20230c84110?server=chromium-swarm.appspot.com bot_update takes <1min, but it used to take at least 2. The steps are now like this: ... ===Running git cat-file -e 33bf8a94dcd5e0abbdf83e4afaed24b5180e3eb2=== In directory: /b/swarming/w/ir/cache/git/chromium.googlesource.com-chromium-src ===Succeeded in 0.0 mins=== ===Running /b/swarming/w/ir/cipd_bin_packages/bin/python -u /b/swarming/w/ir/recipe-checkout-dir/depot_tools/git_cache.py exists --quiet --cache-dir /b/swarming/w/ir/cache/git https://chromium.googlesource.com/chromium/src.git=== In directory: /b/swarming/w/ir/cache/builder/linux /b/swarming/w/ir/cache/git/chromium.googlesource.com-chromium-src ===Succeeded in 0.0 mins=== ===Running git remote set-url origin /b/swarming/w/ir/cache/git/chromium.googlesource.com-chromium-src=== In directory: /b/swarming/w/ir/cache/builder/linux/src ===Succeeded in 0.0 mins=== ===Running git fetch origin=== In directory: /b/swarming/w/ir/cache/builder/linux/src From /b/swarming/w/ir/cache/git/chromium.googlesource.com-chromium-src 003eedbfee47..5210107a20e8 master -> origin/master ===Succeeded in 0.0 mins=== ===Running git checkout --force 33bf8a94dcd5e0abbdf83e4afaed24b5180e3eb2=== ... Fetch & success: https://ci.chromium.org/swarming/task/39db022b874ffd10?server=chromium-swarm.appspot.com Fetch & retry because missing rev: https://ci.chromium.org/swarming/task/39db04ce8b4b8910?server=chromium-swarm.appspot.com Fetch because rev=HEAD: https://ci.chromium.org/swarming/task/39db0c0dd5ab5d10?server=chromium-swarm.appspot.com This has already been adopted in gclient. Bug: Change-Id: Id99892b62719fdf3f7e6e59058986d1500384f8d Reviewed-on: https://chromium-review.googlesource.com/771591 Commit-Queue: Andrii Shyshkalov Reviewed-by: Robbie Iannucci Reviewed-by: Jao-ke Chin-Lee --- .../bot_update/resources/bot_update.py | 89 +++++++++++++------ 1 file changed, 64 insertions(+), 25 deletions(-) diff --git a/recipes/recipe_modules/bot_update/resources/bot_update.py b/recipes/recipe_modules/bot_update/resources/bot_update.py index 9c5c43a517..a76562f461 100755 --- a/recipes/recipe_modules/bot_update/resources/bot_update.py +++ b/recipes/recipe_modules/bot_update/resources/bot_update.py @@ -523,6 +523,15 @@ def _get_target_branch_and_revision(solution_name, git_url, revisions): return 'master', configured +def get_target_pin(solution_name, git_url, revisions): + """Returns revision to be checked out if it is pinned, else None.""" + _, revision = _get_target_branch_and_revision( + solution_name, git_url, revisions) + if revision.upper() != 'HEAD': + return revision + return None + + def force_solution_revision(solution_name, git_url, revisions, cwd): branch, revision = _get_target_branch_and_revision( solution_name, git_url, revisions) @@ -543,6 +552,18 @@ def force_solution_revision(solution_name, git_url, revisions, cwd): git('checkout', '--force', treeish, '--', cwd=cwd) +def _has_in_git_cache(revision_sha1, git_cache_dir, url): + """Returns whether given revision_sha1 is contained in cache of a given repo. + """ + try: + mirror_dir = git( + 'cache', 'exists', '--quiet', '--cache-dir', git_cache_dir, url).strip() + git('cat-file', '-e', revision_sha1, cwd=mirror_dir) + return True + except SubprocessFailed: + return False + + def is_broken_repo_dir(repo_dir): # Treat absence of 'config' as a signal of a partially deleted repo. return not path.exists(os.path.join(repo_dir, '.git', 'config')) @@ -606,20 +627,46 @@ def _git_checkout(sln, sln_dir, revisions, shallow, refs, git_cache_dir, for ref in refs: populate_cmd.extend(['--ref', ref]) - # Just in case we're hitting a different git server than the one from - # which the target revision was polled, we retry some. - - # One minute (5 tries with exp. backoff). We retry at least once regardless - # of deadline in case initial fetch takes longer than the deadline but does - # not contain the required revision. - deadline = time.time() + 60 - tries = 0 - while True: + # Step 1: populate/refresh cache, if necessary. + pin = get_target_pin(name, url, revisions) + if not pin: + # Refresh only once. git(*populate_cmd) - mirror_dir = git( - 'cache', 'exists', '--quiet', - '--cache-dir', git_cache_dir, url).strip() + elif _has_in_git_cache(pin, git_cache_dir, url): + # No need to fetch at all, because we already have needed revision. + pass + else: + # We may need to retry a bit due to eventual consinstency in replication of + # git servers. + soft_deadline = time.time() + 60 + attempt = 0 + while True: + attempt += 1 + # TODO(tandrii): propagate the pin to git server per recommendation of + # maintainers of *.googlesource.com (workaround git server replication + # lag). + git(*populate_cmd) + if _has_in_git_cache(pin, git_cache_dir, url): + break + overrun = time.time() - soft_deadline + # Only kick in deadline after second attempt to ensure we retry at least + # once after initial fetch from not-yet-replicated server. + if attempt >= 2 and overrun > 0: + print 'Ran %s seconds past deadline. Aborting.' % (overrun,) + # TODO(tandrii): raise exception immediately here, instead of doing + # useless step 2 trying to fetch something that we know doesn't exist + # in cache **after production data gives us confidence to do so**. + break + + sleep_secs = min(60, 2**attempt) + print 'waiting %s seconds and trying to fetch again...' % sleep_secs + time.sleep(sleep_secs) + # Step 2: populate a checkout from local cache. All operations are local. + mirror_dir = git( + 'cache', 'exists', '--quiet', '--cache-dir', git_cache_dir, url).strip() + first_try = True + while True: try: # If repo deletion was aborted midway, it may have left .git in broken # state. @@ -650,21 +697,13 @@ def _git_checkout(sln, sln_dir, revisions, shallow, refs, git_cache_dir, except SubprocessFailed as e: # Exited abnormally, theres probably something wrong. print 'Something failed: %s.' % str(e) - - # Only kick in deadline after trying once, in case the revision hasn't - # yet propagated. - if tries >= 1 and time.time() > deadline: - overrun = time.time() - deadline - print 'Ran %s seconds past deadline. Aborting.' % overrun + if first_try: + first_try = False + # Lets wipe the checkout and try again. + remove(sln_dir, cleanup_dir) + else: raise - # Lets wipe the checkout and try again. - tries += 1 - sleep_secs = 2**tries - print 'waiting %s seconds and trying again...' % sleep_secs - time.sleep(sleep_secs) - remove(sln_dir, cleanup_dir) - def _download(url): """Fetch url and return content, with retries for flake."""