From 08049e2db2ee6919df9a0bd4e9b18b0acdc378e7 Mon Sep 17 00:00:00 2001 From: Vadim Shtayura Date: Wed, 11 Oct 2017 00:14:52 +0000 Subject: [PATCH] Revert "git_cache: Remove locks" This reverts commit c3eb3fa33551c957d0179472c908864da016d95a. Reason for revert: lots of "runhooks" failure everywhere Example: https://build.chromium.org/p/chromium.linux/builders/Linux%20Builder/builds/92720 Original change's description: > git_cache: Remove locks > > These aren't in use, and the original problem they were > meant to solve has been solved at the gclient.py layer > using resource locking: > https://codereview.chromium.org/2049583003 > > Bug: 773008 > Change-Id: I6609f39d7f15604e0bb3d742a41c4f9fec87a57a > Reviewed-on: https://chromium-review.googlesource.com/707728 > Reviewed-by: Aaron Gable > Reviewed-by: Robbie Iannucci > Commit-Queue: Ryan Tseng TBR=iannucci@chromium.org,hinoka@chromium.org,agable@chromium.org,phajdan.jr@chromium.org Change-Id: I31d5fef94f39f3a9f97b9e59121073b1f433d11e No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: 773008 Reviewed-on: https://chromium-review.googlesource.com/711054 Reviewed-by: Vadim Shtayura Commit-Queue: Vadim Shtayura --- gclient.py | 9 + gclient_scm.py | 5 +- git_cache.py | 232 ++++++++++++++++-- .../bot_update/resources/bot_update.py | 43 +++- tests/bot_update_coverage_test.py | 14 ++ 5 files changed, 285 insertions(+), 18 deletions(-) diff --git a/gclient.py b/gclient.py index 76832f250..adedd618c 100755 --- a/gclient.py +++ b/gclient.py @@ -1308,6 +1308,10 @@ it or fix the checkout. if cache_dir: cache_dir = os.path.join(self.root_dir, cache_dir) cache_dir = os.path.abspath(cache_dir) + # If running on a bot, force break any stale git cache locks. + if os.path.exists(cache_dir) and os.environ.get('CHROME_HEADLESS'): + subprocess2.check_call(['git', 'cache', 'unlock', '--cache-dir', + cache_dir, '--force', '--all']) gclient_scm.GitWrapper.cache_dir = cache_dir git_cache.Mirror.SetCachePath(cache_dir) @@ -2350,11 +2354,16 @@ def CMDsync(parser, args): parser.add_option('--no_bootstrap', '--no-bootstrap', action='store_true', help='Don\'t bootstrap from Google Storage.') + parser.add_option('--ignore_locks', action='store_true', + help='GIT ONLY - Ignore cache locks.') parser.add_option('--break_repo_locks', action='store_true', help='GIT ONLY - Forcibly remove repo locks (e.g. ' 'index.lock). This should only be used if you know for ' 'certain that this invocation of gclient is the only ' 'thing operating on the git repos (e.g. on a bot).') + parser.add_option('--lock_timeout', type='int', default=5000, + help='GIT ONLY - Deadline (in seconds) to wait for git ' + 'cache lock to become available. Default is %default.') # TODO(agable): Remove these when the oldest CrOS release milestone is M56. parser.add_option('-t', '--transitive', action='store_true', help='DEPRECATED: This is a no-op.') diff --git a/gclient_scm.py b/gclient_scm.py index 208ac3af6..cc03d8e6d 100644 --- a/gclient_scm.py +++ b/gclient_scm.py @@ -855,7 +855,10 @@ class GitWrapper(SCMWrapper): depth = None mirror.populate(verbose=options.verbose, bootstrap=not getattr(options, 'no_bootstrap', False), - depth=depth) + depth=depth, + ignore_lock=getattr(options, 'ignore_locks', False), + lock_timeout=getattr(options, 'lock_timeout', 0)) + mirror.unlock() def _Clone(self, revision, url, options): """Clone a git repository from the given URL. diff --git a/git_cache.py b/git_cache.py index 322920210..20b844514 100755 --- a/git_cache.py +++ b/git_cache.py @@ -35,6 +35,9 @@ except NameError: class WinErr(Exception): pass +class LockError(Exception): + pass + class ClobberNeeded(Exception): pass @@ -74,11 +77,122 @@ def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10, sleep_time *= 2 +class Lockfile(object): + """Class to represent a cross-platform process-specific lockfile.""" + + def __init__(self, path, timeout=0): + self.path = os.path.abspath(path) + self.timeout = timeout + self.lockfile = self.path + ".lock" + self.pid = os.getpid() + + def _read_pid(self): + """Read the pid stored in the lockfile. + + Note: This method is potentially racy. By the time it returns the lockfile + may have been unlocked, removed, or stolen by some other process. + """ + try: + with open(self.lockfile, 'r') as f: + pid = int(f.readline().strip()) + except (IOError, ValueError): + pid = None + return pid + + def _make_lockfile(self): + """Safely creates a lockfile containing the current pid.""" + open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY) + fd = os.open(self.lockfile, open_flags, 0o644) + f = os.fdopen(fd, 'w') + print(self.pid, file=f) + f.close() + + def _remove_lockfile(self): + """Delete the lockfile. Complains (implicitly) if it doesn't exist. + + See gclient_utils.py:rmtree docstring for more explanation on the + windows case. + """ + if sys.platform == 'win32': + lockfile = os.path.normcase(self.lockfile) + + def delete(): + exitcode = subprocess.call(['cmd.exe', '/c', + 'del', '/f', '/q', lockfile]) + if exitcode != 0: + raise LockError('Failed to remove lock: %s' % (lockfile,)) + exponential_backoff_retry( + delete, + excs=(LockError,), + name='del [%s]' % (lockfile,)) + else: + os.remove(self.lockfile) + + def lock(self): + """Acquire the lock. + + This will block with a deadline of self.timeout seconds. + """ + elapsed = 0 + while True: + try: + self._make_lockfile() + return + except OSError as e: + if elapsed < self.timeout: + sleep_time = max(10, min(3, self.timeout - elapsed)) + logging.info('Could not create git cache lockfile; ' + 'will retry after sleep(%d).', sleep_time); + elapsed += sleep_time + time.sleep(sleep_time) + continue + if e.errno == errno.EEXIST: + raise LockError("%s is already locked" % self.path) + else: + raise LockError("Failed to create %s (err %s)" % (self.path, e.errno)) + + def unlock(self): + """Release the lock.""" + try: + if not self.is_locked(): + raise LockError("%s is not locked" % self.path) + if not self.i_am_locking(): + raise LockError("%s is locked, but not by me" % self.path) + self._remove_lockfile() + except WinErr: + # Windows is unreliable when it comes to file locking. YMMV. + pass + + def break_lock(self): + """Remove the lock, even if it was created by someone else.""" + try: + self._remove_lockfile() + return True + except OSError as exc: + if exc.errno == errno.ENOENT: + return False + else: + raise + + def is_locked(self): + """Test if the file is locked by anyone. + + Note: This method is potentially racy. By the time it returns the lockfile + may have been unlocked, removed, or stolen by some other process. + """ + return os.path.exists(self.lockfile) + + def i_am_locking(self): + """Test if the file is locked by this process.""" + return self.is_locked() and self.pid == self._read_pid() + + class Mirror(object): git_exe = 'git.bat' if sys.platform.startswith('win') else 'git' gsutil_exe = os.path.join( os.path.dirname(os.path.abspath(__file__)), 'gsutil.py') + cachepath_lock = threading.Lock() @staticmethod def parse_fetch_spec(spec): @@ -140,21 +254,23 @@ class Mirror(object): @classmethod def SetCachePath(cls, cachepath): - setattr(cls, 'cachepath', cachepath) + with cls.cachepath_lock: + setattr(cls, 'cachepath', cachepath) @classmethod def GetCachePath(cls): - if not hasattr(cls, 'cachepath'): - try: - cachepath = subprocess.check_output( - [cls.git_exe, 'config', '--global', 'cache.cachepath']).strip() - except subprocess.CalledProcessError: - cachepath = None - if not cachepath: - raise RuntimeError( - 'No global cache.cachepath git configuration found.') - setattr(cls, 'cachepath', cachepath) - return getattr(cls, 'cachepath') + with cls.cachepath_lock: + if not hasattr(cls, 'cachepath'): + try: + cachepath = subprocess.check_output( + [cls.git_exe, 'config', '--global', 'cache.cachepath']).strip() + except subprocess.CalledProcessError: + cachepath = None + if not cachepath: + raise RuntimeError( + 'No global cache.cachepath git configuration found.') + setattr(cls, 'cachepath', cachepath) + return getattr(cls, 'cachepath') def Rename(self, src, dst): # This is somehow racy on Windows. @@ -371,12 +487,17 @@ class Mirror(object): raise ClobberNeeded() # Corrupted cache. logging.warn('Fetch of %s failed' % spec) - def populate(self, depth=None, shallow=False, bootstrap=False, verbose=False): + def populate(self, depth=None, shallow=False, bootstrap=False, + verbose=False, ignore_lock=False, lock_timeout=0): assert self.GetCachePath() if shallow and not depth: depth = 10000 gclient_utils.safe_makedirs(self.GetCachePath()) + lockfile = Lockfile(self.mirror_path, lock_timeout) + if not ignore_lock: + lockfile.lock() + tempdir = None try: tempdir = self._ensure_bootstrapped(depth, bootstrap) @@ -394,6 +515,8 @@ class Mirror(object): if os.path.exists(self.mirror_path): gclient_utils.rmtree(self.mirror_path) self.Rename(tempdir, self.mirror_path) + if not ignore_lock: + lockfile.unlock() def update_bootstrap(self, prune=False): # The files are named .zip @@ -434,6 +557,45 @@ class Mirror(object): except OSError: logging.warn('Unable to delete temporary pack file %s' % f) + @classmethod + def BreakLocks(cls, path): + did_unlock = False + lf = Lockfile(path) + if lf.break_lock(): + did_unlock = True + # Look for lock files that might have been left behind by an interrupted + # git process. + lf = os.path.join(path, 'config.lock') + if os.path.exists(lf): + os.remove(lf) + did_unlock = True + cls.DeleteTmpPackFiles(path) + return did_unlock + + def unlock(self): + return self.BreakLocks(self.mirror_path) + + @classmethod + def UnlockAll(cls): + cachepath = cls.GetCachePath() + if not cachepath: + return + dirlist = os.listdir(cachepath) + repo_dirs = set([os.path.join(cachepath, path) for path in dirlist + if os.path.isdir(os.path.join(cachepath, path))]) + for dirent in dirlist: + if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'): + gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent)) + elif (dirent.endswith('.lock') and + os.path.isfile(os.path.join(cachepath, dirent))): + repo_dirs.add(os.path.join(cachepath, dirent[:-5])) + + unlocked_repos = [] + for repo_dir in repo_dirs: + if cls.BreakLocks(repo_dir): + unlocked_repos.append(repo_dir) + + return unlocked_repos @subcommand.usage('[url of repo to check for caching]') def CMDexists(parser, args): @@ -485,6 +647,9 @@ def CMDpopulate(parser, args): parser.add_option('--no_bootstrap', '--no-bootstrap', action='store_true', help='Don\'t bootstrap from Google Storage') + parser.add_option('--ignore_locks', '--ignore-locks', + action='store_true', + help='Don\'t try to lock repository') options, args = parser.parse_args(args) if not len(args) == 1: @@ -496,6 +661,8 @@ def CMDpopulate(parser, args): 'verbose': options.verbose, 'shallow': options.shallow, 'bootstrap': not options.no_bootstrap, + 'ignore_lock': options.ignore_locks, + 'lock_timeout': options.timeout, } if options.depth: kwargs['depth'] = options.depth @@ -540,7 +707,7 @@ def CMDfetch(parser, args): if git_dir.startswith(cachepath): mirror = Mirror.FromPath(git_dir) mirror.populate( - bootstrap=not options.no_bootstrap,) + bootstrap=not options.no_bootstrap, lock_timeout=options.timeout) return 0 for remote in remotes: remote_url = subprocess.check_output( @@ -550,11 +717,44 @@ def CMDfetch(parser, args): mirror.print = lambda *args: None print('Updating git cache...') mirror.populate( - bootstrap=not options.no_bootstrap) + bootstrap=not options.no_bootstrap, lock_timeout=options.timeout) subprocess.check_call([Mirror.git_exe, 'fetch', remote]) return 0 +@subcommand.usage('[url of repo to unlock, or -a|--all]') +def CMDunlock(parser, args): + """Unlock one or all repos if their lock files are still around.""" + parser.add_option('--force', '-f', action='store_true', + help='Actually perform the action') + parser.add_option('--all', '-a', action='store_true', + help='Unlock all repository caches') + options, args = parser.parse_args(args) + if len(args) > 1 or (len(args) == 0 and not options.all): + parser.error('git cache unlock takes exactly one repo url, or --all') + + if not options.force: + cachepath = Mirror.GetCachePath() + lockfiles = [os.path.join(cachepath, path) + for path in os.listdir(cachepath) + if path.endswith('.lock') and os.path.isfile(path)] + parser.error('git cache unlock requires -f|--force to do anything. ' + 'Refusing to unlock the following repo caches: ' + ', '.join(lockfiles)) + + unlocked_repos = [] + if options.all: + unlocked_repos.extend(Mirror.UnlockAll()) + else: + m = Mirror(args[0]) + if m.unlock(): + unlocked_repos.append(m.mirror_path) + + if unlocked_repos: + logging.info('Broke locks on these caches:\n %s' % '\n '.join( + unlocked_repos)) + + class OptionParser(optparse.OptionParser): """Wrapper class for OptionParser to handle global options.""" @@ -566,6 +766,8 @@ class OptionParser(optparse.OptionParser): help='Increase verbosity (can be passed multiple times)') self.add_option('-q', '--quiet', action='store_true', help='Suppress all extraneous output') + self.add_option('--timeout', type='int', default=0, + help='Timeout for acquiring cache lock, in seconds') def parse_args(self, args=None, values=None): options, args = optparse.OptionParser.parse_args(self, args, values) diff --git a/recipes/recipe_modules/bot_update/resources/bot_update.py b/recipes/recipe_modules/bot_update/resources/bot_update.py index 81fe09111..426eb77dc 100755 --- a/recipes/recipe_modules/bot_update/resources/bot_update.py +++ b/recipes/recipe_modules/bot_update/resources/bot_update.py @@ -331,7 +331,7 @@ def gclient_sync( os.close(fd) args = ['sync', '--verbose', '--reset', '--force', - '--output-json', gclient_output_file, + '--ignore_locks', '--output-json', gclient_output_file, '--nohooks', '--noprehooks', '--delete_unversioned_trees'] if with_branch_heads: args += ['--with_branch_heads'] @@ -470,9 +470,36 @@ def is_broken_repo_dir(repo_dir): return not path.exists(os.path.join(repo_dir, '.git', 'config')) +def _maybe_break_locks(checkout_path): + """This removes all .lock files from this repo's .git directory. + + In particular, this will cleanup index.lock files, as well as ref lock + files. + """ + git_dir = os.path.join(checkout_path, '.git') + for dirpath, _, filenames in os.walk(git_dir): + for filename in filenames: + if filename.endswith('.lock'): + to_break = os.path.join(dirpath, filename) + print 'breaking lock: %s' % to_break + try: + os.remove(to_break) + except OSError as ex: + print 'FAILED to break lock: %s: %s' % (to_break, ex) + raise + + def git_checkout(solutions, revisions, shallow, refs, git_cache_dir, cleanup_dir): build_dir = os.getcwd() + # Before we do anything, break all git_cache locks. + if path.isdir(git_cache_dir): + git('cache', 'unlock', '-vv', '--force', '--all', + '--cache-dir', git_cache_dir) + for item in os.listdir(git_cache_dir): + filename = os.path.join(git_cache_dir, item) + if item.endswith('.lock'): + raise Exception('%s exists after cache unlock' % filename) first_solution = True for sln in solutions: # Just in case we're hitting a different git server than the one from @@ -489,7 +516,7 @@ def git_checkout(solutions, revisions, shallow, refs, git_cache_dir, shallow = False sln_dir = path.join(build_dir, name) s = ['--shallow'] if shallow else [] - populate_cmd = (['cache', 'populate', '-v', + populate_cmd = (['cache', 'populate', '--ignore_locks', '-v', '--cache-dir', git_cache_dir] + s + [url]) for ref in refs: populate_cmd.extend(['--ref', ref]) @@ -517,6 +544,18 @@ def git_checkout(solutions, revisions, shallow, refs, git_cache_dir, refspec = '%s:%s' % (ref, ref.lstrip('+')) git('fetch', 'origin', refspec, cwd=sln_dir) + # Windows sometimes has trouble deleting files. + # This can make git commands that rely on locks fail. + # Try a few times in case Windows has trouble again (and again). + if sys.platform.startswith('win'): + tries = 3 + while tries: + try: + _maybe_break_locks(sln_dir) + break + except Exception: + tries -= 1 + revision = get_target_revision(name, url, revisions) or 'HEAD' force_revision(sln_dir, revision) done = True diff --git a/tests/bot_update_coverage_test.py b/tests/bot_update_coverage_test.py index 79a3c1679..8eb7b7651 100755 --- a/tests/bot_update_coverage_test.py +++ b/tests/bot_update_coverage_test.py @@ -231,6 +231,20 @@ class BotUpdateUnittests(unittest.TestCase): gclient_sync_cmd = args self.assertTrue('--break_repo_locks' in gclient_sync_cmd) + def testGitCheckoutBreaksLocks(self): + self.overrideSetupForWindows() + path = '/b/build/slave/foo/build/.git' + lockfile = 'index.lock' + removed = [] + old_os_walk = os.walk + old_os_remove = os.remove + setattr(os, 'walk', lambda _: [(path, None, [lockfile])]) + setattr(os, 'remove', removed.append) + bot_update.ensure_checkout(**self.params) + setattr(os, 'walk', old_os_walk) + setattr(os, 'remove', old_os_remove) + self.assertTrue(os.path.join(path, lockfile) in removed) + if __name__ == '__main__': unittest.main()