From 19199514e8ead0b605e5672258633ecfe7672551 Mon Sep 17 00:00:00 2001 From: Josip Sokcevic Date: Wed, 1 May 2024 16:41:16 +0000 Subject: [PATCH] [gsutil] Fix race when downloading gsutil If gsutil is not downloaded, and if gsutil (or download_from_google_storage) is called concurrently with n>2, it's possible that those processes are doing the same work. There is also a critical point where gsutil.py can fail with: Destination path '/depot_tools_path/external_bin/gsutil/gsutil_4.68/d' already exists error. To avoid this problem, use FS locking around code that manipulates with files. R=jojwang@google.com Bug: 338040708 Change-Id: Ib83aaa1e09628f878e512d79f2fa5221c2bcfd37 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5502531 Reviewed-by: Joanna Wang Commit-Queue: Joanna Wang Auto-Submit: Josip Sokcevic --- gsutil.py | 69 +++++++++++++++++++----------------------- lockfile.py | 6 ++-- tests/lockfile_test.py | 2 +- 3 files changed, 36 insertions(+), 41 deletions(-) diff --git a/gsutil.py b/gsutil.py index ea02827052..f19efa048e 100755 --- a/gsutil.py +++ b/gsutil.py @@ -17,8 +17,6 @@ import tempfile import time import urllib.request -import zipfile - GSUTIL_URL = 'https://storage.googleapis.com/pub/' API_URL = 'https://www.googleapis.com/storage/v1/b/pub/o/' @@ -101,42 +99,37 @@ def ensure_gsutil(version, target, clean): return gsutil_bin if not os.path.exists(target): - try: - os.makedirs(target) - except FileExistsError: - # Another process is prepping workspace, so let's check if - # gsutil_bin is present. If after several checks it's still not, - # continue with downloading gsutil. - delay = 2 # base delay, in seconds - for _ in range(3): # make N attempts - # sleep first as it's not expected to have file ready just yet. - time.sleep(delay) - delay *= 1.5 # next delay increased by that factor - if os.path.isfile(gsutil_bin): - return gsutil_bin - - with temporary_directory(target) as instance_dir: - # Clean up if we're redownloading a corrupted gsutil. - cleanup_path = os.path.join(instance_dir, 'clean') - try: - os.rename(bin_dir, cleanup_path) - except (OSError, IOError): - cleanup_path = None - if cleanup_path: - shutil.rmtree(cleanup_path) - - download_dir = os.path.join(instance_dir, 'd') - target_zip_filename = download_gsutil(version, instance_dir) - with zipfile.ZipFile(target_zip_filename, 'r') as target_zip: - target_zip.extractall(download_dir) - - shutil.move(download_dir, bin_dir) - # Final check that the gsutil bin exists. This should never fail. - if not os.path.isfile(gsutil_bin): - raise InvalidGsutilError() - # Drop a flag file. - with open(gsutil_flag, 'w') as f: - f.write('This flag file is dropped by gsutil.py') + os.makedirs(target, exist_ok=True) + + import lockfile + import zipfile + with lockfile.lock(bin_dir, timeout=30): + # Check if gsutil is ready (another process may have had lock). + if not clean and os.path.isfile(gsutil_flag): + return gsutil_bin + + with temporary_directory(target) as instance_dir: + download_dir = os.path.join(instance_dir, 'd') + target_zip_filename = download_gsutil(version, instance_dir) + with zipfile.ZipFile(target_zip_filename, 'r') as target_zip: + target_zip.extractall(download_dir) + + # Clean up if we're redownloading a corrupted gsutil. + cleanup_path = os.path.join(instance_dir, 'clean') + try: + os.rename(bin_dir, cleanup_path) + except (OSError, IOError): + cleanup_path = None + if cleanup_path: + shutil.rmtree(cleanup_path) + + shutil.move(download_dir, bin_dir) + # Final check that the gsutil bin exists. This should never fail. + if not os.path.isfile(gsutil_bin): + raise InvalidGsutilError() + # Drop a flag file. + with open(gsutil_flag, 'w') as f: + f.write('This flag file is dropped by gsutil.py') return gsutil_bin diff --git a/lockfile.py b/lockfile.py index 4ea9e6f31e..5a31a650e6 100644 --- a/lockfile.py +++ b/lockfile.py @@ -80,14 +80,16 @@ def _try_lock(lockfile): def _lock(path, timeout=0): """_lock returns function to release the lock if locking was successful. - _lock also implements simple retry logic.""" + _lock also implements simple retry logic. + NOTE: timeout value doesn't include time it takes to aquire lock, just + overall sleep time.""" elapsed = 0 + sleep_time = 0.1 while True: try: return _try_lock(path + '.locked') except (OSError, IOError) as e: if elapsed < timeout: - sleep_time = min(10, timeout - elapsed) logging.info( 'Could not create git cache lockfile; ' 'will retry after sleep(%d).', sleep_time) diff --git a/tests/lockfile_test.py b/tests/lockfile_test.py index 23e39b516f..8fc8cb8c00 100755 --- a/tests/lockfile_test.py +++ b/tests/lockfile_test.py @@ -102,7 +102,7 @@ class LockTest(unittest.TestCase): # One result was consumed by side_effect, we expect only one in the # queue. self.assertEqual(1, results.qsize()) - sleep_mock.assert_called_once_with(1) + sleep_mock.assert_called_with(0.1) if __name__ == '__main__':