From 19199514e8ead0b605e5672258633ecfe7672551 Mon Sep 17 00:00:00 2001
From: Josip Sokcevic <sokcevic@chromium.org>
Date: Wed, 1 May 2024 16:41:16 +0000
Subject: [PATCH] [gsutil] Fix race when downloading gsutil

If gsutil is not downloaded, and if gsutil (or
download_from_google_storage) is called concurrently with n>2, it's
possible that those processes are doing the same work. There is also a
critical point where gsutil.py can fail with:

Destination path '/depot_tools_path/external_bin/gsutil/gsutil_4.68/d'
already exists error.

To avoid this problem, use FS locking around code that manipulates with
files.

R=jojwang@google.com

Bug: 338040708
Change-Id: Ib83aaa1e09628f878e512d79f2fa5221c2bcfd37
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5502531
Reviewed-by: Joanna Wang <jojwang@chromium.org>
Commit-Queue: Joanna Wang <jojwang@chromium.org>
Auto-Submit: Josip Sokcevic <sokcevic@chromium.org>
---
 gsutil.py              | 69 +++++++++++++++++++-----------------------
 lockfile.py            |  6 ++--
 tests/lockfile_test.py |  2 +-
 3 files changed, 36 insertions(+), 41 deletions(-)

diff --git a/gsutil.py b/gsutil.py
index ea02827052..f19efa048e 100755
--- a/gsutil.py
+++ b/gsutil.py
@@ -17,8 +17,6 @@ import tempfile
 import time
 import urllib.request
 
-import zipfile
-
 GSUTIL_URL = 'https://storage.googleapis.com/pub/'
 API_URL = 'https://www.googleapis.com/storage/v1/b/pub/o/'
 
@@ -101,42 +99,37 @@ def ensure_gsutil(version, target, clean):
         return gsutil_bin
 
     if not os.path.exists(target):
-        try:
-            os.makedirs(target)
-        except FileExistsError:
-            # Another process is prepping workspace, so let's check if
-            # gsutil_bin is present.  If after several checks it's still not,
-            # continue with downloading gsutil.
-            delay = 2  # base delay, in seconds
-            for _ in range(3):  # make N attempts
-                # sleep first as it's not expected to have file ready just yet.
-                time.sleep(delay)
-                delay *= 1.5  # next delay increased by that factor
-                if os.path.isfile(gsutil_bin):
-                    return gsutil_bin
-
-    with temporary_directory(target) as instance_dir:
-        # Clean up if we're redownloading a corrupted gsutil.
-        cleanup_path = os.path.join(instance_dir, 'clean')
-        try:
-            os.rename(bin_dir, cleanup_path)
-        except (OSError, IOError):
-            cleanup_path = None
-        if cleanup_path:
-            shutil.rmtree(cleanup_path)
-
-        download_dir = os.path.join(instance_dir, 'd')
-        target_zip_filename = download_gsutil(version, instance_dir)
-        with zipfile.ZipFile(target_zip_filename, 'r') as target_zip:
-            target_zip.extractall(download_dir)
-
-        shutil.move(download_dir, bin_dir)
-        # Final check that the gsutil bin exists.  This should never fail.
-        if not os.path.isfile(gsutil_bin):
-            raise InvalidGsutilError()
-        # Drop a flag file.
-        with open(gsutil_flag, 'w') as f:
-            f.write('This flag file is dropped by gsutil.py')
+        os.makedirs(target, exist_ok=True)
+
+    import lockfile
+    import zipfile
+    with lockfile.lock(bin_dir, timeout=30):
+        # Check if gsutil is ready (another process may have had lock).
+        if not clean and os.path.isfile(gsutil_flag):
+            return gsutil_bin
+
+        with temporary_directory(target) as instance_dir:
+            download_dir = os.path.join(instance_dir, 'd')
+            target_zip_filename = download_gsutil(version, instance_dir)
+            with zipfile.ZipFile(target_zip_filename, 'r') as target_zip:
+                target_zip.extractall(download_dir)
+
+            # Clean up if we're redownloading a corrupted gsutil.
+            cleanup_path = os.path.join(instance_dir, 'clean')
+            try:
+                os.rename(bin_dir, cleanup_path)
+            except (OSError, IOError):
+                cleanup_path = None
+            if cleanup_path:
+                shutil.rmtree(cleanup_path)
+
+            shutil.move(download_dir, bin_dir)
+            # Final check that the gsutil bin exists.  This should never fail.
+            if not os.path.isfile(gsutil_bin):
+                raise InvalidGsutilError()
+            # Drop a flag file.
+            with open(gsutil_flag, 'w') as f:
+                f.write('This flag file is dropped by gsutil.py')
 
     return gsutil_bin
 
diff --git a/lockfile.py b/lockfile.py
index 4ea9e6f31e..5a31a650e6 100644
--- a/lockfile.py
+++ b/lockfile.py
@@ -80,14 +80,16 @@ def _try_lock(lockfile):
 def _lock(path, timeout=0):
     """_lock returns function to release the lock if locking was successful.
 
-    _lock also implements simple retry logic."""
+    _lock also implements simple retry logic.
+    NOTE: timeout value doesn't include time it takes to aquire lock, just
+    overall sleep time."""
     elapsed = 0
+    sleep_time = 0.1
     while True:
         try:
             return _try_lock(path + '.locked')
         except (OSError, IOError) as e:
             if elapsed < timeout:
-                sleep_time = min(10, timeout - elapsed)
                 logging.info(
                     'Could not create git cache lockfile; '
                     'will retry after sleep(%d).', sleep_time)
diff --git a/tests/lockfile_test.py b/tests/lockfile_test.py
index 23e39b516f..8fc8cb8c00 100755
--- a/tests/lockfile_test.py
+++ b/tests/lockfile_test.py
@@ -102,7 +102,7 @@ class LockTest(unittest.TestCase):
         # One result was consumed by side_effect, we expect only one in the
         # queue.
         self.assertEqual(1, results.qsize())
-        sleep_mock.assert_called_once_with(1)
+        sleep_mock.assert_called_with(0.1)
 
 
 if __name__ == '__main__':