Add GCS root and .gcs_entries file to keep track of installed gcs deps

Now that multiple objects can share a directory, when an object is
removed, the contents extracted from that specific object should also
be removed from the directory. Since those exact contents are unknown,
the whole directory is cleared instead.
If an entire GCS dep is added or removed, the corresponding directory
path will be cleared as well.

.gcs_entries holds a record of which GCS deps and objects
have been downloaded, per checkout. Example:
```
{
  "src": {
    "src/third_party/llvm-build/Release+Asserts": [
      "Linux_x64/llvmobjdump-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz",
      "Linux_x64/clang-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz"
    ],
    "src/third_party/node/linux": [
      "46795170ff5df9831955f163f6966abde581c8af"
    ]
  }
}
```

Bug: b/324418194
Change-Id: Icac113572523b61c83450880615418bf7df8bba7
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5407888
Reviewed-by: Joanna Wang <jojwang@chromium.org>
Commit-Queue: Stephanie Kim <kimstephanie@google.com>
changes/88/5407888/24
Stephanie Kim 1 year ago committed by LUCI CQ
parent b0583b002c
commit 589ccd8d54

@ -765,6 +765,7 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
if len(object_name_set) != len(dep_value['objects']):
raise Exception('Duplicate object names detected in {} GCS '
'dependency.'.format(name))
gcs_root = self.GetGcsRoot()
for obj in dep_value['objects']:
deps_to_add.append(
GcsDependency(parent=self,
@ -774,6 +775,7 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
sha256sum=obj['sha256sum'],
output_file=obj.get('output_file'),
size_bytes=obj['size_bytes'],
gcs_root=gcs_root,
custom_vars=self.custom_vars,
should_process=should_process,
relative=use_relative_paths,
@ -1223,6 +1225,12 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
if self.should_recurse:
self.ParseDepsFile()
gcs_root = self.GetGcsRoot()
if gcs_root:
if command == 'revert':
gcs_root.clobber()
elif command == 'update':
gcs_root.clobber_deps_with_updated_objects(self.name)
self._run_is_done(file_list or [])
@ -1236,6 +1244,9 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
for s in self.dependencies:
if s.should_process:
work_queue.enqueue(s)
gcs_root = self.GetGcsRoot()
if gcs_root and command == 'update':
gcs_root.resolve_objects(self.name)
if command == 'recurse':
# Skip file only checkout.
@ -1389,6 +1400,13 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
return None
return self.root.GetCipdRoot()
def GetGcsRoot(self):
    """Return the checkout-wide GcsRoot by delegating to the root node.

    Returns None when this dependency is itself the root (i.e. it is not
    attached to a GClient instance), which also prevents infinite
    recursion through self.root.
    """
    if self.root is not self:
        return self.root.GetGcsRoot()
    # We are the root and not a GClient: there is no GCS root to report.
    return None
def subtree(self, include_all):
"""Breadth first recursion excluding root node."""
dependencies = self.dependencies
@ -1709,6 +1727,7 @@ solutions = %(solution_list)s
self._enforced_cpu = (detect_host_arch.HostArch(), )
self._root_dir = root_dir
self._cipd_root = None
self._gcs_root = None
self.config_content = None
def _CheckConfig(self):
@ -2494,6 +2513,11 @@ it or fix the checkout.
log_level='info' if self._options.verbose else None)
return self._cipd_root
def GetGcsRoot(self):
    """Lazily create and return the per-checkout GcsRoot singleton."""
    # _gcs_root starts as None in __init__; build it once on first use and
    # hand back the cached instance on every later call.
    if self._gcs_root is None:
        self._gcs_root = gclient_scm.GcsRoot(self.root_dir)
    return self._gcs_root
@property
def root_dir(self):
"""Root directory of gclient checkout."""
@ -2517,14 +2541,16 @@ class GcsDependency(Dependency):
"""A Dependency object that represents a single GCS bucket and object"""
def __init__(self, parent, name, bucket, object_name, sha256sum,
output_file, size_bytes, custom_vars, should_process, relative,
condition):
output_file, size_bytes, gcs_root, custom_vars, should_process,
relative, condition):
self.bucket = bucket
self.object_name = object_name
self.sha256sum = sha256sum
self.output_file = output_file
self.size_bytes = size_bytes
url = f'gs://{self.bucket}/{self.object_name}'
self._gcs_root = gcs_root
self._gcs_root.add_object(parent.name, name, object_name)
super(GcsDependency, self).__init__(parent=parent,
name=f'{name}:{object_name}',
url=url,

@ -1913,6 +1913,110 @@ class CipdWrapper(SCMWrapper):
"""
class GcsRoot(object):
    """Root to keep track of all GCS objects, per checkout.

    Records which GCS deps/objects the DEPS parse produced
    (_parsed_objects) and which ones have already been downloaded and
    extracted (.gcs_entries on disk), and clobbers dep directories whose
    object set changed between the two.
    """

    def __init__(self, root_dir):
        # Guards _parsed_objects / _gcs_entries against concurrent access
        # from the work-queue threads.
        self._mutator_lock = threading.Lock()
        self._root_dir = root_dir
        # Populated when the DEPS file is parsed.
        # The objects here have not yet been downloaded and written into
        # the .gcs_entries file.
        # Maps checkout_name -> {GCS dep path -> [object_name]}
        self._parsed_objects = {}
        # .gcs_entries keeps track of which GCS deps have already been
        # installed. Maps checkout_name -> {GCS dep path -> [object_name]}
        # This file is in the same directory as .gclient.
        self._gcs_entries_file = os.path.join(self._root_dir, '.gcs_entries')
        # Contents of the .gcs_entries file.
        self._gcs_entries = self.read_gcs_entries()

    @property
    def root_dir(self):
        return self._root_dir

    def add_object(self, checkout_name, dep_path, object_name):
        """Records the object in the _parsed_objects variable.

        This does not actually download the object."""
        with self._mutator_lock:
            dep_map = self._parsed_objects.setdefault(checkout_name, {})
            dep_map.setdefault(dep_path, []).append(object_name)

    def read_gcs_entries(self):
        """Reads the .gcs_entries file and returns its parsed contents.

        Returns an empty dict when the file is absent or empty."""
        if not os.path.exists(self._gcs_entries_file):
            return {}
        with open(self._gcs_entries_file, 'r') as f:
            content = f.read().rstrip()
        if content:
            return json.loads(content)
        return {}

    def resolve_objects(self, checkout_name):
        """Updates .gcs_entries with objects in _parsed_objects.

        This should only be called after the objects have been downloaded
        and extracted."""
        with self._mutator_lock:
            object_dict = self._parsed_objects.get(checkout_name)
            if not object_dict:
                return
            self._gcs_entries[checkout_name] = object_dict
            with open(self._gcs_entries_file, 'w') as f:
                f.write(json.dumps(self._gcs_entries, indent=2))
            # The parsed objects are now recorded on disk; reset so the
            # next DEPS parse starts from a clean slate.
            self._parsed_objects[checkout_name] = {}

    def clobber_deps_with_updated_objects(self, checkout_name):
        """Clobber the path if an object or GCS dependency is removed/added.

        This must be called before the GCS dependencies are
        downloaded and extracted."""
        with self._mutator_lock:
            parsed_object_dict = self._parsed_objects.get(checkout_name, {})
            parsed_paths = set(parsed_object_dict)
            resolved_object_dict = self._gcs_entries.get(checkout_name, {})
            resolved_paths = set(resolved_object_dict)

            # If any GCS deps are added or removed entirely, clobber that
            # path. The symmetric difference covers both the added and the
            # removed deps.
            for path in parsed_paths.symmetric_difference(resolved_paths):
                gclient_utils.rmtree(os.path.join(self.root_dir, path))

            # If the set of objects within a GCS dep changed in any way,
            # clobber that entire path. Compare the object-name sets rather
            # than just the list lengths: a length-only check misses the
            # case where one object was swapped for another (e.g. a version
            # bump), which would otherwise leave stale extracted contents.
            for path in parsed_paths.intersection(resolved_paths):
                if set(resolved_object_dict[path]) == set(
                        parsed_object_dict[path]):
                    continue
                full_path = os.path.join(self.root_dir, path)
                if os.path.exists(full_path):
                    gclient_utils.rmtree(full_path)

    def clobber(self):
        """Remove all dep path directories and clear .gcs_entries."""
        # Hold the lock for the whole operation (not just the final reset)
        # so no other thread observes entries for directories that are
        # being deleted.
        with self._mutator_lock:
            for objects_dict in self._gcs_entries.values():
                for dep_path in objects_dict:
                    gclient_utils.rmtree(os.path.join(self.root_dir,
                                                      dep_path))
            if os.path.exists(self._gcs_entries_file):
                os.remove(self._gcs_entries_file)
            self._gcs_entries = {}
class GcsWrapper(SCMWrapper):
"""Wrapper for GCS.

Loading…
Cancel
Save