From 589ccd8d54e65f82f30bf63ed78f0223b9edf855 Mon Sep 17 00:00:00 2001 From: Stephanie Kim Date: Fri, 5 Apr 2024 18:34:20 +0000 Subject: [PATCH] Add GCS root and .gcs_entries file to keep track of installed gcs deps Now that multiple objects can share a directory, when an object is removed, the contents extracted from that specific object should also be removed from the directory. Since those exact contents are unknown, the whole directory will be cleared. If an entire GCS dep is added or removed, the corresponding directory path will be cleared as well. .gcs_entries holds a record of which GCS deps and objects have been downloaded, per checkout. Example: ``` { "src": { "src/third_party/llvm-build/Release+Asserts": [ "Linux_x64/llvmobjdump-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz", "Linux_x64/clang-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz" ], "src/third_party/node/linux": [ "46795170ff5df9831955f163f6966abde581c8af" ] } } ``` Bug: b/324418194 Change-Id: Icac113572523b61c83450880615418bf7df8bba7 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5407888 Reviewed-by: Joanna Wang Commit-Queue: Stephanie Kim --- gclient.py | 30 +++++++++++++- gclient_scm.py | 104 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 2 deletions(-) diff --git a/gclient.py b/gclient.py index 685a22acc..3e9b84601 100755 --- a/gclient.py +++ b/gclient.py @@ -765,6 +765,7 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): if len(object_name_set) != len(dep_value['objects']): raise Exception('Duplicate object names detected in {} GCS ' 'dependency.'.format(name)) + gcs_root = self.GetGcsRoot() for obj in dep_value['objects']: deps_to_add.append( GcsDependency(parent=self, @@ -774,6 +775,7 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): sha256sum=obj['sha256sum'], output_file=obj.get('output_file'), size_bytes=obj['size_bytes'], + gcs_root=gcs_root, custom_vars=self.custom_vars, 
should_process=should_process, relative=use_relative_paths, @@ -1223,6 +1225,12 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): if self.should_recurse: self.ParseDepsFile() + gcs_root = self.GetGcsRoot() + if gcs_root: + if command == 'revert': + gcs_root.clobber() + elif command == 'update': + gcs_root.clobber_deps_with_updated_objects(self.name) self._run_is_done(file_list or []) @@ -1236,6 +1244,9 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): for s in self.dependencies: if s.should_process: work_queue.enqueue(s) + gcs_root = self.GetGcsRoot() + if gcs_root and command == 'update': + gcs_root.resolve_objects(self.name) if command == 'recurse': # Skip file only checkout. @@ -1389,6 +1400,13 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): return None return self.root.GetCipdRoot() + def GetGcsRoot(self): + if self.root is self: + # Let's not infinitely recurse. If this is root and isn't an + # instance of GClient, do nothing. + return None + return self.root.GetGcsRoot() + def subtree(self, include_all): """Breadth first recursion excluding root node.""" dependencies = self.dependencies @@ -1709,6 +1727,7 @@ solutions = %(solution_list)s self._enforced_cpu = (detect_host_arch.HostArch(), ) self._root_dir = root_dir self._cipd_root = None + self._gcs_root = None self.config_content = None def _CheckConfig(self): @@ -2494,6 +2513,11 @@ it or fix the checkout. 
log_level='info' if self._options.verbose else None) return self._cipd_root + def GetGcsRoot(self): + if not self._gcs_root: + self._gcs_root = gclient_scm.GcsRoot(self.root_dir) + return self._gcs_root + @property def root_dir(self): """Root directory of gclient checkout.""" @@ -2517,14 +2541,16 @@ class GcsDependency(Dependency): """A Dependency object that represents a single GCS bucket and object""" def __init__(self, parent, name, bucket, object_name, sha256sum, - output_file, size_bytes, custom_vars, should_process, relative, - condition): + output_file, size_bytes, gcs_root, custom_vars, should_process, + relative, condition): self.bucket = bucket self.object_name = object_name self.sha256sum = sha256sum self.output_file = output_file self.size_bytes = size_bytes url = f'gs://{self.bucket}/{self.object_name}' + self._gcs_root = gcs_root + self._gcs_root.add_object(parent.name, name, object_name) super(GcsDependency, self).__init__(parent=parent, name=f'{name}:{object_name}', url=url, diff --git a/gclient_scm.py b/gclient_scm.py index 9d4d5dcb7..2a43ff5d6 100644 --- a/gclient_scm.py +++ b/gclient_scm.py @@ -1913,6 +1913,110 @@ class CipdWrapper(SCMWrapper): """ +class GcsRoot(object): + """Root to keep track of all GCS objects, per checkout""" + + def __init__(self, root_dir): + self._mutator_lock = threading.Lock() + self._root_dir = root_dir + # Populated when the DEPS file is parsed + # The objects here have not yet been downloaded and written into + # the .gcs_entries file + self._parsed_objects = {} + # .gcs_entries keeps track of which GCS deps have already been installed + # Maps checkout_name -> {GCS dep path -> [object_name]} + # This file is in the same directory as .gclient + self._gcs_entries_file = os.path.join(self._root_dir, '.gcs_entries') + # Contents of the .gcs_entries file + self._gcs_entries = self.read_gcs_entries() + + @property + def root_dir(self): + return self._root_dir + + def add_object(self, checkout_name, dep_path, object_name): 
+ """Records the object in the _parsed_objects variable + + This does not actually download the object""" + with self._mutator_lock: + if checkout_name not in self._parsed_objects: + self._parsed_objects[checkout_name] = {} + if dep_path not in self._parsed_objects[checkout_name]: + self._parsed_objects[checkout_name][dep_path] = [object_name] + else: + self._parsed_objects[checkout_name][dep_path].append( + object_name) + + def read_gcs_entries(self): + """Reads .gcs_entries file and loads the content into _gcs_entries""" + if not os.path.exists(self._gcs_entries_file): + return {} + + with open(self._gcs_entries_file, 'r') as f: + content = f.read().rstrip() + if content: + return json.loads(content) + return {} + + def resolve_objects(self, checkout_name): + """Updates .gcs_entries with objects in _parsed_objects + + This should only be called after the objects have been downloaded + and extracted.""" + with self._mutator_lock: + object_dict = self._parsed_objects.get(checkout_name) + if not object_dict: + return + self._gcs_entries[checkout_name] = object_dict + with open(self._gcs_entries_file, 'w') as f: + f.write(json.dumps(self._gcs_entries, indent=2)) + self._parsed_objects[checkout_name] = {} + + def clobber_deps_with_updated_objects(self, checkout_name): + """Clobber the path if an object or GCS dependency is removed/added + + This must be called before the GCS dependencies are + downloaded and extracted.""" + with self._mutator_lock: + parsed_object_dict = self._parsed_objects.get(checkout_name, {}) + parsed_paths = set(parsed_object_dict.keys()) + + resolved_object_dict = self._gcs_entries.get(checkout_name, {}) + resolved_paths = set(resolved_object_dict.keys()) + + # If any GCS deps are added or removed entirely, clobber that path + intersected_paths = parsed_paths.intersection(resolved_paths) + # Added paths + for path in parsed_paths - intersected_paths: + full_path = os.path.join(self.root_dir, path) + gclient_utils.rmtree(full_path) + # Removed 
paths + for path in resolved_paths - intersected_paths: + full_path = os.path.join(self.root_dir, path) + gclient_utils.rmtree(full_path) + + # If any objects within a GCS dep are added/removed, clobber that + # entire path + for path in intersected_paths: + resolved_objects = resolved_object_dict[path] + parsed_objects = parsed_object_dict[path] + + full_path = os.path.join(self.root_dir, path) + if (len(resolved_objects) != len(parsed_objects) + and os.path.exists(full_path)): + gclient_utils.rmtree(full_path) + + def clobber(self): + """Remove all dep path directories and clear .gcs_entries""" + for _, objects_dict in self._gcs_entries.items(): + for dep_path, _ in objects_dict.items(): + gclient_utils.rmtree(os.path.join(self.root_dir, dep_path)) + if os.path.exists(self._gcs_entries_file): + os.remove(self._gcs_entries_file) + with self._mutator_lock: + self._gcs_entries = {} + + class GcsWrapper(SCMWrapper): """Wrapper for GCS.