From 97246c4f73e6692065ea4d3c87c63641a810f064 Mon Sep 17 00:00:00 2001
From: danakj
Date: Mon, 27 May 2024 14:37:07 +0000
Subject: [PATCH] Revert "[gclient] Delete GCS output_dir on download"

This reverts commit 7d95eb2eb054447592585c73a8ff7adad97ecba1.

Reason for revert: Causing bot_update failures:
https://issues.chromium.org/issues/342945505

Original change's description:
> [gclient] Delete GCS output_dir on download
>
> GCS output_directory needs to be cleaned up prior to extracting new
> content. At the same time, it can't be cleaned up by individual GCS
> fetcher as there can be multiple objects in the same output_dir.
>
> This change adds a top level support to delete old GCS output_dir. If
> any of objects need to be downloaded, the output_dir will be completely
> deleted and all objects will be redownloaded.
>
> R=jojwang@google.com
>
> Bug: 342522902, 338612245
> Change-Id: Icbe4f1238cac54d7390bbb9b6fc5f17c538cca62
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5570466
> Reviewed-by: Joanna Wang
> Commit-Queue: Josip Sokcevic

Bug: 342522902, 338612245, 342945505
Change-Id: I93ed99d7311f15a7a24a03ccba8c310cc7e4c4d4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5570280
Bot-Commit: Rubber Stamper
Commit-Queue: danakj
Reviewed-by: Yann Dago
Reviewed-by: Friedrich Horschig
Auto-Submit: danakj
Owners-Override: danakj
---
 gclient.py | 144 ++++++++++++++++++++---------------------------------
 1 file changed, 53 insertions(+), 91 deletions(-)

diff --git a/gclient.py b/gclient.py
index 1e9f4eefe..9a474e57b 100755
--- a/gclient.py
+++ b/gclient.py
@@ -775,9 +775,6 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                                        relative=use_relative_paths,
                                        condition=condition))
             elif dep_type == 'gcs':
-                if len(dep_value['objects']) == 0:
-                    continue
-
                 # Validate that all objects are unique
                 object_name_set = {
                     o['object_name']
@@ -787,15 +784,13 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                     raise Exception('Duplicate object names detected in {} GCS '
                                     'dependency.'.format(name))
                 gcs_root = self.GetGcsRoot()
-
-                gcs_deps = []
                 for obj in dep_value['objects']:
                     merged_condition = gclient_utils.merge_conditions(
                         condition, obj.get('condition'))
                     should_process_object = should_process and _should_process(
                         merged_condition)
 
-                    gcs_deps.append(
+                    deps_to_add.append(
                         GcsDependency(parent=self,
                                       name=name,
                                       bucket=dep_value['bucket'],
@@ -808,18 +803,6 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                                       should_process=should_process_object,
                                       relative=use_relative_paths,
                                       condition=merged_condition))
-                deps_to_add.extend(gcs_deps)
-
-                # Check if at least one object needs to be downloaded.
-                needs_download = any(gcs.IsDownloadNeeded() for gcs in gcs_deps)
-                if needs_download and os.path.exists(gcs_deps[0].output_dir):
-                    # Since we don't know what old content to remove, we remove
-                    # the entire output_dir. All gcs_deps are expected to have
-                    # the same output_dir, so we get the first one, which must
-                    # exist.
-                    logging.warning(
-                        'GCS dependency %s new version, removing old.', name)
-                    shutil.rmtree(gcs_deps[0].output_dir)
             else:
                 url = dep_value.get('url')
                 deps_to_add.append(
@@ -2639,40 +2622,6 @@ class GcsDependency(Dependency):
                          relative=relative,
                          condition=condition)
 
-    @property
-    def output_dir(self):
-        return os.path.join(self.root.root_dir, self.name.split(':')[0])
-
-    @property
-    def gcs_file_name(self):
-        return self.object_name.replace('/', '_')
-
-    @property
-    def artifact_output_file(self):
-        return os.path.join(self.output_dir, self.output_file
-                            or f'.{self.gcs_file_name}')
-
-    @property
-    def hash_file(self):
-        return os.path.join(self.output_dir, f'.{self.file_prefix}_hash')
-
-    @property
-    def migration_toggle_file(self):
-        return os.path.join(
-            self.output_dir,
-            download_from_google_storage.construct_migration_file_name(
-                self.object_name))
-
-    @property
-    def gcs_file_name(self):
-        # Replace forward slashes
-        return self.object_name.replace('/', '_')
-
-    @property
-    def file_prefix(self):
-        # Drop any extensions
-        return self.gcs_file_name.replace('.', '_')
-
     #override
     def verify_validity(self):
         """GCS dependencies allow duplicate name for objects in same directory."""
@@ -2696,24 +2645,25 @@ class GcsDependency(Dependency):
             f.write(content)
             f.write('\n')
 
-    def IsDownloadNeeded(self):
+    def IsDownloadNeeded(self, output_dir, output_file, hash_file,
+                         migration_toggle_file):
         """Check if download and extract is needed."""
-        if not self.should_process:
-            return False
-        if not os.path.exists(self.artifact_output_file):
+        if not os.path.exists(output_file):
            return True
 
         existing_hash = None
-        if os.path.exists(self.hash_file):
+        if os.path.exists(hash_file):
             try:
-                with open(self.hash_file, 'r') as f:
+                with open(hash_file, 'r') as f:
                     existing_hash = f.read().rstrip()
             except IOError:
                 return True
         else:
             return True
 
-        is_first_class_gcs = os.path.exists(self.migration_toggle_file)
+        # (b/328065301): Remove is_first_class_gcs_file logic when all GCS
+        # hooks are migrated to first class deps
+        is_first_class_gcs = os.path.exists(migration_toggle_file)
         if not is_first_class_gcs:
             return True
 
@@ -2752,50 +2702,63 @@ class GcsDependency(Dependency):
 
     def DownloadGoogleStorage(self):
         """Calls GCS."""
+        # Replace forward slashes
+        gcs_file_name = self.object_name.replace('/', '_')
+        root_dir = self.root.root_dir
-        if not self.IsDownloadNeeded():
+
+        # Directory of the extracted tarfile contents
+        output_dir = os.path.join(root_dir, self.name.split(':')[0])
+        output_file = os.path.join(output_dir, self.output_file
+                                   or f'.{gcs_file_name}')
+
+        # Drop any extensions
+        file_prefix = gcs_file_name.replace('.', '_')
+        hash_file = os.path.join(output_dir, f'.{file_prefix}_hash')
+        migration_toggle_file = os.path.join(
+            output_dir,
+            download_from_google_storage.construct_migration_file_name(
+                self.object_name))
+        if not self.IsDownloadNeeded(output_dir, output_file, hash_file,
+                                     migration_toggle_file):
             return
 
-        files_to_remove = [
-            self.hash_file,
-            self.artifact_output_file,
-            self.migration_toggle_file,
-        ]
-        for f in files_to_remove:
-            if os.path.exists(f):
-                os.remove(f)
+        # Remove hashfile
+        if os.path.exists(hash_file):
+            os.remove(hash_file)
+
+        # Remove tarfile
+        if os.path.exists(output_file):
+            os.remove(output_file)
 
-        # Ensure that the directory exists. Another process/thread may create
-        # it, so exist_ok is used.
-        os.makedirs(self.output_dir, exist_ok=True)
+        # Another GCS dep could be using the same output_dir, so don't remove
+        # it
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir)
 
         gsutil = download_from_google_storage.Gsutil(
             download_from_google_storage.GSUTIL_DEFAULT_PATH)
         if os.getenv('GCLIENT_TEST') == '1':
-            if 'no-extract' in self.artifact_output_file:
-                with open(self.artifact_output_file, 'w+') as f:
+            if 'no-extract' in output_file:
+                with open(output_file, 'w+') as f:
                     f.write('non-extractable file')
             else:
                 # Create fake tar file and extracted tar contents
                 tmpdir = tempfile.mkdtemp()
-                copy_dir = os.path.join(tmpdir, self.gcs_file_name,
-                                        'extracted_dir')
+                copy_dir = os.path.join(tmpdir, gcs_file_name, 'extracted_dir')
                 if os.path.exists(copy_dir):
                     shutil.rmtree(copy_dir)
                 os.makedirs(copy_dir)
                 with open(os.path.join(copy_dir, 'extracted_file'), 'w+') as f:
                     f.write('extracted text')
-                with tarfile.open(self.artifact_output_file, "w:gz") as tar:
+                with tarfile.open(output_file, "w:gz") as tar:
                     tar.add(copy_dir, arcname=os.path.basename(copy_dir))
         else:
-            code, _, err = gsutil.check_call('cp', self.url,
-                                             self.artifact_output_file)
+            code, _, err = gsutil.check_call('cp', self.url, output_file)
             if code and err:
                 raise Exception(f'{code}: {err}')
 
         # Check that something actually downloaded into the path
-        if not os.path.exists(self.artifact_output_file):
-            raise Exception(
-                f'Nothing was downloaded into {self.artifact_output_file}')
+        if not os.path.exists(output_file):
+            raise Exception(f'Nothing was downloaded into {output_file}')
 
         calculated_sha256sum = ''
         calculated_size_bytes = None
@@ -2804,9 +2767,8 @@ class GcsDependency(Dependency):
             calculated_size_bytes = 10000
         else:
             calculated_sha256sum = (
-                upload_to_google_storage_first_class.get_sha256sum(
-                    self.artifact_output_file))
-            calculated_size_bytes = os.path.getsize(self.artifact_output_file)
+                upload_to_google_storage_first_class.get_sha256sum(output_file))
+            calculated_size_bytes = os.path.getsize(output_file)
 
         if calculated_sha256sum != self.sha256sum:
             raise Exception('sha256sum does not match calculated hash. '
@@ -2822,8 +2784,8 @@ class GcsDependency(Dependency):
                     calculated=calculated_size_bytes,
                 ))
 
-        if tarfile.is_tarfile(self.artifact_output_file):
-            with tarfile.open(self.artifact_output_file, 'r:*') as tar:
+        if tarfile.is_tarfile(output_file):
+            with tarfile.open(output_file, 'r:*') as tar:
                 formatted_names = []
                 for name in tar.getnames():
                     if name.startswith('./') and len(name) > 2:
@@ -2838,19 +2800,19 @@ class GcsDependency(Dependency):
                         raise Exception('tarfile contains invalid entries')
 
                 tar_content_file = os.path.join(
-                    self.output_dir, f'.{self.file_prefix}_content_names')
+                    output_dir, f'.{file_prefix}_content_names')
                 self.WriteToFile(json.dumps(tar.getnames()), tar_content_file)
 
-                tar.extractall(path=self.output_dir)
+                tar.extractall(path=output_dir)
 
         if os.getenv('GCLIENT_TEST') != '1':
             code, err = download_from_google_storage.set_executable_bit(
-                self.artifact_output_file, self.url, gsutil)
+                output_file, self.url, gsutil)
             if code != 0:
                 raise Exception(f'{code}: {err}')
 
-        self.WriteToFile(calculated_sha256sum, self.hash_file)
-        self.WriteToFile(str(1), self.migration_toggle_file)
+        self.WriteToFile(calculated_sha256sum, hash_file)
+        self.WriteToFile(str(1), migration_toggle_file)
 
     #override
     def GetScmName(self):