Revert "[gclient] Delete GCS output_dir on download"

This reverts commit 7d95eb2eb0.

Reason for revert: Causing bot_update failures: https://issues.chromium.org/issues/342945505

Original change's description:
> [gclient] Delete GCS output_dir on download
>
> The GCS output_dir needs to be cleaned up prior to extracting new
> content. At the same time, it can't be cleaned up by an individual
> GCS fetcher, since there can be multiple objects in the same
> output_dir.
>
> This change adds top-level support for deleting the old GCS output_dir:
> if any of the objects need to be downloaded, the output_dir is deleted
> completely and all objects are redownloaded.
>
> R=jojwang@google.com
>
> Bug: 342522902, 338612245
> Change-Id: Icbe4f1238cac54d7390bbb9b6fc5f17c538cca62
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5570466
> Reviewed-by: Joanna Wang <jojwang@chromium.org>
> Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>

Bug: 342522902, 338612245, 342945505
Change-Id: I93ed99d7311f15a7a24a03ccba8c310cc7e4c4d4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5570280
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Commit-Queue: danakj <danakj@chromium.org>
Reviewed-by: Yann Dago <ydago@google.com>
Reviewed-by: Friedrich Horschig <fhorschig@chromium.org>
Auto-Submit: danakj <danakj@chromium.org>
Owners-Override: danakj <danakj@chromium.org>
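
For context on the diff below: the reverted change hinged on one pattern, "if any GCS object in a shared output_dir needs a fresh download, delete the whole directory and re-fetch every object", because individual fetchers cannot tell which extracted files belong to which object. A minimal standalone sketch of that pattern (the `is_stale` callback and all names here are illustrative stand-ins, not the real gclient API):

    import os
    import shutil

    def refresh_shared_output_dir(output_dir, object_names, is_stale):
        """Delete output_dir if any object is stale, forcing a full refetch.

        is_stale: hypothetical callable(object_name) -> bool.
        Returns True when the directory was removed.
        """
        if not object_names:
            return False
        if any(is_stale(o) for o in object_names) and os.path.exists(output_dir):
            # Extracted files cannot be mapped back to individual objects,
            # so the only safe cleanup is to remove everything.
            shutil.rmtree(output_dir)
            return True
        return False

The diff removes exactly this top-level cleanup and restores the previous per-object bookkeeping.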

@@ -775,9 +775,6 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                         relative=use_relative_paths,
                         condition=condition))
             elif dep_type == 'gcs':
-                if len(dep_value['objects']) == 0:
-                    continue
-
                 # Validate that all objects are unique
                 object_name_set = {
                     o['object_name']
@@ -787,15 +784,13 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                     raise Exception('Duplicate object names detected in {} GCS '
                                     'dependency.'.format(name))
 
                 gcs_root = self.GetGcsRoot()
-                gcs_deps = []
-
                 for obj in dep_value['objects']:
                     merged_condition = gclient_utils.merge_conditions(
                         condition, obj.get('condition'))
                     should_process_object = should_process and _should_process(
                         merged_condition)
-                    gcs_deps.append(
+                    deps_to_add.append(
                         GcsDependency(parent=self,
                                       name=name,
                                       bucket=dep_value['bucket'],
@@ -808,18 +803,6 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                                       should_process=should_process_object,
                                       relative=use_relative_paths,
                                       condition=merged_condition))
-                deps_to_add.extend(gcs_deps)
-
-                # Check if at least one object needs to be downloaded.
-                needs_download = any(gcs.IsDownloadNeeded() for gcs in gcs_deps)
-                if needs_download and os.path.exists(gcs_deps[0].output_dir):
-                    # Since we don't know what old content to remove, we remove
-                    # the entire output_dir. All gcs_deps are expected to have
-                    # the same output_dir, so we get the first one, which must
-                    # exist.
-                    logging.warning(
-                        'GCS dependency %s new version, removing old.', name)
-                    shutil.rmtree(gcs_deps[0].output_dir)
             else:
                 url = dep_value.get('url')
                 deps_to_add.append(
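
The hunks above keep the per-object loop and the uniqueness validation, dropping only the collective needs_download cleanup. The validation retained by the first hunk is the standard set-vs-list length idiom; in isolation (with made-up data, not the real DEPS input):

    objects = [{'object_name': 'a.tar.gz'}, {'object_name': 'b.tar.gz'}]
    object_name_set = {o['object_name'] for o in objects}
    if len(object_name_set) != len(objects):
        # A set collapses duplicates, so a shorter set means repeats.
        raise Exception('Duplicate object names detected')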
@@ -2639,40 +2622,6 @@ class GcsDependency(Dependency):
                          relative=relative,
                          condition=condition)
 
-    @property
-    def output_dir(self):
-        return os.path.join(self.root.root_dir, self.name.split(':')[0])
-
-    @property
-    def gcs_file_name(self):
-        return self.object_name.replace('/', '_')
-
-    @property
-    def artifact_output_file(self):
-        return os.path.join(self.output_dir, self.output_file
-                            or f'.{self.gcs_file_name}')
-
-    @property
-    def hash_file(self):
-        return os.path.join(self.output_dir, f'.{self.file_prefix}_hash')
-
-    @property
-    def migration_toggle_file(self):
-        return os.path.join(
-            self.output_dir,
-            download_from_google_storage.construct_migration_file_name(
-                self.object_name))
-
-    @property
-    def gcs_file_name(self):
-        # Replace forward slashes
-        return self.object_name.replace('/', '_')
-
-    @property
-    def file_prefix(self):
-        # Drop any extensions
-        return self.gcs_file_name.replace('.', '_')
-
     #override
     def verify_validity(self):
         """GCS dependencies allow duplicate name for objects in same directory."""
@@ -2696,24 +2645,25 @@ class GcsDependency(Dependency):
             f.write(content)
             f.write('\n')
 
-    def IsDownloadNeeded(self):
+    def IsDownloadNeeded(self, output_dir, output_file, hash_file,
+                         migration_toggle_file):
         """Check if download and extract is needed."""
         if not self.should_process:
             return False
-        if not os.path.exists(self.artifact_output_file):
+        if not os.path.exists(output_file):
             return True
 
         existing_hash = None
-        if os.path.exists(self.hash_file):
+        if os.path.exists(hash_file):
             try:
-                with open(self.hash_file, 'r') as f:
+                with open(hash_file, 'r') as f:
                     existing_hash = f.read().rstrip()
             except IOError:
                 return True
         else:
             return True
 
-        is_first_class_gcs = os.path.exists(self.migration_toggle_file)
+        # (b/328065301): Remove is_first_class_gcs_file logic when all GCS
+        # hooks are migrated to first class deps
+        is_first_class_gcs = os.path.exists(migration_toggle_file)
         if not is_first_class_gcs:
             return True
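
After the revert, IsDownloadNeeded receives its paths as arguments instead of computing them from instance properties. Its decision logic, reduced to a free function (a sketch: `expected_hash` stands in for the sha256sum comparison that happens past the end of this hunk and is assumed here, not shown in the diff):

    import os

    def is_download_needed(should_process, expected_hash, output_file,
                           hash_file, migration_toggle_file):
        if not should_process:
            return False
        if not os.path.exists(output_file):
            return True  # nothing extracted yet
        try:
            with open(hash_file, 'r') as f:
                existing_hash = f.read().rstrip()
        except IOError:  # missing or unreadable hash file
            return True
        # (b/328065301): deps not yet migrated to first-class GCS always
        # re-download.
        if not os.path.exists(migration_toggle_file):
            return True
        # Assumed continuation: re-download when the recorded hash differs.
        return existing_hash != expected_hash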
@@ -2752,50 +2702,63 @@ class GcsDependency(Dependency):
     def DownloadGoogleStorage(self):
         """Calls GCS."""
-        if not self.IsDownloadNeeded():
+        # Replace forward slashes
+        gcs_file_name = self.object_name.replace('/', '_')
+        root_dir = self.root.root_dir
+
+        # Directory of the extracted tarfile contents
+        output_dir = os.path.join(root_dir, self.name.split(':')[0])
+        output_file = os.path.join(output_dir, self.output_file
+                                   or f'.{gcs_file_name}')
+
+        # Drop any extensions
+        file_prefix = gcs_file_name.replace('.', '_')
+        hash_file = os.path.join(output_dir, f'.{file_prefix}_hash')
+        migration_toggle_file = os.path.join(
+            output_dir,
+            download_from_google_storage.construct_migration_file_name(
+                self.object_name))
+        if not self.IsDownloadNeeded(output_dir, output_file, hash_file,
+                                     migration_toggle_file):
             return
 
-        files_to_remove = [
-            self.hash_file,
-            self.artifact_output_file,
-            self.migration_toggle_file,
-        ]
-        for f in files_to_remove:
-            if os.path.exists(f):
-                os.remove(f)
+        # Remove hashfile
+        if os.path.exists(hash_file):
+            os.remove(hash_file)
+
+        # Remove tarfile
+        if os.path.exists(output_file):
+            os.remove(output_file)
 
-        # Ensure that the directory exists. Another process/thread may create
-        # it, so exist_ok is used.
-        os.makedirs(self.output_dir, exist_ok=True)
+        # Another GCS dep could be using the same output_dir, so don't remove
+        # it
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir)
 
         gsutil = download_from_google_storage.Gsutil(
             download_from_google_storage.GSUTIL_DEFAULT_PATH)
         if os.getenv('GCLIENT_TEST') == '1':
-            if 'no-extract' in self.artifact_output_file:
-                with open(self.artifact_output_file, 'w+') as f:
+            if 'no-extract' in output_file:
+                with open(output_file, 'w+') as f:
                     f.write('non-extractable file')
             else:
                 # Create fake tar file and extracted tar contents
                 tmpdir = tempfile.mkdtemp()
-                copy_dir = os.path.join(tmpdir, self.gcs_file_name,
-                                        'extracted_dir')
+                copy_dir = os.path.join(tmpdir, gcs_file_name, 'extracted_dir')
                 if os.path.exists(copy_dir):
                     shutil.rmtree(copy_dir)
                 os.makedirs(copy_dir)
                 with open(os.path.join(copy_dir, 'extracted_file'), 'w+') as f:
                     f.write('extracted text')
-                with tarfile.open(self.artifact_output_file, "w:gz") as tar:
+                with tarfile.open(output_file, "w:gz") as tar:
                     tar.add(copy_dir, arcname=os.path.basename(copy_dir))
         else:
-            code, _, err = gsutil.check_call('cp', self.url,
-                                             self.artifact_output_file)
+            code, _, err = gsutil.check_call('cp', self.url, output_file)
             if code and err:
                 raise Exception(f'{code}: {err}')
 
         # Check that something actually downloaded into the path
-        if not os.path.exists(self.artifact_output_file):
-            raise Exception(
-                f'Nothing was downloaded into {self.artifact_output_file}')
+        if not os.path.exists(output_file):
+            raise Exception(f'Nothing was downloaded into {output_file}')
 
         calculated_sha256sum = ''
         calculated_size_bytes = None
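
The restored code derives all of its marker-file names from the GCS object name by flattening path separators and extensions. Worked through with a made-up object (both paths are hypothetical; construct_migration_file_name's output is not shown in this diff, so it is omitted):

    import os

    object_name = 'linux/clang.tar.gz'      # hypothetical GCS object
    output_dir = '/out/third_party/clang'   # hypothetical checkout location

    gcs_file_name = object_name.replace('/', '_')   # 'linux_clang.tar.gz'
    file_prefix = gcs_file_name.replace('.', '_')   # 'linux_clang_tar_gz'

    output_file = os.path.join(output_dir, f'.{gcs_file_name}')
    # -> '/out/third_party/clang/.linux_clang.tar.gz'
    hash_file = os.path.join(output_dir, f'.{file_prefix}_hash')
    # -> '/out/third_party/clang/.linux_clang_tar_gz_hash'

One dotfile pair per object is what lets several objects share an output_dir without clobbering each other's bookkeeping, and it is why the restored code only ever creates the directory, never removes it.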
@@ -2804,9 +2767,8 @@ class GcsDependency(Dependency):
             calculated_size_bytes = 10000
         else:
             calculated_sha256sum = (
-                upload_to_google_storage_first_class.get_sha256sum(
-                    self.artifact_output_file))
-            calculated_size_bytes = os.path.getsize(self.artifact_output_file)
+                upload_to_google_storage_first_class.get_sha256sum(output_file))
+            calculated_size_bytes = os.path.getsize(output_file)
 
         if calculated_sha256sum != self.sha256sum:
             raise Exception('sha256sum does not match calculated hash. '
@@ -2822,8 +2784,8 @@ class GcsDependency(Dependency):
                 calculated=calculated_size_bytes,
             ))
 
-        if tarfile.is_tarfile(self.artifact_output_file):
-            with tarfile.open(self.artifact_output_file, 'r:*') as tar:
+        if tarfile.is_tarfile(output_file):
+            with tarfile.open(output_file, 'r:*') as tar:
                 formatted_names = []
                 for name in tar.getnames():
                     if name.startswith('./') and len(name) > 2:
@@ -2838,19 +2800,19 @@ class GcsDependency(Dependency):
                     raise Exception('tarfile contains invalid entries')
 
                 tar_content_file = os.path.join(
-                    self.output_dir, f'.{self.file_prefix}_content_names')
+                    output_dir, f'.{file_prefix}_content_names')
                 self.WriteToFile(json.dumps(tar.getnames()), tar_content_file)
 
-                tar.extractall(path=self.output_dir)
+                tar.extractall(path=output_dir)
 
         if os.getenv('GCLIENT_TEST') != '1':
             code, err = download_from_google_storage.set_executable_bit(
-                self.artifact_output_file, self.url, gsutil)
+                output_file, self.url, gsutil)
             if code != 0:
                 raise Exception(f'{code}: {err}')
 
-        self.WriteToFile(calculated_sha256sum, self.hash_file)
-        self.WriteToFile(str(1), self.migration_toggle_file)
+        self.WriteToFile(calculated_sha256sum, hash_file)
+        self.WriteToFile(str(1), migration_toggle_file)
 
     #override
     def GetScmName(self):
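
The extraction path normalizes leading './' from entry names and rejects tarfiles with "invalid entries" before calling extractall; the exact validity rule falls between the last two hunks and is not visible here. A generic defensive sketch of that shape (an assumption, not gclient's actual rule):

    import os
    import tarfile

    def safe_extract(tar_path, output_dir):
        with tarfile.open(tar_path, 'r:*') as tar:
            root = os.path.realpath(output_dir)
            for name in tar.getnames():
                # Normalize './foo' to 'foo', as the hunk above does.
                if name.startswith('./') and len(name) > 2:
                    name = name[2:]
                # Reject entries that would land outside output_dir.
                dest = os.path.realpath(os.path.join(root, name))
                if dest != root and not dest.startswith(root + os.sep):
                    raise Exception('tarfile contains invalid entries')
            tar.extractall(path=output_dir)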
