From 2c9e581fada443abc35ed184f91a0256559d17b7 Mon Sep 17 00:00:00 2001 From: Stephanie Kim Date: Fri, 5 Apr 2024 16:58:44 +0000 Subject: [PATCH] Allow multiple objects per GCS dep Allows multiple objects to use the same directory path. This requires that each object has its own unique hash file and unique migration file name. All object names must be unique. Also update download_from_google_storage to check for the unique migration file name. Remove ConvertToGcs smoke tests since we're not converting any git <-> gcs deps. Example: ``` 'src/third_party/llvm-build/Release+Asserts': { 'dep_type': 'gcs', 'condition': 'not llvm_force_head_revision', 'bucket': 'chromium-browser-clang', 'objects': [ { 'object_name': 'Linux_x64/clang-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz', 'sha256sum': '7b33138d8592199f97d132242d7b3e10f460c5c9655d49a3ad3767218fba7a77', 'size_bytes': 50212876, }, { 'object_name': 'Linux_x64/llvmobjdump-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz', 'sha256sum': '14d669650cd212eb0ccb8c34a9e655338dfdee66fe2ecdaa517f6bd607c09a97', 'size_bytes': 5302312, }, ] }, ``` TODO: update .gitignore to search for *_is_first_class and *_hash Bug: b/324418194 Change-Id: I89d34b06ee24f4c1aa316cd51530ad078e823143 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5420793 Commit-Queue: Stephanie Kim Reviewed-by: Joanna Wang --- download_from_google_storage.py | 26 +++++++-- gclient.py | 84 ++++++++++++++++----------- gclient_eval.py | 23 +++++--- testing_support/fake_repos.py | 42 ++++++++------ tests/gclient_gcs_smoketest.py | 100 +++++++------------------------- 5 files changed, 131 insertions(+), 144 deletions(-) diff --git a/download_from_google_storage.py b/download_from_google_storage.py index 0ba838773..8217eb1a4 100755 --- a/download_from_google_storage.py +++ b/download_from_google_storage.py @@ -39,7 +39,16 @@ PLATFORM_MAPPING = { } # (b/328065301): Remove when all GCS hooks are migrated to first class deps -MIGRATION_TOGGLE_FILE_NAME = 'is_first_class_gcs' +MIGRATION_TOGGLE_FILE_SUFFIX = '_is_first_class_gcs' + + +def construct_migration_file_name(gcs_object_name): + # Remove any forward slashes + gcs_file_name = gcs_object_name.replace('/', '_') + # Remove any extensions + gcs_file_name = gcs_file_name.split('.')[0] + + return gcs_file_name + MIGRATION_TOGGLE_FILE_SUFFIX class InvalidFileError(IOError): @@ -262,8 +271,8 @@ def _downloader_worker_thread(thread_num, if not working_dir: raise Exception( 'Unable to construct a working_dir from the output_filename.') - migration_file_name = os.path.join(working_dir, - MIGRATION_TOGGLE_FILE_NAME) + migration_file_name = os.path.join( + working_dir, construct_migration_file_name(input_sha1_sum)) extract_dir = None if extract: if not output_filename.endswith('.tar.gz'): @@ -481,8 +490,15 @@ def download_from_google_storage(input_filename, base_url, gsutil, num_threads, # If the directory was created by a first class GCS # dep, remove the migration file and re-download using the # latest hook. - migration_file = os.path.join(working_dir, MIGRATION_TOGGLE_FILE_NAME) - is_first_class_gcs = os.path.exists(migration_file) + is_first_class_gcs = False + # Check all paths to see if they have an equivalent is_first_class_gcs file + # If directory is False, there will be only one item in input_data + for sha1, _ in input_data: + migration_file_name = os.path.join(working_dir, + construct_migration_file_name(sha1)) + if os.path.exists(migration_file_name): + is_first_class_gcs = True + if not force and not is_first_class_gcs and all( _data_exists(sha1, path, extract) for sha1, path in input_data): return 0 diff --git a/gclient.py b/gclient.py index c51d3c7ce..685a22acc 100755 --- a/gclient.py +++ b/gclient.py @@ -563,6 +563,8 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): package = self.GetExpandedPackageName() url = '%s/p/%s/+/%s' % (scm.GetActualRemoteURL(None), package, revision) + if scm.name == 'gcs': + url = self.url if os.path.isdir(scm.checkout_path): revision = scm.revinfo(None, None, None) @@ -755,18 +757,27 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): relative=use_relative_paths, condition=condition)) elif dep_type == 'gcs': - deps_to_add.append( - GcsDependency(parent=self, - name=name, - bucket=dep_value['bucket'], - object_name=dep_value['object_name'], - sha256sum=dep_value['sha256sum'], - output_file=dep_value.get('output_file'), - size_bytes=dep_value['size_bytes'], - custom_vars=self.custom_vars, - should_process=should_process, - relative=use_relative_paths, - condition=condition)) + # Validate that all objects are unique + object_name_set = { + o['object_name'] + for o in dep_value['objects'] + } + if len(object_name_set) != len(dep_value['objects']): + raise Exception('Duplicate object names detected in {} GCS ' + 'dependency.'.format(name)) + for obj in dep_value['objects']: + deps_to_add.append( + GcsDependency(parent=self, + name=name, + bucket=dep_value['bucket'], + object_name=obj['object_name'], + sha256sum=obj['sha256sum'], + output_file=obj.get('output_file'), + size_bytes=obj['size_bytes'], + custom_vars=self.custom_vars, + should_process=should_process, + relative=use_relative_paths, + condition=condition)) else: url = dep_value.get('url') deps_to_add.append( @@ -2513,12 +2524,9 @@ class GcsDependency(Dependency): self.sha256sum = sha256sum self.output_file = output_file self.size_bytes = size_bytes - url = 'gs://{bucket}/{object_name}'.format( - bucket=self.bucket, - object_name=self.object_name, - ) + url = f'gs://{self.bucket}/{self.object_name}' super(GcsDependency, self).__init__(parent=parent, - name=name, + name=f'{name}:{object_name}', url=url, managed=None, custom_deps=None, @@ -2530,6 +2538,12 @@ class GcsDependency(Dependency): relative=relative, condition=condition) + #override + def verify_validity(self): + """GCS dependencies allow duplicate name for objects in same directory.""" + logging.info('Dependency(%s).verify_validity()' % self.name) + return True + #override def run(self, revision_overrides, command, args, work_queue, options, patch_refs, target_branches, skip_sync_revisions): @@ -2547,12 +2561,12 @@ class GcsDependency(Dependency): f.write(sha1) f.write('\n') - def IsDownloadNeeded(self, output_dir, output_file): + def IsDownloadNeeded(self, output_dir, output_file, hash_file, + migration_toggle_file): """Check if download and extract is needed.""" if not os.path.exists(output_file): return True - hash_file = os.path.join(output_dir, 'hash') existing_hash = None if os.path.exists(hash_file): try: @@ -2565,9 +2579,7 @@ class GcsDependency(Dependency): # (b/328065301): Remove is_first_class_gcs_file logic when all GCS # hooks are migrated to first class deps - is_first_class_gcs_file = os.path.join( - output_dir, download_from_google_storage.MIGRATION_TOGGLE_FILE_NAME) - is_first_class_gcs = os.path.exists(is_first_class_gcs_file) + is_first_class_gcs = os.path.exists(migration_toggle_file) if not is_first_class_gcs: return True @@ -2603,15 +2615,22 @@ class GcsDependency(Dependency): root_dir = self.root.root_dir # Directory of the extracted tarfile contents - output_dir = os.path.join(root_dir, self.name) + output_dir = os.path.join(root_dir, self.name.split(':')[0]) output_file = os.path.join(output_dir, self.output_file or gcs_file_name) - if not self.IsDownloadNeeded(output_dir, output_file): + # Remove any forward slashes and drop any extensions + hash_name = self.object_name.replace('/', '_').split('.')[0] + hash_file = os.path.join(output_dir, hash_name + '_hash') + migration_toggle_file = os.path.join( + output_dir, + download_from_google_storage.construct_migration_file_name( + self.object_name)) + if not self.IsDownloadNeeded(output_dir, output_file, hash_file, + migration_toggle_file): return # Remove hashfile - hash_file = os.path.join(output_dir, 'hash') if os.path.exists(hash_file): os.remove(hash_file) @@ -2619,10 +2638,10 @@ class GcsDependency(Dependency): if os.path.exists(output_file): os.remove(output_file) - # Remove extracted contents - if os.path.exists(output_dir): - shutil.rmtree(output_dir) - os.makedirs(output_dir) + # Another GCS dep could be using the same output_dir, so don't remove + # it + if not os.path.exists(output_dir): + os.makedirs(output_dir) if os.getenv('GCLIENT_TEST') == '1': if 'no-extract' in output_file: @@ -2631,7 +2650,7 @@ class GcsDependency(Dependency): else: # Create fake tar file and extracted tar contents tmpdir = tempfile.mkdtemp() - copy_dir = os.path.join(tmpdir, self.name, 'extracted_dir') + copy_dir = os.path.join(tmpdir, gcs_file_name, 'extracted_dir') if os.path.exists(copy_dir): shutil.rmtree(copy_dir) os.makedirs(copy_dir) @@ -2640,10 +2659,9 @@ class GcsDependency(Dependency): with tarfile.open(output_file, "w:gz") as tar: tar.add(copy_dir, arcname=os.path.basename(copy_dir)) else: - gcs_url = 'gs://%s/%s' % (self.bucket, self.object_name) gsutil = download_from_google_storage.Gsutil( download_from_google_storage.GSUTIL_DEFAULT_PATH) - gsutil.check_call('cp', gcs_url, output_file) + gsutil.check_call('cp', self.url, output_file) calculated_sha256sum = '' calculated_size_bytes = None @@ -2680,8 +2698,6 @@ class GcsDependency(Dependency): raise Exception('tarfile contains invalid entries') tar.extractall(path=output_dir) self.WriteFilenameHash(calculated_sha256sum, hash_file) - migration_toggle_file = os.path.join( - output_dir, download_from_google_storage.MIGRATION_TOGGLE_FILE_NAME) with open(migration_toggle_file, 'w') as f: f.write(str(1)) f.write('\n') diff --git a/gclient_eval.py b/gclient_eval.py index 125e2ebbc..43ba97c73 100644 --- a/gclient_eval.py +++ b/gclient_eval.py @@ -133,14 +133,21 @@ _GCLIENT_DEPS_SCHEMA = _NodeDictSchema({ }), # GCS content. _NodeDictSchema({ - 'bucket': str, - 'object_name': str, - 'sha256sum': str, - 'size_bytes': int, - 'generation': int, - schema.Optional('output_file'): str, - schema.Optional('condition'): str, - schema.Optional('dep_type', default='gcs'): str, + 'bucket': + str, + 'objects': [ + _NodeDictSchema({ + 'object_name': str, + 'sha256sum': str, + 'size_bytes': int, + 'generation': int, + schema.Optional('output_file'): str, + }) + ], + schema.Optional('condition'): + str, + schema.Optional('dep_type', default='gcs'): + str, }), ), }) diff --git a/testing_support/fake_repos.py b/testing_support/fake_repos.py index 8f2bb0982..ed6c7622d 100755 --- a/testing_support/fake_repos.py +++ b/testing_support/fake_repos.py @@ -893,28 +893,34 @@ deps = { deps = { 'src/gcs_dep': { 'bucket': '123bucket', - 'object_name': 'deadbeef', 'dep_type': 'gcs', - 'sha256sum': 'abcd123', - 'size_bytes': 10000, - 'generation': 1542380408102454, + 'objects': [{ + 'object_name': 'deadbeef', + 'sha256sum': 'abcd123', + 'size_bytes': 10000, + 'generation': 1542380408102454, + }] }, 'src/another_gcs_dep': { 'bucket': '456bucket', - 'object_name': 'Linux/llvmfile.tar.gz', 'dep_type': 'gcs', - 'sha256sum': 'abcd123', - 'size_bytes': 10000, - 'generation': 1542380408102455, + 'objects': [{ + 'object_name': 'Linux/llvmfile.tar.gz', + 'sha256sum': 'abcd123', + 'size_bytes': 10000, + 'generation': 1542380408102455, + }] }, 'src/gcs_dep_with_output_file': { 'bucket': '789bucket', - 'object_name': 'clang-format-version123', 'dep_type': 'gcs', - 'sha256sum': 'abcd123', - 'output_file': 'clang-format-no-extract', - 'size_bytes': 10000, - 'generation': 1542380408102456, + 'objects': [{ + 'object_name': 'clang-format-version123', + 'sha256sum': 'abcd123', + 'output_file': 'clang-format-no-extract', + 'size_bytes': 10000, + 'generation': 1542380408102456, + }] }, }"""), 'origin': @@ -946,11 +952,13 @@ deps = { deps = { 'src/repo12': { 'bucket': 'bucket123', - 'object_name': 'path_to_file.tar.gz', 'dep_type': 'gcs', - 'sha256sum': 'abcd123', - 'size_bytes': 10000, - 'generation': 1542380408102454, + 'objects': [{ + 'object_name': 'path_to_file.tar.gz', + 'sha256sum': 'abcd123', + 'size_bytes': 10000, + 'generation': 1542380408102454, + }] }, } """, diff --git a/tests/gclient_gcs_smoketest.py b/tests/gclient_gcs_smoketest.py index 156edb99d..22d583a0b 100644 --- a/tests/gclient_gcs_smoketest.py +++ b/tests/gclient_gcs_smoketest.py @@ -35,101 +35,41 @@ class GClientSmokeGcs(gclient_smoketest_base.GClientSmokeBase): tree = self.mangle_git_tree(('repo_22@1', 'src')) tree.update({ - 'src/another_gcs_dep/hash': + 'src/another_gcs_dep/Linux_llvmfile_hash': 'abcd123\n', 'src/another_gcs_dep/llvmfile.tar.gz': 'tarfile', - 'src/another_gcs_dep/is_first_class_gcs': + 'src/another_gcs_dep/Linux_llvmfile_is_first_class_gcs': '1\n', 'src/another_gcs_dep/extracted_dir/extracted_file': 'extracted text', 'src/gcs_dep/deadbeef': 'tarfile', - 'src/gcs_dep/hash': + 'src/gcs_dep/deadbeef_hash': 'abcd123\n', - 'src/gcs_dep/is_first_class_gcs': + 'src/gcs_dep/deadbeef_is_first_class_gcs': '1\n', 'src/gcs_dep/extracted_dir/extracted_file': 'extracted text', - 'src/gcs_dep_with_output_file/hash': + 'src/gcs_dep_with_output_file/clang-format-version123_hash': 'abcd123\n', - 'src/gcs_dep_with_output_file/is_first_class_gcs': + 'src/gcs_dep_with_output_file/clang-format-version123_' + 'is_first_class_gcs': '1\n', 'src/gcs_dep_with_output_file/clang-format-no-extract': 'non-extractable file', }) self.assertTree(tree) - def testConvertGitToGcs(self): - self.gclient(['config', self.git_base + 'repo_23', '--name', 'src']) - - # repo_13@1 has src/repo12 as a git dependency. - self.gclient([ - 'sync', '-v', '-v', '-v', '--revision', - self.githash('repo_23', 1) - ]) - - tree = self.mangle_git_tree(('repo_23@1', 'src'), - ('repo_12@1', 'src/repo12')) - self.assertTree(tree) - - # repo_23@3 has src/repo12 as a gcs dependency. - self.gclient([ - 'sync', '-v', '-v', '-v', '--revision', - self.githash('repo_23', 3), '--delete_unversioned_trees' - ]) - - tree = self.mangle_git_tree(('repo_23@3', 'src')) - tree.update({ - 'src/repo12/extracted_dir/extracted_file': 'extracted text', - 'src/repo12/hash': 'abcd123\n', - 'src/repo12/is_first_class_gcs': '1\n', - 'src/repo12/path_to_file.tar.gz': 'tarfile', - }) - self.assertTree(tree) - - def testConvertGcsToGit(self): - self.gclient(['config', self.git_base + 'repo_23', '--name', 'src']) - - # repo_13@3 has src/repo12 as a cipd dependency. - self.gclient([ - 'sync', '-v', '-v', '-v', '--revision', - self.githash('repo_23', 3), '--delete_unversioned_trees' - ]) - - tree = self.mangle_git_tree(('repo_23@3', 'src')) - tree.update({ - 'src/repo12/extracted_dir/extracted_file': 'extracted text', - 'src/repo12/hash': 'abcd123\n', - 'src/repo12/is_first_class_gcs': '1\n', - 'src/repo12/path_to_file.tar.gz': 'tarfile', - }) - self.assertTree(tree) - - # repo_23@1 has src/repo12 as a git dependency. - self.gclient([ - 'sync', '-v', '-v', '-v', '--revision', - self.githash('repo_23', 1) - ]) - - tree = self.mangle_git_tree(('repo_23@1', 'src'), - ('repo_12@1', 'src/repo12')) - tree.update({ - 'src/repo12/extracted_dir/extracted_file': 'extracted text', - 'src/repo12/hash': 'abcd123\n', - 'src/repo12/is_first_class_gcs': '1\n', - 'src/repo12/path_to_file.tar.gz': 'tarfile', - }) - self.assertTree(tree) - def testRevInfo(self): self.gclient(['config', self.git_base + 'repo_22', '--name', 'src']) self.gclient(['sync']) results = self.gclient(['revinfo']) out = ('src: %(base)srepo_22\n' - 'src/another_gcs_dep: gs://456bucket/Linux/llvmfile.tar.gz\n' - 'src/gcs_dep: gs://123bucket/deadbeef\n' - 'src/gcs_dep_with_output_file: ' + 'src/another_gcs_dep:Linux/llvmfile.tar.gz: ' + 'gs://456bucket/Linux/llvmfile.tar.gz\n' + 'src/gcs_dep:deadbeef: gs://123bucket/deadbeef\n' + 'src/gcs_dep_with_output_file:clang-format-version123: ' 'gs://789bucket/clang-format-version123\n' % { 'base': self.git_base, }) @@ -139,15 +79,15 @@ class GClientSmokeGcs(gclient_smoketest_base.GClientSmokeBase): self.gclient(['config', self.git_base + 'repo_22', '--name', 'src']) self.gclient(['sync']) results = self.gclient(['revinfo', '--actual']) - out = ( - 'src: %(base)srepo_22@%(hash1)s\n' - 'src/another_gcs_dep: gs://456bucket/Linux/llvmfile.tar.gz@None\n' - 'src/gcs_dep: gs://123bucket/deadbeef@None\n' - 'src/gcs_dep_with_output_file: ' - 'gs://789bucket/clang-format-version123@None\n' % { - 'base': self.git_base, - 'hash1': self.githash('repo_22', 1), - }) + out = ('src: %(base)srepo_22@%(hash1)s\n' + 'src/another_gcs_dep:Linux/llvmfile.tar.gz: ' + 'gs://456bucket/Linux/llvmfile.tar.gz\n' + 'src/gcs_dep:deadbeef: gs://123bucket/deadbeef\n' + 'src/gcs_dep_with_output_file:clang-format-version123: ' + 'gs://789bucket/clang-format-version123\n' % { + 'base': self.git_base, + 'hash1': self.githash('repo_22', 1), + }) self.check((out, '', 0), results)