diff --git a/download_from_google_storage.py b/download_from_google_storage.py index 0ba838773..8217eb1a4 100755 --- a/download_from_google_storage.py +++ b/download_from_google_storage.py @@ -39,7 +39,16 @@ PLATFORM_MAPPING = { } # (b/328065301): Remove when all GCS hooks are migrated to first class deps -MIGRATION_TOGGLE_FILE_NAME = 'is_first_class_gcs' +MIGRATION_TOGGLE_FILE_SUFFIX = '_is_first_class_gcs' + + +def construct_migration_file_name(gcs_object_name): + # Remove any forward slashes + gcs_file_name = gcs_object_name.replace('/', '_') + # Remove any extensions + gcs_file_name = gcs_file_name.split('.')[0] + + return gcs_file_name + MIGRATION_TOGGLE_FILE_SUFFIX class InvalidFileError(IOError): @@ -262,8 +271,8 @@ def _downloader_worker_thread(thread_num, if not working_dir: raise Exception( 'Unable to construct a working_dir from the output_filename.') - migration_file_name = os.path.join(working_dir, - MIGRATION_TOGGLE_FILE_NAME) + migration_file_name = os.path.join( + working_dir, construct_migration_file_name(input_sha1_sum)) extract_dir = None if extract: if not output_filename.endswith('.tar.gz'): @@ -481,8 +490,15 @@ def download_from_google_storage(input_filename, base_url, gsutil, num_threads, # If the directory was created by a first class GCS # dep, remove the migration file and re-download using the # latest hook. - migration_file = os.path.join(working_dir, MIGRATION_TOGGLE_FILE_NAME) - is_first_class_gcs = os.path.exists(migration_file) + is_first_class_gcs = False + # Check all paths to see if they have an equivalent is_first_class_gcs file + # If directory is False, there will be only one item in input_data + for sha1, _ in input_data: + migration_file_name = os.path.join(working_dir, + construct_migration_file_name(sha1)) + if os.path.exists(migration_file_name): + is_first_class_gcs = True + if not force and not is_first_class_gcs and all( _data_exists(sha1, path, extract) for sha1, path in input_data): return 0 diff --git a/gclient.py b/gclient.py index c51d3c7ce..685a22acc 100755 --- a/gclient.py +++ b/gclient.py @@ -563,6 +563,8 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): package = self.GetExpandedPackageName() url = '%s/p/%s/+/%s' % (scm.GetActualRemoteURL(None), package, revision) + if scm.name == 'gcs': + url = self.url if os.path.isdir(scm.checkout_path): revision = scm.revinfo(None, None, None) @@ -755,18 +757,27 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): relative=use_relative_paths, condition=condition)) elif dep_type == 'gcs': - deps_to_add.append( - GcsDependency(parent=self, - name=name, - bucket=dep_value['bucket'], - object_name=dep_value['object_name'], - sha256sum=dep_value['sha256sum'], - output_file=dep_value.get('output_file'), - size_bytes=dep_value['size_bytes'], - custom_vars=self.custom_vars, - should_process=should_process, - relative=use_relative_paths, - condition=condition)) + # Validate that all objects are unique + object_name_set = { + o['object_name'] + for o in dep_value['objects'] + } + if len(object_name_set) != len(dep_value['objects']): + raise Exception('Duplicate object names detected in {} GCS ' + 'dependency.'.format(name)) + for obj in dep_value['objects']: + deps_to_add.append( + GcsDependency(parent=self, + name=name, + bucket=dep_value['bucket'], + object_name=obj['object_name'], + sha256sum=obj['sha256sum'], + output_file=obj.get('output_file'), + size_bytes=obj['size_bytes'], + custom_vars=self.custom_vars, + should_process=should_process, + relative=use_relative_paths, + condition=condition)) else: url = dep_value.get('url') deps_to_add.append( @@ -2513,12 +2524,9 @@ class GcsDependency(Dependency): self.sha256sum = sha256sum self.output_file = output_file self.size_bytes = size_bytes - url = 'gs://{bucket}/{object_name}'.format( - bucket=self.bucket, - object_name=self.object_name, - ) + url = f'gs://{self.bucket}/{self.object_name}' super(GcsDependency, self).__init__(parent=parent, - name=name, + name=f'{name}:{object_name}', url=url, managed=None, custom_deps=None, @@ -2530,6 +2538,12 @@ class GcsDependency(Dependency): relative=relative, condition=condition) + #override + def verify_validity(self): + """GCS dependencies allow duplicate name for objects in same directory.""" + logging.info('Dependency(%s).verify_validity()' % self.name) + return True + #override def run(self, revision_overrides, command, args, work_queue, options, patch_refs, target_branches, skip_sync_revisions): @@ -2547,12 +2561,12 @@ class GcsDependency(Dependency): f.write(sha1) f.write('\n') - def IsDownloadNeeded(self, output_dir, output_file): + def IsDownloadNeeded(self, output_dir, output_file, hash_file, + migration_toggle_file): """Check if download and extract is needed.""" if not os.path.exists(output_file): return True - hash_file = os.path.join(output_dir, 'hash') existing_hash = None if os.path.exists(hash_file): try: @@ -2565,9 +2579,7 @@ class GcsDependency(Dependency): # (b/328065301): Remove is_first_class_gcs_file logic when all GCS # hooks are migrated to first class deps - is_first_class_gcs_file = os.path.join( - output_dir, download_from_google_storage.MIGRATION_TOGGLE_FILE_NAME) - is_first_class_gcs = os.path.exists(is_first_class_gcs_file) + is_first_class_gcs = os.path.exists(migration_toggle_file) if not is_first_class_gcs: return True @@ -2603,15 +2615,22 @@ class GcsDependency(Dependency): root_dir = self.root.root_dir # Directory of the extracted tarfile contents - output_dir = os.path.join(root_dir, self.name) + output_dir = os.path.join(root_dir, self.name.split(':')[0]) output_file = os.path.join(output_dir, self.output_file or gcs_file_name) - if not self.IsDownloadNeeded(output_dir, output_file): + # Remove any forward slashes and drop any extensions + hash_name = self.object_name.replace('/', '_').split('.')[0] + hash_file = os.path.join(output_dir, hash_name + '_hash') + migration_toggle_file = os.path.join( + output_dir, + download_from_google_storage.construct_migration_file_name( + self.object_name)) + if not self.IsDownloadNeeded(output_dir, output_file, hash_file, + migration_toggle_file): return # Remove hashfile - hash_file = os.path.join(output_dir, 'hash') if os.path.exists(hash_file): os.remove(hash_file) @@ -2619,10 +2638,10 @@ class GcsDependency(Dependency): if os.path.exists(output_file): os.remove(output_file) - # Remove extracted contents - if os.path.exists(output_dir): - shutil.rmtree(output_dir) - os.makedirs(output_dir) + # Another GCS dep could be using the same output_dir, so don't remove + # it + if not os.path.exists(output_dir): + os.makedirs(output_dir) if os.getenv('GCLIENT_TEST') == '1': if 'no-extract' in output_file: @@ -2631,7 +2650,7 @@ class GcsDependency(Dependency): else: # Create fake tar file and extracted tar contents tmpdir = tempfile.mkdtemp() - copy_dir = os.path.join(tmpdir, self.name, 'extracted_dir') + copy_dir = os.path.join(tmpdir, gcs_file_name, 'extracted_dir') if os.path.exists(copy_dir): shutil.rmtree(copy_dir) os.makedirs(copy_dir) @@ -2640,10 +2659,9 @@ class GcsDependency(Dependency): with tarfile.open(output_file, "w:gz") as tar: tar.add(copy_dir, arcname=os.path.basename(copy_dir)) else: - gcs_url = 'gs://%s/%s' % (self.bucket, self.object_name) gsutil = download_from_google_storage.Gsutil( download_from_google_storage.GSUTIL_DEFAULT_PATH) - gsutil.check_call('cp', gcs_url, output_file) + gsutil.check_call('cp', self.url, output_file) calculated_sha256sum = '' calculated_size_bytes = None @@ -2680,8 +2698,6 @@ class GcsDependency(Dependency): raise Exception('tarfile contains invalid entries') tar.extractall(path=output_dir) self.WriteFilenameHash(calculated_sha256sum, hash_file) - migration_toggle_file = os.path.join( - output_dir, download_from_google_storage.MIGRATION_TOGGLE_FILE_NAME) with open(migration_toggle_file, 'w') as f: f.write(str(1)) f.write('\n') diff --git a/gclient_eval.py b/gclient_eval.py index 125e2ebbc..43ba97c73 100644 --- a/gclient_eval.py +++ b/gclient_eval.py @@ -133,14 +133,21 @@ _GCLIENT_DEPS_SCHEMA = _NodeDictSchema({ }), # GCS content. _NodeDictSchema({ - 'bucket': str, - 'object_name': str, - 'sha256sum': str, - 'size_bytes': int, - 'generation': int, - schema.Optional('output_file'): str, - schema.Optional('condition'): str, - schema.Optional('dep_type', default='gcs'): str, + 'bucket': + str, + 'objects': [ + _NodeDictSchema({ + 'object_name': str, + 'sha256sum': str, + 'size_bytes': int, + 'generation': int, + schema.Optional('output_file'): str, + }) + ], + schema.Optional('condition'): + str, + schema.Optional('dep_type', default='gcs'): + str, }), ), }) diff --git a/testing_support/fake_repos.py b/testing_support/fake_repos.py index 8f2bb0982..ed6c7622d 100755 --- a/testing_support/fake_repos.py +++ b/testing_support/fake_repos.py @@ -893,28 +893,34 @@ deps = { deps = { 'src/gcs_dep': { 'bucket': '123bucket', - 'object_name': 'deadbeef', 'dep_type': 'gcs', - 'sha256sum': 'abcd123', - 'size_bytes': 10000, - 'generation': 1542380408102454, + 'objects': [{ + 'object_name': 'deadbeef', + 'sha256sum': 'abcd123', + 'size_bytes': 10000, + 'generation': 1542380408102454, + }] }, 'src/another_gcs_dep': { 'bucket': '456bucket', - 'object_name': 'Linux/llvmfile.tar.gz', 'dep_type': 'gcs', - 'sha256sum': 'abcd123', - 'size_bytes': 10000, - 'generation': 1542380408102455, + 'objects': [{ + 'object_name': 'Linux/llvmfile.tar.gz', + 'sha256sum': 'abcd123', + 'size_bytes': 10000, + 'generation': 1542380408102455, + }] }, 'src/gcs_dep_with_output_file': { 'bucket': '789bucket', - 'object_name': 'clang-format-version123', 'dep_type': 'gcs', - 'sha256sum': 'abcd123', - 'output_file': 'clang-format-no-extract', - 'size_bytes': 10000, - 'generation': 1542380408102456, + 'objects': [{ + 'object_name': 'clang-format-version123', + 'sha256sum': 'abcd123', + 'output_file': 'clang-format-no-extract', + 'size_bytes': 10000, + 'generation': 1542380408102456, + }] }, }"""), 'origin': @@ -946,11 +952,13 @@ deps = { deps = { 'src/repo12': { 'bucket': 'bucket123', - 'object_name': 'path_to_file.tar.gz', 'dep_type': 'gcs', - 'sha256sum': 'abcd123', - 'size_bytes': 10000, - 'generation': 1542380408102454, + 'objects': [{ + 'object_name': 'path_to_file.tar.gz', + 'sha256sum': 'abcd123', + 'size_bytes': 10000, + 'generation': 1542380408102454, + }] }, } """, diff --git a/tests/gclient_gcs_smoketest.py b/tests/gclient_gcs_smoketest.py index 156edb99d..22d583a0b 100644 --- a/tests/gclient_gcs_smoketest.py +++ b/tests/gclient_gcs_smoketest.py @@ -35,101 +35,41 @@ class GClientSmokeGcs(gclient_smoketest_base.GClientSmokeBase): tree = self.mangle_git_tree(('repo_22@1', 'src')) tree.update({ - 'src/another_gcs_dep/hash': + 'src/another_gcs_dep/Linux_llvmfile_hash': 'abcd123\n', 'src/another_gcs_dep/llvmfile.tar.gz': 'tarfile', - 'src/another_gcs_dep/is_first_class_gcs': + 'src/another_gcs_dep/Linux_llvmfile_is_first_class_gcs': '1\n', 'src/another_gcs_dep/extracted_dir/extracted_file': 'extracted text', 'src/gcs_dep/deadbeef': 'tarfile', - 'src/gcs_dep/hash': + 'src/gcs_dep/deadbeef_hash': 'abcd123\n', - 'src/gcs_dep/is_first_class_gcs': + 'src/gcs_dep/deadbeef_is_first_class_gcs': '1\n', 'src/gcs_dep/extracted_dir/extracted_file': 'extracted text', - 'src/gcs_dep_with_output_file/hash': + 'src/gcs_dep_with_output_file/clang-format-version123_hash': 'abcd123\n', - 'src/gcs_dep_with_output_file/is_first_class_gcs': + 'src/gcs_dep_with_output_file/clang-format-version123_' + 'is_first_class_gcs': '1\n', 'src/gcs_dep_with_output_file/clang-format-no-extract': 'non-extractable file', }) self.assertTree(tree) - def testConvertGitToGcs(self): - self.gclient(['config', self.git_base + 'repo_23', '--name', 'src']) - - # repo_13@1 has src/repo12 as a git dependency. - self.gclient([ - 'sync', '-v', '-v', '-v', '--revision', - self.githash('repo_23', 1) - ]) - - tree = self.mangle_git_tree(('repo_23@1', 'src'), - ('repo_12@1', 'src/repo12')) - self.assertTree(tree) - - # repo_23@3 has src/repo12 as a gcs dependency. - self.gclient([ - 'sync', '-v', '-v', '-v', '--revision', - self.githash('repo_23', 3), '--delete_unversioned_trees' - ]) - - tree = self.mangle_git_tree(('repo_23@3', 'src')) - tree.update({ - 'src/repo12/extracted_dir/extracted_file': 'extracted text', - 'src/repo12/hash': 'abcd123\n', - 'src/repo12/is_first_class_gcs': '1\n', - 'src/repo12/path_to_file.tar.gz': 'tarfile', - }) - self.assertTree(tree) - - def testConvertGcsToGit(self): - self.gclient(['config', self.git_base + 'repo_23', '--name', 'src']) - - # repo_13@3 has src/repo12 as a cipd dependency. - self.gclient([ - 'sync', '-v', '-v', '-v', '--revision', - self.githash('repo_23', 3), '--delete_unversioned_trees' - ]) - - tree = self.mangle_git_tree(('repo_23@3', 'src')) - tree.update({ - 'src/repo12/extracted_dir/extracted_file': 'extracted text', - 'src/repo12/hash': 'abcd123\n', - 'src/repo12/is_first_class_gcs': '1\n', - 'src/repo12/path_to_file.tar.gz': 'tarfile', - }) - self.assertTree(tree) - - # repo_23@1 has src/repo12 as a git dependency. - self.gclient([ - 'sync', '-v', '-v', '-v', '--revision', - self.githash('repo_23', 1) - ]) - - tree = self.mangle_git_tree(('repo_23@1', 'src'), - ('repo_12@1', 'src/repo12')) - tree.update({ - 'src/repo12/extracted_dir/extracted_file': 'extracted text', - 'src/repo12/hash': 'abcd123\n', - 'src/repo12/is_first_class_gcs': '1\n', - 'src/repo12/path_to_file.tar.gz': 'tarfile', - }) - self.assertTree(tree) - def testRevInfo(self): self.gclient(['config', self.git_base + 'repo_22', '--name', 'src']) self.gclient(['sync']) results = self.gclient(['revinfo']) out = ('src: %(base)srepo_22\n' - 'src/another_gcs_dep: gs://456bucket/Linux/llvmfile.tar.gz\n' - 'src/gcs_dep: gs://123bucket/deadbeef\n' - 'src/gcs_dep_with_output_file: ' + 'src/another_gcs_dep:Linux/llvmfile.tar.gz: ' + 'gs://456bucket/Linux/llvmfile.tar.gz\n' + 'src/gcs_dep:deadbeef: gs://123bucket/deadbeef\n' + 'src/gcs_dep_with_output_file:clang-format-version123: ' 'gs://789bucket/clang-format-version123\n' % { 'base': self.git_base, }) @@ -139,15 +79,15 @@ class GClientSmokeGcs(gclient_smoketest_base.GClientSmokeBase): self.gclient(['config', self.git_base + 'repo_22', '--name', 'src']) self.gclient(['sync']) results = self.gclient(['revinfo', '--actual']) - out = ( - 'src: %(base)srepo_22@%(hash1)s\n' - 'src/another_gcs_dep: gs://456bucket/Linux/llvmfile.tar.gz@None\n' - 'src/gcs_dep: gs://123bucket/deadbeef@None\n' - 'src/gcs_dep_with_output_file: ' - 'gs://789bucket/clang-format-version123@None\n' % { - 'base': self.git_base, - 'hash1': self.githash('repo_22', 1), - }) + out = ('src: %(base)srepo_22@%(hash1)s\n' + 'src/another_gcs_dep:Linux/llvmfile.tar.gz: ' + 'gs://456bucket/Linux/llvmfile.tar.gz\n' + 'src/gcs_dep:deadbeef: gs://123bucket/deadbeef\n' + 'src/gcs_dep_with_output_file:clang-format-version123: ' + 'gs://789bucket/clang-format-version123\n' % { + 'base': self.git_base, + 'hash1': self.githash('repo_22', 1), + }) self.check((out, '', 0), results)