Allow multiple objects per GCS dep

Allows multiple objects to use the same directory path. Each object
gets its own hash file and its own migration file name, so all object
names within a dep must be unique.

Also updates download_from_google_storage to check for each object's
unique migration file name.
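
A minimal sketch of that check: construct_migration_file_name is
verbatim from the diff below, while is_first_class_gcs_dir is a
hypothetical wrapper that condenses the loop added to
download_from_google_storage:

```
import os

MIGRATION_TOGGLE_FILE_SUFFIX = '_is_first_class_gcs'


def construct_migration_file_name(gcs_object_name):
    # 'Linux/llvmfile.tar.gz' -> 'Linux_llvmfile_is_first_class_gcs'
    gcs_file_name = gcs_object_name.replace('/', '_')  # drop forward slashes
    gcs_file_name = gcs_file_name.split('.')[0]  # drop extensions
    return gcs_file_name + MIGRATION_TOGGLE_FILE_SUFFIX


def is_first_class_gcs_dir(working_dir, input_data):
    # input_data holds (sha1_or_object_name, output_path) pairs; the
    # directory belongs to a first-class GCS dep if any entry has its
    # own migration toggle file.
    return any(
        os.path.exists(
            os.path.join(working_dir, construct_migration_file_name(sha1)))
        for sha1, _ in input_data)
```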

Remove ConvertToGcs smoke tests since we're not converting
any git <-> gcs deps.

Example:
```
'src/third_party/llvm-build/Release+Asserts': {
  'dep_type': 'gcs',
  'condition': 'not llvm_force_head_revision',
  'bucket': 'chromium-browser-clang',
  'objects': [
      {
          'object_name': 'Linux_x64/clang-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz',
          'sha256sum': '7b33138d8592199f97d132242d7b3e10f460c5c9655d49a3ad3767218fba7a77',
          'size_bytes': 50212876,
      },
      {
          'object_name': 'Linux_x64/llvmobjdump-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz',
          'sha256sum': '14d669650cd212eb0ccb8c34a9e655338dfdee66fe2ecdaa517f6bd607c09a97',
          'size_bytes': 5302312,
      },
  ]
},
```
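
Each object above then gets its own hash file and migration toggle file
in the shared output directory. A sketch of the derived names, mirroring
the naming logic in the GcsDependency changes below (output shown in
comments):

```
objects = [
    'Linux_x64/clang-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz',
    'Linux_x64/llvmobjdump-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz',
]
for object_name in objects:
    # e.g. 'Linux_x64_clang-llvmorg-19-init-2941-ga0b3dbaf-22'
    base = object_name.replace('/', '_').split('.')[0]
    print(base + '_hash')                # per-object hash file
    print(base + '_is_first_class_gcs')  # per-object migration toggle
```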

TODO: update .gitignore to ignore *_is_first_class_gcs and *_hash files

Bug: b/324418194
Change-Id: I89d34b06ee24f4c1aa316cd51530ad078e823143
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5420793
Commit-Queue: Stephanie Kim <kimstephanie@google.com>
Reviewed-by: Joanna Wang <jojwang@chromium.org>

--- a/download_from_google_storage.py
+++ b/download_from_google_storage.py
@@ -39,7 +39,16 @@ PLATFORM_MAPPING = {
 }
 # (b/328065301): Remove when all GCS hooks are migrated to first class deps
-MIGRATION_TOGGLE_FILE_NAME = 'is_first_class_gcs'
+MIGRATION_TOGGLE_FILE_SUFFIX = '_is_first_class_gcs'
+
+
+def construct_migration_file_name(gcs_object_name):
+    # Remove any forward slashes
+    gcs_file_name = gcs_object_name.replace('/', '_')
+    # Remove any extensions
+    gcs_file_name = gcs_file_name.split('.')[0]
+    return gcs_file_name + MIGRATION_TOGGLE_FILE_SUFFIX
+
+
 class InvalidFileError(IOError):
@@ -262,8 +271,8 @@ def _downloader_worker_thread(thread_num,
         if not working_dir:
             raise Exception(
                 'Unable to construct a working_dir from the output_filename.')
-        migration_file_name = os.path.join(working_dir,
-                                           MIGRATION_TOGGLE_FILE_NAME)
+        migration_file_name = os.path.join(
+            working_dir, construct_migration_file_name(input_sha1_sum))
         extract_dir = None
         if extract:
             if not output_filename.endswith('.tar.gz'):
@@ -481,8 +490,15 @@ def download_from_google_storage(input_filename, base_url, gsutil, num_threads,
     # If the directory was created by a first class GCS
     # dep, remove the migration file and re-download using the
     # latest hook.
-    migration_file = os.path.join(working_dir, MIGRATION_TOGGLE_FILE_NAME)
-    is_first_class_gcs = os.path.exists(migration_file)
+    is_first_class_gcs = False
+    # Check all paths to see if they have an equivalent is_first_class_gcs file
+    # If directory is False, there will be only one item in input_data
+    for sha1, _ in input_data:
+        migration_file_name = os.path.join(working_dir,
+                                           construct_migration_file_name(sha1))
+        if os.path.exists(migration_file_name):
+            is_first_class_gcs = True
     if not force and not is_first_class_gcs and all(
             _data_exists(sha1, path, extract) for sha1, path in input_data):
         return 0

--- a/gclient.py
+++ b/gclient.py
@@ -563,6 +563,8 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
             package = self.GetExpandedPackageName()
             url = '%s/p/%s/+/%s' % (scm.GetActualRemoteURL(None), package,
                                     revision)
+        if scm.name == 'gcs':
+            url = self.url
         if os.path.isdir(scm.checkout_path):
             revision = scm.revinfo(None, None, None)
@@ -755,18 +757,27 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                                        relative=use_relative_paths,
                                        condition=condition))
             elif dep_type == 'gcs':
-                deps_to_add.append(
-                    GcsDependency(parent=self,
-                                  name=name,
-                                  bucket=dep_value['bucket'],
-                                  object_name=dep_value['object_name'],
-                                  sha256sum=dep_value['sha256sum'],
-                                  output_file=dep_value.get('output_file'),
-                                  size_bytes=dep_value['size_bytes'],
-                                  custom_vars=self.custom_vars,
-                                  should_process=should_process,
-                                  relative=use_relative_paths,
-                                  condition=condition))
+                # Validate that all objects are unique
+                object_name_set = {
+                    o['object_name']
+                    for o in dep_value['objects']
+                }
+                if len(object_name_set) != len(dep_value['objects']):
+                    raise Exception('Duplicate object names detected in {} GCS '
+                                    'dependency.'.format(name))
+                for obj in dep_value['objects']:
+                    deps_to_add.append(
+                        GcsDependency(parent=self,
+                                      name=name,
+                                      bucket=dep_value['bucket'],
+                                      object_name=obj['object_name'],
+                                      sha256sum=obj['sha256sum'],
+                                      output_file=obj.get('output_file'),
+                                      size_bytes=obj['size_bytes'],
+                                      custom_vars=self.custom_vars,
+                                      should_process=should_process,
+                                      relative=use_relative_paths,
+                                      condition=condition))
             else:
                 url = dep_value.get('url')
                 deps_to_add.append(
@@ -2513,12 +2524,9 @@ class GcsDependency(Dependency):
         self.sha256sum = sha256sum
         self.output_file = output_file
         self.size_bytes = size_bytes
-        url = 'gs://{bucket}/{object_name}'.format(
-            bucket=self.bucket,
-            object_name=self.object_name,
-        )
+        url = f'gs://{self.bucket}/{self.object_name}'
         super(GcsDependency, self).__init__(parent=parent,
-                                            name=name,
+                                            name=f'{name}:{object_name}',
                                             url=url,
                                             managed=None,
                                             custom_deps=None,
@@ -2530,6 +2538,12 @@
                                             relative=relative,
                                             condition=condition)
 
+    #override
+    def verify_validity(self):
+        """GCS dependencies allow duplicate name for objects in same directory."""
+        logging.info('Dependency(%s).verify_validity()' % self.name)
+        return True
+
     #override
     def run(self, revision_overrides, command, args, work_queue, options,
             patch_refs, target_branches, skip_sync_revisions):
@@ -2547,12 +2561,12 @@
             f.write(sha1)
             f.write('\n')
 
-    def IsDownloadNeeded(self, output_dir, output_file):
+    def IsDownloadNeeded(self, output_dir, output_file, hash_file,
+                         migration_toggle_file):
         """Check if download and extract is needed."""
         if not os.path.exists(output_file):
             return True
 
-        hash_file = os.path.join(output_dir, 'hash')
         existing_hash = None
         if os.path.exists(hash_file):
             try:
@@ -2565,9 +2579,7 @@
         # (b/328065301): Remove is_first_class_gcs_file logic when all GCS
         # hooks are migrated to first class deps
-        is_first_class_gcs_file = os.path.join(
-            output_dir, download_from_google_storage.MIGRATION_TOGGLE_FILE_NAME)
-        is_first_class_gcs = os.path.exists(is_first_class_gcs_file)
+        is_first_class_gcs = os.path.exists(migration_toggle_file)
         if not is_first_class_gcs:
             return True
@@ -2603,15 +2615,22 @@
         root_dir = self.root.root_dir
 
         # Directory of the extracted tarfile contents
-        output_dir = os.path.join(root_dir, self.name)
+        output_dir = os.path.join(root_dir, self.name.split(':')[0])
         output_file = os.path.join(output_dir, self.output_file
                                    or gcs_file_name)
-        if not self.IsDownloadNeeded(output_dir, output_file):
+        # Remove any forward slashes and drop any extensions
+        hash_name = self.object_name.replace('/', '_').split('.')[0]
+        hash_file = os.path.join(output_dir, hash_name + '_hash')
+        migration_toggle_file = os.path.join(
+            output_dir,
+            download_from_google_storage.construct_migration_file_name(
+                self.object_name))
+        if not self.IsDownloadNeeded(output_dir, output_file, hash_file,
+                                     migration_toggle_file):
             return
 
         # Remove hashfile
-        hash_file = os.path.join(output_dir, 'hash')
         if os.path.exists(hash_file):
             os.remove(hash_file)
@@ -2619,10 +2638,10 @@
         if os.path.exists(output_file):
             os.remove(output_file)
 
         # Remove extracted contents
-        if os.path.exists(output_dir):
-            shutil.rmtree(output_dir)
-        os.makedirs(output_dir)
+        # Another GCS dep could be using the same output_dir, so don't remove
+        # it
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir)
 
         if os.getenv('GCLIENT_TEST') == '1':
             if 'no-extract' in output_file:
@@ -2631,7 +2650,7 @@
             else:
                 # Create fake tar file and extracted tar contents
                 tmpdir = tempfile.mkdtemp()
-                copy_dir = os.path.join(tmpdir, self.name, 'extracted_dir')
+                copy_dir = os.path.join(tmpdir, gcs_file_name, 'extracted_dir')
                 if os.path.exists(copy_dir):
                     shutil.rmtree(copy_dir)
                 os.makedirs(copy_dir)
@@ -2640,10 +2659,9 @@
                 with tarfile.open(output_file, "w:gz") as tar:
                     tar.add(copy_dir, arcname=os.path.basename(copy_dir))
         else:
-            gcs_url = 'gs://%s/%s' % (self.bucket, self.object_name)
             gsutil = download_from_google_storage.Gsutil(
                 download_from_google_storage.GSUTIL_DEFAULT_PATH)
-            gsutil.check_call('cp', gcs_url, output_file)
+            gsutil.check_call('cp', self.url, output_file)
 
         calculated_sha256sum = ''
         calculated_size_bytes = None
@@ -2680,8 +2698,6 @@
                     raise Exception('tarfile contains invalid entries')
                 tar.extractall(path=output_dir)
         self.WriteFilenameHash(calculated_sha256sum, hash_file)
-        migration_toggle_file = os.path.join(
-            output_dir, download_from_google_storage.MIGRATION_TOGGLE_FILE_NAME)
         with open(migration_toggle_file, 'w') as f:
             f.write(str(1))
             f.write('\n')

--- a/gclient_eval.py
+++ b/gclient_eval.py
@@ -133,14 +133,21 @@ _GCLIENT_DEPS_SCHEMA = _NodeDictSchema({
         }),
         # GCS content.
         _NodeDictSchema({
-            'bucket': str,
-            'object_name': str,
-            'sha256sum': str,
-            'size_bytes': int,
-            'generation': int,
-            schema.Optional('output_file'): str,
-            schema.Optional('condition'): str,
-            schema.Optional('dep_type', default='gcs'): str,
+            'bucket':
+            str,
+            'objects': [
+                _NodeDictSchema({
+                    'object_name': str,
+                    'sha256sum': str,
+                    'size_bytes': int,
+                    'generation': int,
+                    schema.Optional('output_file'): str,
+                })
+            ],
+            schema.Optional('condition'):
+            str,
+            schema.Optional('dep_type', default='gcs'):
+            str,
         }),
     ),
 })

--- a/testing_support/fake_repos.py
+++ b/testing_support/fake_repos.py
@@ -893,28 +893,34 @@ deps = {
 deps = {
   'src/gcs_dep': {
     'bucket': '123bucket',
-    'object_name': 'deadbeef',
     'dep_type': 'gcs',
-    'sha256sum': 'abcd123',
-    'size_bytes': 10000,
-    'generation': 1542380408102454,
+    'objects': [{
+      'object_name': 'deadbeef',
+      'sha256sum': 'abcd123',
+      'size_bytes': 10000,
+      'generation': 1542380408102454,
+    }]
   },
   'src/another_gcs_dep': {
     'bucket': '456bucket',
-    'object_name': 'Linux/llvmfile.tar.gz',
     'dep_type': 'gcs',
-    'sha256sum': 'abcd123',
-    'size_bytes': 10000,
-    'generation': 1542380408102455,
+    'objects': [{
+      'object_name': 'Linux/llvmfile.tar.gz',
+      'sha256sum': 'abcd123',
+      'size_bytes': 10000,
+      'generation': 1542380408102455,
+    }]
   },
   'src/gcs_dep_with_output_file': {
     'bucket': '789bucket',
-    'object_name': 'clang-format-version123',
     'dep_type': 'gcs',
-    'sha256sum': 'abcd123',
-    'output_file': 'clang-format-no-extract',
-    'size_bytes': 10000,
-    'generation': 1542380408102456,
+    'objects': [{
+      'object_name': 'clang-format-version123',
+      'sha256sum': 'abcd123',
+      'output_file': 'clang-format-no-extract',
+      'size_bytes': 10000,
+      'generation': 1542380408102456,
+    }]
   },
 }"""),
 'origin':
@@ -946,11 +952,13 @@ deps = {
 deps = {
   'src/repo12': {
     'bucket': 'bucket123',
-    'object_name': 'path_to_file.tar.gz',
     'dep_type': 'gcs',
-    'sha256sum': 'abcd123',
-    'size_bytes': 10000,
-    'generation': 1542380408102454,
+    'objects': [{
+      'object_name': 'path_to_file.tar.gz',
+      'sha256sum': 'abcd123',
+      'size_bytes': 10000,
+      'generation': 1542380408102454,
+    }]
   },
 }
 """,

--- a/tests/gclient_smoketest.py
+++ b/tests/gclient_smoketest.py
@@ -35,101 +35,41 @@ class GClientSmokeGcs(gclient_smoketest_base.GClientSmokeBase):
         tree = self.mangle_git_tree(('repo_22@1', 'src'))
         tree.update({
-            'src/another_gcs_dep/hash':
+            'src/another_gcs_dep/Linux_llvmfile_hash':
             'abcd123\n',
             'src/another_gcs_dep/llvmfile.tar.gz':
             'tarfile',
-            'src/another_gcs_dep/is_first_class_gcs':
+            'src/another_gcs_dep/Linux_llvmfile_is_first_class_gcs':
             '1\n',
             'src/another_gcs_dep/extracted_dir/extracted_file':
             'extracted text',
             'src/gcs_dep/deadbeef':
             'tarfile',
-            'src/gcs_dep/hash':
+            'src/gcs_dep/deadbeef_hash':
             'abcd123\n',
-            'src/gcs_dep/is_first_class_gcs':
+            'src/gcs_dep/deadbeef_is_first_class_gcs':
             '1\n',
             'src/gcs_dep/extracted_dir/extracted_file':
             'extracted text',
-            'src/gcs_dep_with_output_file/hash':
+            'src/gcs_dep_with_output_file/clang-format-version123_hash':
             'abcd123\n',
-            'src/gcs_dep_with_output_file/is_first_class_gcs':
+            'src/gcs_dep_with_output_file/clang-format-version123_'
+            'is_first_class_gcs':
             '1\n',
             'src/gcs_dep_with_output_file/clang-format-no-extract':
             'non-extractable file',
         })
         self.assertTree(tree)
 
-    def testConvertGitToGcs(self):
-        self.gclient(['config', self.git_base + 'repo_23', '--name', 'src'])
-        # repo_13@1 has src/repo12 as a git dependency.
-        self.gclient([
-            'sync', '-v', '-v', '-v', '--revision',
-            self.githash('repo_23', 1)
-        ])
-        tree = self.mangle_git_tree(('repo_23@1', 'src'),
-                                    ('repo_12@1', 'src/repo12'))
-        self.assertTree(tree)
-
-        # repo_23@3 has src/repo12 as a gcs dependency.
-        self.gclient([
-            'sync', '-v', '-v', '-v', '--revision',
-            self.githash('repo_23', 3), '--delete_unversioned_trees'
-        ])
-        tree = self.mangle_git_tree(('repo_23@3', 'src'))
-        tree.update({
-            'src/repo12/extracted_dir/extracted_file': 'extracted text',
-            'src/repo12/hash': 'abcd123\n',
-            'src/repo12/is_first_class_gcs': '1\n',
-            'src/repo12/path_to_file.tar.gz': 'tarfile',
-        })
-        self.assertTree(tree)
-
-    def testConvertGcsToGit(self):
-        self.gclient(['config', self.git_base + 'repo_23', '--name', 'src'])
-        # repo_13@3 has src/repo12 as a cipd dependency.
-        self.gclient([
-            'sync', '-v', '-v', '-v', '--revision',
-            self.githash('repo_23', 3), '--delete_unversioned_trees'
-        ])
-        tree = self.mangle_git_tree(('repo_23@3', 'src'))
-        tree.update({
-            'src/repo12/extracted_dir/extracted_file': 'extracted text',
-            'src/repo12/hash': 'abcd123\n',
-            'src/repo12/is_first_class_gcs': '1\n',
-            'src/repo12/path_to_file.tar.gz': 'tarfile',
-        })
-        self.assertTree(tree)
-
-        # repo_23@1 has src/repo12 as a git dependency.
-        self.gclient([
-            'sync', '-v', '-v', '-v', '--revision',
-            self.githash('repo_23', 1)
-        ])
-        tree = self.mangle_git_tree(('repo_23@1', 'src'),
-                                    ('repo_12@1', 'src/repo12'))
-        tree.update({
-            'src/repo12/extracted_dir/extracted_file': 'extracted text',
-            'src/repo12/hash': 'abcd123\n',
-            'src/repo12/is_first_class_gcs': '1\n',
-            'src/repo12/path_to_file.tar.gz': 'tarfile',
-        })
-        self.assertTree(tree)
-
     def testRevInfo(self):
         self.gclient(['config', self.git_base + 'repo_22', '--name', 'src'])
         self.gclient(['sync'])
         results = self.gclient(['revinfo'])
         out = ('src: %(base)srepo_22\n'
-               'src/another_gcs_dep: gs://456bucket/Linux/llvmfile.tar.gz\n'
-               'src/gcs_dep: gs://123bucket/deadbeef\n'
-               'src/gcs_dep_with_output_file: '
+               'src/another_gcs_dep:Linux/llvmfile.tar.gz: '
+               'gs://456bucket/Linux/llvmfile.tar.gz\n'
+               'src/gcs_dep:deadbeef: gs://123bucket/deadbeef\n'
+               'src/gcs_dep_with_output_file:clang-format-version123: '
                'gs://789bucket/clang-format-version123\n' % {
                    'base': self.git_base,
                })
@@ -139,15 +79,15 @@ class GClientSmokeGcs(gclient_smoketest_base.GClientSmokeBase):
         self.gclient(['config', self.git_base + 'repo_22', '--name', 'src'])
         self.gclient(['sync'])
         results = self.gclient(['revinfo', '--actual'])
-        out = (
-            'src: %(base)srepo_22@%(hash1)s\n'
-            'src/another_gcs_dep: gs://456bucket/Linux/llvmfile.tar.gz@None\n'
-            'src/gcs_dep: gs://123bucket/deadbeef@None\n'
-            'src/gcs_dep_with_output_file: '
-            'gs://789bucket/clang-format-version123@None\n' % {
-                'base': self.git_base,
-                'hash1': self.githash('repo_22', 1),
-            })
+        out = ('src: %(base)srepo_22@%(hash1)s\n'
+               'src/another_gcs_dep:Linux/llvmfile.tar.gz: '
+               'gs://456bucket/Linux/llvmfile.tar.gz\n'
+               'src/gcs_dep:deadbeef: gs://123bucket/deadbeef\n'
+               'src/gcs_dep_with_output_file:clang-format-version123: '
+               'gs://789bucket/clang-format-version123\n' % {
+                   'base': self.git_base,
+                   'hash1': self.githash('repo_22', 1),
+               })
         self.check((out, '', 0), results)
