Reland "Reland "Add support for GCS deps""

This is a reland of commit 46d5382f69

What's fixed: Removed the call_google_storage refactor so
that all `import download_from_google_storage` callsites
(in depot_tools and chromium/src) won't be affected.
Instead, gclient.py will import Gsutil from download_from_google_storage.

Original change's description:
> Reland "Add support for GCS deps"
>
> This is a reland of commit 3eedee7b55
>
> Fixed more imports in download_google_storage.py
>
> Original change's description:
> > Add support for GCS deps
> >
> > Also take out GCS calling logic from download_google_storage and
> > into call_google_storage.
> >
> > GCS deps look like:
> >    'src/third_party/node/linux': {
> >        'dep_type': 'gcs',
> >        'condition': 'checkout_linux',
> >        'bucket': 'chromium-nodejs/20.11.0',
> >        'object_name': '46795170ff5df9831955f163f6966abde581c8af',
> >        'sha256sum': '887504c37404898ca41b896f448ee6d7fc24179d8fb6a4b79d028ab7e1b7153d',
> >    },
> >
> >    'src/third_party/llvm-build/Release+Asserts': {
> >        'dep_type': 'gcs',
> >        'condition': 'checkout_linux',
> >        'bucket': 'chromium-browser-clang',
> >        'object_name': 'Linux_x64/clang-llvmorg-18-init-17730-gf670112a-2.tar.xz',
> >        'sha256sum': '1e46df9b4e63c074064d75646310cb76be2f19815997a8486987189d80f991e8',
> >    },
> >
> > Example directory for src/third_party/node/linux after gclient sync:
> > - tar_file.gz is the downloaded file from GCS.
> > - node_linux_x64/ is extracted in its path.
> > - `hash` contains the sha of GCS filename.
> > ```
> > chromium/src/ ->
> >    third_party/node/linux/ ->
> >        hash, tar_file.gz, node_linux_x64/
> > ```
> >
> > Bug: b/324418194
> > Change-Id: Ibcbbff27e211f194ddb8a08494af56570a84a12b
> > Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5299722
> > Commit-Queue: Stephanie Kim <kimstephanie@google.com>
> > Reviewed-by: Joanna Wang <jojwang@chromium.org>
>
> Bug: b/324418194
> Change-Id: Ie64265a86abcec0135408715a45c32a8bb7c7408
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5346338
> Reviewed-by: Joanna Wang <jojwang@chromium.org>
> Commit-Queue: Stephanie Kim <kimstephanie@google.com>

Bug: b/324418194
Change-Id: I8b58dadbaa740fd9da1fbaf29b3b6ff5ef67fd12
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5352896
Commit-Queue: Stephanie Kim <kimstephanie@google.com>
Reviewed-by: Joanna Wang <jojwang@chromium.org>
changes/96/5352896/4
Stephanie Kim 2 years ago committed by LUCI CQ
parent 29e08c1737
commit 63ae5028c1

@ -82,6 +82,7 @@
__version__ = '0.7'
import copy
import hashlib
import json
import logging
import optparse
@ -91,12 +92,16 @@ import posixpath
import pprint
import re
import sys
import shutil
import tarfile
import tempfile
import time
import urllib.parse
from collections.abc import Collection, Mapping, Sequence
import detect_host_arch
import download_from_google_storage
import git_common
import gclient_eval
import gclient_paths
@ -748,6 +753,17 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
should_process=should_process,
relative=use_relative_paths,
condition=condition))
elif dep_type == 'gcs':
deps_to_add.append(
GcsDependency(parent=self,
name=name,
bucket=dep_value['bucket'],
object_name=dep_value['object_name'],
sha256sum=dep_value['sha256sum'],
custom_vars=self.custom_vars,
should_process=should_process,
relative=use_relative_paths,
condition=condition))
else:
url = dep_value.get('url')
deps_to_add.append(
@ -2483,6 +2499,153 @@ it or fix the checkout.
return self._enforced_cpu
class GcsDependency(Dependency):
    """A Dependency object that represents a single GCS bucket and object.

    The object is downloaded to `<root_dir>/<name>/<basename of object_name>`,
    checked against `sha256sum`, and extracted as a tar archive into the same
    directory. A `hash` file in that directory records the verified checksum
    so later runs can skip the download.
    """

    def __init__(self, parent, name, bucket, object_name, sha256sum,
                 custom_vars, should_process, relative, condition):
        """Builds the `gs://` URL and initializes the base Dependency.

        Args:
            parent: forwarded to Dependency.__init__.
            name: dependency name; also the output path relative to the
                root directory.
            bucket: GCS bucket portion of the URL.
            object_name: GCS object portion of the URL; may contain '/'.
            sha256sum: expected SHA-256 hex digest of the downloaded object.
            custom_vars: forwarded to Dependency.__init__.
            should_process: forwarded to Dependency.__init__; run() does
                nothing when it is false.
            relative: forwarded to Dependency.__init__.
            condition: forwarded to Dependency.__init__.
        """
        self.bucket = bucket
        self.object_name = object_name
        self.sha256sum = sha256sum
        url = 'gs://{bucket}/{object_name}'.format(
            bucket=self.bucket,
            object_name=self.object_name,
        )
        super(GcsDependency, self).__init__(parent=parent,
                                            name=name,
                                            url=url,
                                            managed=None,
                                            custom_deps=None,
                                            custom_vars=custom_vars,
                                            custom_hooks=None,
                                            deps_file=None,
                                            should_process=should_process,
                                            should_recurse=False,
                                            relative=relative,
                                            condition=condition)

    #override
    def run(self, revision_overrides, command, args, work_queue, options,
            patch_refs, target_branches, skip_sync_revisions):
        """Downloads GCS package, then delegates to Dependency.run()."""
        # Let logging do the interpolation so it is skipped when INFO is off.
        logging.info('GcsDependency(%s).run()', self.name)
        if not self.should_process:
            return
        self.DownloadGoogleStorage()
        super(GcsDependency,
              self).run(revision_overrides, command, args, work_queue, options,
                        patch_refs, target_branches, skip_sync_revisions)

    def WriteFilenameHash(self, sha1, hash_file):
        """Writes `sha1` followed by a newline to `hash_file`.

        The parameter name is kept for compatibility; the value passed by
        DownloadGoogleStorage() is the SHA-256 digest of the downloaded file.
        """
        with open(hash_file, 'w') as f:
            f.write(sha1)
            f.write('\n')

    def IsDownloadNeeded(self, output_dir, output_file):
        """Check if download and extract is needed.

        Returns:
            False only when `output_file` exists and `<output_dir>/hash` can
            be read and matches `self.sha256sum`; True otherwise.
        """
        if not os.path.exists(output_file):
            return True
        hash_file = os.path.join(output_dir, 'hash')
        try:
            with open(hash_file, 'r') as f:
                existing_hash = f.read().rstrip()
        except IOError:
            # Missing or unreadable hash file: treat as not downloaded.
            return True
        return existing_hash != self.sha256sum

    def GetSha256Sum(self, filename):
        """Returns the SHA-256 hex digest of the file at `filename`."""
        sha = hashlib.sha256()
        with open(filename, 'rb') as f:
            # Read in 1mb chunks, so it doesn't all have to be loaded into
            # memory.
            for chunk in iter(lambda: f.read(1024 * 1024), b''):
                sha.update(chunk)
        return sha.hexdigest()

    def _CreateFakeTarball(self, output_file):
        """Writes a small gzip'd tar to `output_file` for GCLIENT_TEST runs.

        The archive contains `extracted_dir/extracted_file`. The staging
        directory is temporary and is removed before returning.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            copy_dir = os.path.join(tmpdir, self.name, 'extracted_dir')
            os.makedirs(copy_dir)
            with open(os.path.join(copy_dir, 'extracted_file'), 'w+') as f:
                f.write('extracted text')
            with tarfile.open(output_file, "w:gz") as tar:
                tar.add(copy_dir, arcname=os.path.basename(copy_dir))

    def DownloadGoogleStorage(self):
        """Downloads, verifies and extracts the GCS object when needed.

        Does nothing if IsDownloadNeeded() reports the existing download is
        current. Otherwise the output directory is recreated from scratch.

        Raises:
            Exception: the downloaded file's SHA-256 does not match
                `self.sha256sum`.
        """
        gcs_file_name = self.object_name.split('/')[-1]
        root_dir = self.root.root_dir

        # Directory of the extracted tarfile contents
        output_dir = os.path.join(root_dir, self.name)
        output_file = os.path.join(output_dir, gcs_file_name)

        if not self.IsDownloadNeeded(output_dir, output_file):
            return

        # Recreating the directory drops the old hash file, archive and
        # extracted contents together.
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)
        hash_file = os.path.join(output_dir, 'hash')

        if os.getenv('GCLIENT_TEST') == '1':
            # Create fake tar file; the checksum is stubbed to the value the
            # fake repos declare.
            self._CreateFakeTarball(output_file)
            calculated_sha256sum = 'abcd123'
        else:
            gcs_url = 'gs://%s/%s' % (self.bucket, self.object_name)
            gsutil = download_from_google_storage.Gsutil(
                download_from_google_storage.GSUTIL_DEFAULT_PATH)
            # NOTE(review): the result of check_call is not inspected here;
            # confirm how Gsutil reports a failed copy.
            gsutil.check_call('cp', gcs_url, output_file)
            calculated_sha256sum = self.GetSha256Sum(output_file)

        if calculated_sha256sum != self.sha256sum:
            raise Exception('sha256sum does not match calculated hash. '
                            '{original} vs {calculated}'.format(
                                original=self.sha256sum,
                                calculated=calculated_sha256sum,
                            ))

        # NOTE(review): extractall() runs without an extraction filter. The
        # archive was checked against the pinned sha256sum above; consider
        # tarfile's `filter` argument if the supported Python versions allow.
        with tarfile.open(output_file, 'r:*') as tar:
            tar.extractall(path=output_dir)
        # Written last, so an interrupted run is retried on the next sync.
        self.WriteFilenameHash(calculated_sha256sum, hash_file)

    #override
    def GetScmName(self):
        """Always 'gcs'."""
        return 'gcs'

    #override
    def CreateSCM(self, out_cb=None):
        """Create a Wrapper instance suitable for handling this GCS dependency."""
        return gclient_scm.GcsWrapper(self.url, self.root.root_dir, self.name,
                                      self.outbuf, out_cb)
class CipdDependency(Dependency):
"""A Dependency object that represents a single CIPD package."""
def __init__(self, parent, name, dep_value, cipd_root, custom_vars,

@ -131,6 +131,14 @@ _GCLIENT_DEPS_SCHEMA = _NodeDictSchema({
schema.Optional('dep_type', default='cipd'):
str,
}),
# GCS content.
_NodeDictSchema({
'bucket': str,
'object_name': str,
'sha256sum': str,
schema.Optional('condition'): str,
schema.Optional('dep_type', default='gcs'): str,
}),
),
})

@ -1913,6 +1913,57 @@ class CipdWrapper(SCMWrapper):
"""
class GcsWrapper(SCMWrapper):
    """SCM wrapper for dependencies stored in Google Cloud Storage.

    Every operation defined here is a no-op; the wrapper reports no cache
    mirror and no actual remote URL, and always treats the remote URL as
    matching.
    """
    name = 'gcs'

    def __init__(self,
                 url=None,
                 root_dir=None,
                 relpath=None,
                 out_fh=None,
                 out_cb=None):
        """Forwards all arguments unchanged to SCMWrapper.__init__."""
        super().__init__(url=url,
                         root_dir=root_dir,
                         relpath=relpath,
                         out_fh=out_fh,
                         out_cb=out_cb)

    #override
    def GetCacheMirror(self):
        """Returns None."""
        return None

    #override
    def GetActualRemoteURL(self, options):
        """Returns None; `options` is not used."""
        return None

    #override
    def DoesRemoteURLMatch(self, options):
        """Always returns True; `options` is discarded."""
        del options
        return True

    def revert(self, options, args, file_list):
        """No-op."""

    def diff(self, options, args, file_list):
        """No-op: GCS has no notion of diffing."""

    def pack(self, options, args, file_list):
        """No-op: GCS has no notion of diffing, so there is nothing to pack."""

    def revinfo(self, options, args, file_list):
        """No-op."""

    def status(self, options, args, file_list):
        """No-op."""

    def update(self, options, args, file_list):
        """No-op."""
class CogWrapper(SCMWrapper):
"""Wrapper for Cog, all no-op."""
name = 'cog'

@ -16,6 +16,7 @@ import random
import re
import socket
import sys
import tarfile
import tempfile
import textwrap
import time
@ -47,6 +48,9 @@ def read_tree(tree_root):
for f in [join(root, f) for f in files if not f.startswith('.')]:
filepath = f[len(tree_root) + 1:].replace(os.sep, '/')
assert len(filepath) > 0, f
if tarfile.is_tarfile(join(root, f)):
tree[filepath] = 'tarfile'
continue
with io.open(join(root, f), encoding='utf-8') as f:
tree[filepath] = f.read()
return tree
@ -210,7 +214,7 @@ class FakeReposBase(object):
class FakeRepos(FakeReposBase):
"""Implements populateGit()."""
NB_GIT_REPOS = 21
NB_GIT_REPOS = 23
def populateGit(self):
# Testing:
@ -881,6 +885,63 @@ deps = {
},
)
self._commit_git(
'repo_22', {
'DEPS':
textwrap.dedent("""\
vars = {}
deps = {
'src/gcs_dep': {
'bucket': '123bucket',
'object_name': 'deadbeef',
'dep_type': 'gcs',
'sha256sum': 'abcd123',
},
'src/another_gcs_dep': {
'bucket': '456bucket',
'object_name': 'Linux/llvmfile.tar.gz',
'dep_type': 'gcs',
'sha256sum': 'abcd123',
},
}"""),
'origin':
'git/repo_22@1\n'
})
self._commit_git(
'repo_23', {
'DEPS': """
deps = {
'src/repo12': '/repo_12',
}""",
'origin': 'git/repo_23@1\n',
})
self._commit_git(
'repo_23', {
'DEPS': """
deps = {
'src/repo12': '/repo_12@refs/changes/1212',
}""",
'origin': 'git/repo_23@2\n',
})
# src/repo12 is now a GCS dependency.
self._commit_git(
'repo_23', {
'DEPS': """
deps = {
'src/repo12': {
'bucket': 'bucket123',
'object_name': 'path_to_file.tar.gz',
'dep_type': 'gcs',
'sha256sum': 'abcd123',
},
}
""",
'origin': 'git/repo_23@3\n'
})
class FakeRepoSkiaDEPS(FakeReposBase):
"""Simulates the Skia DEPS transition in Chrome."""

@ -0,0 +1,140 @@
#!/usr/bin/env vpython3
# Copyright (c) 2024 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Smoke tests for gclient.py.
Shell out 'gclient' and run gcs tests.
"""
import logging
import os
import sys
import unittest
from unittest import mock
import gclient_smoketest_base
import subprocess2
# Parent of the directory that contains this file. setUp() puts the
# 'testing_support' subdirectory of this path at the front of PATH.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
class GClientSmokeGcs(gclient_smoketest_base.GClientSmokeBase):
    """Smoke tests that run gclient against fake repos with GCS deps."""

    # Files expected under src/repo12 once repo_23@3 (where src/repo12 is a
    # GCS dependency) has been synced.
    _REPO12_GCS_FILES = {
        'src/repo12/extracted_dir/extracted_file': 'extracted text',
        'src/repo12/hash': 'abcd123\n',
        'src/repo12/path_to_file.tar.gz': 'tarfile',
    }

    def setUp(self):
        super().setUp()
        self.enabled = self.FAKE_REPOS.set_up_git()
        if not self.enabled:
            self.skipTest('git fake repos not available')
        testing_support = os.path.join(ROOT_DIR, 'testing_support')
        self.env['PATH'] = testing_support + os.pathsep + self.env['PATH']

    def _SyncRepo23(self, revision, *extra_args):
        """Runs a verbose `gclient sync` pinned to a repo_23 revision."""
        self.gclient([
            'sync', '-v', '-v', '-v', '--revision',
            self.githash('repo_23', revision), *extra_args
        ])

    def testSyncGcs(self):
        """Syncing repo_22 produces archive, hash and contents per dep."""
        self.gclient(['config', self.git_base + 'repo_22', '--name', 'src'])
        self.gclient(['sync'])

        expected = self.mangle_git_tree(('repo_22@1', 'src'))
        gcs_files = {
            'src/another_gcs_dep/hash':
            'abcd123\n',
            'src/another_gcs_dep/llvmfile.tar.gz':
            'tarfile',
            'src/another_gcs_dep/extracted_dir/extracted_file':
            'extracted text',
            'src/gcs_dep/deadbeef':
            'tarfile',
            'src/gcs_dep/hash':
            'abcd123\n',
            'src/gcs_dep/extracted_dir/extracted_file':
            'extracted text',
        }
        expected.update(gcs_files)
        self.assertTree(expected)

    def testConvertGitToGcs(self):
        """src/repo12 moves from a git dependency to a GCS dependency."""
        self.gclient(['config', self.git_base + 'repo_23', '--name', 'src'])

        # repo_23@1 has src/repo12 as a git dependency.
        self._SyncRepo23(1)
        expected = self.mangle_git_tree(('repo_23@1', 'src'),
                                        ('repo_12@1', 'src/repo12'))
        self.assertTree(expected)

        # repo_23@3 has src/repo12 as a gcs dependency.
        self._SyncRepo23(3, '--delete_unversioned_trees')
        expected = self.mangle_git_tree(('repo_23@3', 'src'))
        expected.update(self._REPO12_GCS_FILES)
        self.assertTree(expected)

    def testConvertGcsToGit(self):
        """src/repo12 moves from a GCS dependency back to a git dependency."""
        self.gclient(['config', self.git_base + 'repo_23', '--name', 'src'])

        # repo_23@3 has src/repo12 as a gcs dependency.
        self._SyncRepo23(3, '--delete_unversioned_trees')
        expected = self.mangle_git_tree(('repo_23@3', 'src'))
        expected.update(self._REPO12_GCS_FILES)
        self.assertTree(expected)

        # repo_23@1 has src/repo12 as a git dependency. This sync is run
        # without --delete_unversioned_trees, and the files from the GCS
        # sync are expected to remain next to the git checkout.
        self._SyncRepo23(1)
        expected = self.mangle_git_tree(('repo_23@1', 'src'),
                                        ('repo_12@1', 'src/repo12'))
        expected.update(self._REPO12_GCS_FILES)
        self.assertTree(expected)

    def testRevInfo(self):
        """`gclient revinfo` lists each GCS dep with its gs:// URL."""
        self.gclient(['config', self.git_base + 'repo_22', '--name', 'src'])
        self.gclient(['sync'])
        results = self.gclient(['revinfo'])
        substitutions = {
            'base': self.git_base,
        }
        out = ('src: %(base)srepo_22\n'
               'src/another_gcs_dep: gs://456bucket/Linux/llvmfile.tar.gz\n'
               'src/gcs_dep: gs://123bucket/deadbeef\n' % substitutions)
        self.check((out, '', 0), results)

    def testRevInfoActual(self):
        """`gclient revinfo --actual` reports `@None` for GCS deps."""
        self.gclient(['config', self.git_base + 'repo_22', '--name', 'src'])
        self.gclient(['sync'])
        results = self.gclient(['revinfo', '--actual'])
        substitutions = {
            'base': self.git_base,
            'hash1': self.githash('repo_22', 1),
        }
        out = (
            'src: %(base)srepo_22@%(hash1)s\n'
            'src/another_gcs_dep: gs://456bucket/Linux/llvmfile.tar.gz@None\n'
            'src/gcs_dep: gs://123bucket/deadbeef@None\n' % substitutions)
        self.check((out, '', 0), results)
if __name__ == '__main__':
    # A -v on the command line switches on debug-level logging before the
    # unittest runner takes over.
    debug_requested = '-v' in sys.argv
    if debug_requested:
        logging.basicConfig(level=logging.DEBUG)
    unittest.main()

@ -1022,6 +1022,33 @@ class CipdWrapperTestCase(unittest.TestCase):
scm.update(None, (), [])
class GcsWrapperTestCase(unittest.TestCase):
    """Checks that GcsWrapper's SCM operations can be called as no-ops."""

    def setUp(self):
        # A bare mkdtemp() would leave one directory behind per test;
        # TemporaryDirectory plus addCleanup removes it after each test.
        workdir = tempfile.TemporaryDirectory()
        self.addCleanup(workdir.cleanup)
        self.workdir = workdir.name
        self.url = 'gs://123bucket/path_to_tar.gz'

    def createScm(self):
        """Returns a GcsWrapper rooted at the per-test working directory."""
        return gclient_scm.GcsWrapper(url=self.url,
                                      root_dir=self.workdir,
                                      relpath='fake_relpath')

    def testRevert(self):
        """Checks that revert does nothing."""
        scm = self.createScm()
        scm.revert(None, (), [])

    def testRevinfo(self):
        """Checks that revinfo does nothing."""
        scm = self.createScm()
        scm.revinfo(None, (), [])

    def testUpdate(self):
        """Checks that update does nothing."""
        scm = self.createScm()
        scm.update(None, (), [])
class BranchHeadsFakeRepo(fake_repos.FakeReposBase):
def populateGit(self):
# Creates a tree that looks like this:

Loading…
Cancel
Save