You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			271 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			271 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			Python
		
	
#!/usr/bin/env python3
 | 
						|
# Copyright (c) 2024 The Chromium Authors. All rights reserved.
 | 
						|
# Use of this source code is governed by a BSD-style license that can be
 | 
						|
# found in the LICENSE file.
 | 
						|
"""Uploads files to Google Storage and outputs a DEPS blob."""
 | 
						|
 | 
						|
import hashlib
import json
import optparse
import os
import re
import sys
import tarfile
import tempfile
from typing import List
from typing import Optional

from download_from_google_storage import Gsutil
from download_from_google_storage import GSUTIL_DEFAULT_PATH
 | 
						|
 | 
						|
# Placeholder returned by upload_to_google_storage() when the upload succeeded
# but no generation number could be parsed from gsutil's status output.
MISSING_GENERATION_MSG = (
    'missing generation number, please retrieve from Cloud Storage '
    'before saving to DEPS')
 | 
						|
 | 
						|
# Usage text handed to optparse.OptionParser; "%prog" is expanded by optparse
# to the program name.
USAGE_STRING = """%prog [options] target [target2 ...].
Target(s) are the files or directories intended to be uploaded to Google Storage.
If a single target is a directory, it will be compressed and uploaded as a
tar.gz file.
If target is "-", then a list of directories will be taken from standard input.
The list of directories will be compressed together and uploaded as one tar.gz
file.

Example usage
------------
./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name hello_world.txt

./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name my_dir1

./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name my_dir1 my_dir2

Scan the current directory and upload all files larger than 1MB:
find . -name .svn -prune -o -size +1000k -type f -print0 |
./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name -
"""
 | 
						|
 | 
						|
 | 
						|
def get_targets(args: List[str], parser: optparse.OptionParser,
                use_null_terminator: bool) -> List[str]:
    """Get target(s) to upload to GCS.

    Args:
        args: Positional command-line arguments.
        parser: Option parser used to report usage errors; parser.error()
            prints the message and exits.
        use_null_terminator: When reading targets from standard input, split
            on '\\0' instead of on line boundaries.

    Returns:
        args unchanged, or, when args is exactly ['-'], the non-empty entries
        read from standard input.
    """
    if not args:
        parser.error('Missing target.')

    if len(args) == 1 and args[0] == '-':
        # Take stdin as a newline or null separated list of files.
        data = sys.stdin.read()
        if use_null_terminator:
            entries = data.split('\0')
        else:
            entries = data.splitlines()
        # Drop empty entries: "find ... -print0" terminates every name with
        # '\0', so a plain split leaves a trailing '' that is not a target.
        targets = [entry for entry in entries if entry]
        if not targets:
            parser.error('Missing target.')
        return targets

    return args
 | 
						|
 | 
						|
 | 
						|
def create_archive(dirs: List[str]) -> str:
    """Compress the given directories into one temporary tar.gz file.

    Args:
        dirs: Paths to add to the archive, in order.

    Returns:
        Path of the newly created archive. The file is not deleted here; the
        caller owns it.
    """
    fd, filename = tempfile.mkstemp(suffix='.tar.gz')
    # mkstemp() hands back an already-open descriptor. tarfile reopens the
    # file by name, so close the descriptor now instead of leaking it.
    os.close(fd)
    try:
        with tarfile.open(filename, 'w:gz') as tar:
            for d in dirs:
                tar.add(d)
    except BaseException:
        # Do not leave a partial archive behind when archiving fails.
        os.remove(filename)
        raise
    return filename
 | 
						|
 | 
						|
 | 
						|
def _is_path_inside(path: str, root: str) -> bool:
    """Return True if path is root itself or lies somewhere below it."""
    return path == root or path.startswith(root.rstrip(os.sep) + os.sep)


def validate_archive_dirs(dirs: List[str]) -> bool:
    """Validate the list of directories to be archived.

    Every entry must be a directory located directly below the current
    working directory, must not be '..', and, if it is a symlink, must
    resolve to a location inside one of the non-symlink entries.

    Args:
        dirs: Candidate directory names, relative to the current directory.

    Returns:
        True if every entry is acceptable, False otherwise.
    """
    # Names of the directories directly below cwd. Computed once, since it is
    # the same for every entry; the default covers an unreadable cwd.
    cwd_subdirs = next(os.walk('.'), ('.', [], []))[1]
    # Resolved locations of the real (non-symlink) entries. A symlinked entry
    # is compared against these rather than against itself, which would
    # trivially match.
    real_roots = [os.path.realpath(d) for d in dirs if not os.path.islink(d)]

    for d in dirs:
        # We don't allow .. in paths in our archives.
        if d == '..':
            return False
        # We only allow dirs.
        if not os.path.isdir(d):
            return False
        # Symlinks must point to a target inside the dirs
        if os.path.islink(d):
            target = os.path.realpath(d)
            if not any(_is_path_inside(target, root) for root in real_roots):
                return False
        # We required that the subdirectories we are archiving are all just
        # below cwd.
        if d not in cwd_subdirs:
            return False

    return True
 | 
						|
 | 
						|
 | 
						|
def get_sha256sum(filename: str) -> str:
    """Return the hex-encoded SHA-256 digest of the file's contents."""
    block_size = 1024 * 1024
    digest = hashlib.sha256()
    with open(filename, 'rb') as stream:
        # Feed the hash one 1 MiB block at a time so the whole file never has
        # to be held in memory; read() returns b'' once the end is reached.
        for block in iter(lambda: stream.read(block_size), b''):
            digest.update(block)
    return digest.hexdigest()
 | 
						|
 | 
						|
 | 
						|
def upload_to_google_storage(file: str, base_url: str, object_name: str,
                             gsutil: Gsutil, force: bool, gzip: str,
                             dry_run: bool) -> Optional[str]:
    """Upload file to GCS.

    Args:
        file: Local path of the file to upload.
        base_url: Destination prefix; the object URL is base_url/object_name.
        object_name: Name of the object to create.
        gsutil: Wrapper used to invoke gsutil.
        force: If true, upload even when the object already exists.
        gzip: Value passed to gsutil's -z option (a comma-separated list of
            extensions according to the --gzip help text); falsy to omit -z.
        dry_run: If true, only perform the existence check and upload nothing.

    Returns:
        The generation number of the uploaded object as a string of digits,
        MISSING_GENERATION_MSG if the upload succeeded but no generation
        number could be parsed from gsutil's output, or None when dry_run is
        set.

    Raises:
        Exception: If the object already exists and force is not set, or if
            gsutil exits with a non-zero code while uploading.
    """
    file_url = '%s/%s' % (base_url, object_name)
    if gsutil.check_call('ls', file_url)[0] == 0 and not force:
        # Object exists; refuse to overwrite it when the long listing reports
        # an ETag for it.
        _, out, _ = gsutil.check_call_with_retries('ls', '-L', file_url)
        etag_match = re.search(r'ETag:\s+\S+', out)
        if etag_match:
            raise Exception('File with url %s already exists' % file_url)
    if dry_run:
        return None
    print("Uploading %s as %s" % (file, file_url))
    gsutil_args = ['-h', 'Cache-Control:public, max-age=31536000', 'cp', '-v']
    if gzip:
        gsutil_args.extend(['-z', gzip])
    gsutil_args.extend([file, file_url])
    code, _, err = gsutil.check_call_with_retries(*gsutil_args)
    if code != 0:
        raise Exception(
            code, 'Encountered error on uploading %s to %s\n%s' %
            (file, file_url, err))
    # Raw string so that \d reaches the regex engine instead of being treated
    # as an (invalid) string escape.
    pattern = re.compile(re.escape(file_url) + r'#(?P<generation>\d+)')
    # The generation number is printed as part of the progress / status info
    # which gsutil outputs to stderr to keep separated from any final output
    # data.
    for line in err.strip().splitlines():
        m = pattern.search(line)
        if m:
            return m.group('generation')
    print('Warning: generation number could not be parsed from status '
          f'info: {err}')
    return MISSING_GENERATION_MSG
 | 
						|
 | 
						|
 | 
						|
def construct_deps_blob(bucket: str, object_name: str, file: str,
                        generation: Optional[str]) -> dict:
    """Build the blob hint that would need to be added to a DEPS file.

    Args:
        bucket: Name of the GCS bucket the object lives in.
        object_name: Name of the object inside the bucket.
        file: Local path of the uploaded file; used for its sha256sum and
            size.
        generation: Generation number as a string of digits. A value that is
            not a number (such as MISSING_GENERATION_MSG) is kept verbatim,
            and a missing value (None, as returned for a dry run) is replaced
            by MISSING_GENERATION_MSG, so the reader is told to fill it in.

    Returns:
        A JSON-serializable dict describing the 'gcs' dependency.
    """
    try:
        generation_value = int(generation)
    except (TypeError, ValueError):
        # No usable number: emit the human-readable hint instead of crashing
        # before the rest of the blob can be printed.
        generation_value = generation if generation else MISSING_GENERATION_MSG

    return {
        'path': {
            'dep_type':
            'gcs',
            'bucket':
            bucket,
            'objects': [{
                'object_name': object_name,
                'sha256sum': get_sha256sum(file),
                'size_bytes': os.path.getsize(file),
                'generation': generation_value,
            }],
        }
    }
 | 
						|
 | 
						|
 | 
						|
def _find_gsutil(gsutil_path, boto_path):
    """Return a Gsutil for gsutil_path, else for the first gsutil on PATH.

    Returns None when gsutil_path does not exist and no directory listed in
    the PATH environment variable contains an entry named 'gsutil'.
    """
    if os.path.exists(gsutil_path):
        return Gsutil(gsutil_path, boto_path=boto_path)
    for directory in os.environ.get('PATH', '').split(os.pathsep):
        candidate = os.path.join(directory, 'gsutil')
        # Stop at the first hit so earlier PATH entries take precedence.
        if directory and os.path.exists(candidate):
            return Gsutil(candidate, boto_path=boto_path)
    return None


def main():
    """Parse the command line, upload the target(s) and print the DEPS blob.

    Returns:
        The process exit status: 0 after the blob has been printed, or the
        return value of "gsutil config" when --config is given. Usage errors
        exit through parser.error().
    """
    parser = optparse.OptionParser(USAGE_STRING)
    parser.add_option('-b',
                      '--bucket',
                      help='Google Storage bucket to upload to.')
    parser.add_option('-o',
                      '--object-name',
                      help='Optional object name of uploaded tar file. '
                      'If empty, the sha256sum will be the object name.')
    parser.add_option('-d',
                      '--dry-run',
                      action='store_true',
                      help='Check if file already exists on GS without '
                      'uploading it and output DEP blob.')
    parser.add_option('-c',
                      '--config',
                      action='store_true',
                      help='Alias for "gsutil config".  Run this if you want '
                      'to initialize your saved Google Storage '
                      'credentials.  This will create a read-only '
                      'credentials file in ~/.boto.depot_tools.')
    parser.add_option('-e', '--boto', help='Specify a custom boto file.')
    parser.add_option('-f',
                      '--force',
                      action='store_true',
                      help='Force upload even if remote file exists.')
    parser.add_option('-g',
                      '--gsutil_path',
                      default=GSUTIL_DEFAULT_PATH,
                      help='Path to the gsutil script.')
    parser.add_option('-0',
                      '--use_null_terminator',
                      action='store_true',
                      help='Use \\0 instead of \\n when parsing '
                      'the file list from stdin.  This is useful if the input '
                      'is coming from "find ... -print0".')
    parser.add_option('-z',
                      '--gzip',
                      metavar='ext',
                      help='For files which end in <ext> gzip them before '
                      'upload. '
                      'ext is a comma-separated list')
    (options, args) = parser.parse_args()

    # Enumerate our inputs.
    input_filenames = get_targets(args, parser, options.use_null_terminator)
    if not input_filenames:
        # E.g. "-" was given but nothing was read from standard input.
        parser.error('Missing target.')

    # Path of the temporary archive created below, if any, so that it can be
    # removed again once we are done with it.
    archive = None
    try:
        if len(input_filenames) > 1 or os.path.isdir(input_filenames[0]):
            if not validate_archive_dirs(input_filenames):
                # parser.error() prints the message and exits.
                parser.error(
                    'Only directories just below cwd are valid entries. '
                    'Entries cannot contain .. and entries can not be symlinks. '
                    'Entries was %s' % input_filenames)
            archive = create_archive(input_filenames)
            file = archive
        else:
            file = input_filenames[0]

        object_name = options.object_name
        if not object_name:
            object_name = get_sha256sum(file)

        # Make sure we can find a working instance of gsutil, honoring
        # -g/--gsutil_path (which defaults to GSUTIL_DEFAULT_PATH).
        gsutil = _find_gsutil(options.gsutil_path, options.boto)
        if not gsutil:
            parser.error('gsutil not found in %s, bad depot_tools checkout?' %
                         options.gsutil_path)

        # Passing in -c/--config will run our copy of GSUtil, then quit.
        if options.config:
            print('===Note from depot_tools===')
            print(
                'If you do not have a project ID, enter "0" when asked for one.'
            )
            print('===End note from depot_tools===')
            print()
            gsutil.check_call('version')
            return gsutil.call('config')

        if not options.bucket:
            # Without this the upload would target the literal "gs://None".
            parser.error('Missing bucket (-b/--bucket).')

        base_url = 'gs://%s' % options.bucket

        generation = upload_to_google_storage(file, base_url, object_name,
                                              gsutil, options.force,
                                              options.gzip, options.dry_run)
        print(
            json.dumps(construct_deps_blob(options.bucket, object_name, file,
                                           generation),
                       indent=2))
        return 0
    finally:
        if archive is not None and os.path.exists(archive):
            os.remove(archive)
 | 
						|
 | 
						|
 | 
						|
if __name__ == '__main__':
    # Run the tool; Ctrl-C is reported on stderr and turned into exit status 1
    # instead of a traceback.
    try:
        exit_status = main()
    except KeyboardInterrupt:
        sys.stderr.write('interrupted\n')
        exit_status = 1
    sys.exit(exit_status)
 |