diff --git a/recipes/README.recipes.md b/recipes/README.recipes.md index 7d3b461c6..ad00a47fc 100644 --- a/recipes/README.recipes.md +++ b/recipes/README.recipes.md @@ -516,13 +516,13 @@ DEPRECATED. Consider using gerrit.get_change_description instead. — **def [upload](/recipes/recipe_modules/git_cl/api.py#47)(self, message, upload_args=None, \*\*kwargs):** ### *recipe_modules* / [gitiles](/recipes/recipe_modules/gitiles) -[DEPS](/recipes/recipe_modules/gitiles/__init__.py#1): [recipe\_engine/json][recipe_engine/recipe_modules/json], [recipe\_engine/path][recipe_engine/recipe_modules/path], [recipe\_engine/python][recipe_engine/recipe_modules/python], [recipe\_engine/raw\_io][recipe_engine/recipe_modules/raw_io], [recipe\_engine/url][recipe_engine/recipe_modules/url] +[DEPS](/recipes/recipe_modules/gitiles/__init__.py#5): [recipe\_engine/json][recipe_engine/recipe_modules/json], [recipe\_engine/path][recipe_engine/recipe_modules/path], [recipe\_engine/python][recipe_engine/recipe_modules/python], [recipe\_engine/raw\_io][recipe_engine/recipe_modules/raw_io], [recipe\_engine/step][recipe_engine/recipe_modules/step], [recipe\_engine/url][recipe_engine/recipe_modules/url] #### **class [Gitiles](/recipes/recipe_modules/gitiles/api.py#10)([RecipeApi][recipe_engine/wkt/RecipeApi]):** Module for polling a git repository using the Gitiles web interface. -— **def [commit\_log](/recipes/recipe_modules/gitiles/api.py#102)(self, url, commit, step_name=None, attempts=None):** +— **def [commit\_log](/recipes/recipe_modules/gitiles/api.py#113)(self, url, commit, step_name=None, attempts=None):** Returns: (dict) the Gitiles commit log structure for a given commit. @@ -532,7 +532,23 @@ Args: step_name (str): If not None, override the step name. attempts (int): Number of times to try the request before failing. 
-— **def [download\_file](/recipes/recipe_modules/gitiles/api.py#118)(self, repository_url, file_path, branch='master', step_name=None, attempts=None, \*\*kwargs):** +— **def [download\_archive](/recipes/recipe_modules/gitiles/api.py#155)(self, repository_url, destination, revision='refs/heads/master'):** + +Downloads an archive of the repo and extracts it to `destination`. + +If the gitiles server attempts to provide a tarball with paths which escape +`destination`, this function will extract all valid files and then +raise StepFailure with an attribute `StepFailure.gitiles_skipped_files` +containing the names of the files that were skipped. + +Args: + repository_url (str): Full URL to the repository + destination (Path): Local path to extract the archive to. Must not exist + prior to this call. + revision (str): The ref or revision in the repo to download. Defaults to + 'refs/heads/master'. + +— **def [download\_file](/recipes/recipe_modules/gitiles/api.py#129)(self, repository_url, file_path, branch='master', step_name=None, attempts=None, \*\*kwargs):** Downloads raw file content from a Gitiles repository. @@ -546,7 +562,7 @@ Args: Returns: Raw file content. -— **def [log](/recipes/recipe_modules/gitiles/api.py#56)(self, url, ref, limit=0, cursor=None, step_name=None, attempts=None, \*\*kwargs):** +— **def [log](/recipes/recipe_modules/gitiles/api.py#67)(self, url, ref, limit=0, cursor=None, step_name=None, attempts=None, \*\*kwargs):** Returns the most recent commits under the given ref with properties. @@ -569,7 +585,7 @@ Returns: Cursor can be used for subsequent calls to log for paging. If None, signals that there are no more commits to fetch. -— **def [refs](/recipes/recipe_modules/gitiles/api.py#44)(self, url, step_name='refs', attempts=None):** +— **def [refs](/recipes/recipe_modules/gitiles/api.py#55)(self, url, step_name='refs', attempts=None):** Returns a list of refs in the remote repository. 
### *recipe_modules* / [gsutil](/recipes/recipe_modules/gsutil) @@ -778,9 +794,9 @@ like checkout or compile), and some of these tests have failed. — **def [RunSteps](/recipes/recipe_modules/git_cl/examples/full.py#17)(api):** ### *recipes* / [gitiles:examples/full](/recipes/recipe_modules/gitiles/examples/full.py) -[DEPS](/recipes/recipe_modules/gitiles/examples/full.py#5): [gitiles](#recipe_modules-gitiles), [recipe\_engine/json][recipe_engine/recipe_modules/json], [recipe\_engine/properties][recipe_engine/recipe_modules/properties] +[DEPS](/recipes/recipe_modules/gitiles/examples/full.py#5): [gitiles](#recipe_modules-gitiles), [recipe\_engine/json][recipe_engine/recipe_modules/json], [recipe\_engine/path][recipe_engine/recipe_modules/path], [recipe\_engine/properties][recipe_engine/recipe_modules/properties], [recipe\_engine/step][recipe_engine/recipe_modules/step] -— **def [RunSteps](/recipes/recipe_modules/gitiles/examples/full.py#12)(api):** +— **def [RunSteps](/recipes/recipe_modules/gitiles/examples/full.py#14)(api):** ### *recipes* / [gsutil:examples/full](/recipes/recipe_modules/gsutil/examples/full.py) [DEPS](/recipes/recipe_modules/gsutil/examples/full.py#5): [gsutil](#recipe_modules-gsutil), [recipe\_engine/path][recipe_engine/recipe_modules/path] diff --git a/recipes/recipe_modules/gitiles/__init__.py b/recipes/recipe_modules/gitiles/__init__.py index 16991642a..f8e8ca517 100644 --- a/recipes/recipe_modules/gitiles/__init__.py +++ b/recipes/recipe_modules/gitiles/__init__.py @@ -1,7 +1,12 @@ +# Copyright 2018 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ DEPS = [ 'recipe_engine/json', 'recipe_engine/path', 'recipe_engine/python', 'recipe_engine/raw_io', + 'recipe_engine/step', 'recipe_engine/url', ] diff --git a/recipes/recipe_modules/gitiles/api.py b/recipes/recipe_modules/gitiles/api.py index c99957067..42c4086da 100644 --- a/recipes/recipe_modules/gitiles/api.py +++ b/recipes/recipe_modules/gitiles/api.py @@ -1,4 +1,4 @@ -# Copyright 2014 The Chromium Authors. All rights reserved. +# Copyright 2018 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. @@ -11,21 +11,33 @@ class Gitiles(recipe_api.RecipeApi): """Module for polling a git repository using the Gitiles web interface.""" def _fetch(self, url, step_name, fmt, attempts=None, add_json_log=True, - log_limit=None, log_start=None, **kwargs): + log_limit=None, log_start=None, extract_to=None, **kwargs): """Fetches information from Gitiles. Arguments: + fmt (str): one of ('text', 'json', 'archive'). Instructs the underlying + gitiles_client tool how to process the HTTP response. + * text - implies the response is base64 encoded + * json - implies the response is JSON + * archive - implies the response is a compressed tarball; requires + `extract_to`. + extract_to (Path): When fmt=='archive', instructs gitiles_client to + extract the archive to this non-existent folder. log_limit: for log URLs, limit number of results. None implies 1 page, as returned by Gitiles. log_start: for log URLs, the start cursor for paging. add_json_log: if True, will spill out json into log. 
""" - assert fmt in ('json', 'text') + assert fmt in ('json', 'text', 'archive') + args = [ '--json-file', self.m.json.output(add_json_log=add_json_log), '--url', url, '--format', fmt, ] + if fmt == 'archive': + assert extract_to is not None, 'archive format requires extract_to' + args.extend(['--extract-to', extract_to]) if attempts: args.extend(['--attempts', attempts]) if log_limit is not None: @@ -37,9 +49,8 @@ class Gitiles(recipe_api.RecipeApi): args.extend([ '--accept-statuses', ','.join([str(s) for s in accept_statuses])]) - a = self.m.python( + return self.m.python( step_name, self.resource('gerrit_client.py'), args, **kwargs) - return a def refs(self, url, step_name='refs', attempts=None): """Returns a list of refs in the remote repository.""" @@ -140,3 +151,52 @@ class Gitiles(recipe_api.RecipeApi): if step_result.json.output['value'] is None: return None return base64.b64decode(step_result.json.output['value']) + + def download_archive(self, repository_url, destination, + revision='refs/heads/master'): + """Downloads an archive of the repo and extracts it to `destination`. + + If the gitiles server attempts to provide a tarball with paths which escape + `destination`, this function will extract all valid files and then + raise StepFailure with an attribute `StepFailure.gitiles_skipped_files` + containing the names of the files that were skipped. + + Args: + repository_url (str): Full URL to the repository + destination (Path): Local path to extract the archive to. Must not exist + prior to this call. + revision (str): The ref or revision in the repo to download. Defaults to + 'refs/heads/master'. 
+ """ + step_name = 'download %s @ %s' % (repository_url, revision) + fetch_url = self.m.url.join(repository_url, '+archive/%s.tgz' % (revision,)) + step_result = self._fetch( + fetch_url, + step_name, + fmt='archive', + add_json_log=False, + extract_to=destination, + step_test_data=lambda: self.m.json.test_api.output({ + 'extracted': { + 'filecount': 1337, + 'bytes': 7192345, + }, + }) + ) + self.m.path.mock_add_paths(destination) + j = step_result.json.output + if j['extracted']['filecount']: + stat = j['extracted'] + step_result.presentation.step_text += ( + '<br/>extracted %s files - %.02f MB' % ( + stat['filecount'], stat['bytes'] / (1000.0**2))) + if j.get('skipped', {}).get('filecount'): + stat = j['skipped'] + step_result.presentation.step_text += ( + '<br/>SKIPPED %s files - %.02f MB' % ( + stat['filecount'], stat['bytes'] / (1000.0**2))) + step_result.presentation.logs['skipped files'] = stat['names'] + step_result.presentation.status = self.m.step.FAILURE + ex = self.m.step.StepFailure(step_name) + ex.gitiles_skipped_files = stat['names'] + raise ex diff --git a/recipes/recipe_modules/gitiles/examples/full.expected/basic.json b/recipes/recipe_modules/gitiles/examples/full.expected/basic.json index 568ee7643..17aba30dc 100644 --- a/recipes/recipe_modules/gitiles/examples/full.expected/basic.json +++ b/recipes/recipe_modules/gitiles/examples/full.expected/basic.json @@ -547,6 +547,49 @@ ], "name": "fetch master:NONEXISTENT" }, + { + "cmd": [ + "python", + "-u", + "RECIPE_MODULE[depot_tools::gitiles]/resources/gerrit_client.py", + "--json-file", + "/path/to/tmp/json", + "--url", + "https://chromium.googlesource.com/chromium/src/+archive/refs/heads/master.tgz", + "--format", + "archive", + "--extract-to", + "[START_DIR]/archive" + ], + "name": "download https://chromium.googlesource.com/chromium/src @ refs/heads/master", + "~followup_annotations": [ + "@@@STEP_TEXT@<br/>extracted 1337 files - 7.19 MB@@@" + ] + }, + { + "cmd": [ + "python", + "-u", + "RECIPE_MODULE[depot_tools::gitiles]/resources/gerrit_client.py", + "--json-file", + "/path/to/tmp/json", + "--url", + "https://chromium.googlesource.com/chromium/src/+archive/refs/heads/master.tgz", + "--format", + "archive", + "--extract-to", + "[START_DIR]/archive2" + ], + "name": "download https://chromium.googlesource.com/chromium/src @ refs/heads/master (2)", + "~followup_annotations": [ + "@@@STEP_TEXT@<br/>extracted 10 files - 0.01 MB<br/>SKIPPED 4 files - 7.19 MB@@@", + "@@@STEP_LOG_LINE@skipped files@/root@@@", + "@@@STEP_LOG_LINE@skipped files@../relative@@@", + "@@@STEP_LOG_LINE@skipped files@sneaky/../../relative@@@", + "@@@STEP_LOG_END@skipped files@@@", + "@@@STEP_FAILURE@@@" + ] + }, { "name": "$result", "recipe_result": null, diff --git a/recipes/recipe_modules/gitiles/examples/full.py b/recipes/recipe_modules/gitiles/examples/full.py index 6a249e54c..bb9df32bb 100644 --- a/recipes/recipe_modules/gitiles/examples/full.py +++ b/recipes/recipe_modules/gitiles/examples/full.py @@ -5,6 +5,8 @@ DEPS = [ 'gitiles', 'recipe_engine/json', + 'recipe_engine/step', + 'recipe_engine/path', 'recipe_engine/properties', ] @@ -22,6 +24,14 @@ def RunSteps(api): data = api.gitiles.download_file(url, 'NONEXISTENT', attempts=1, accept_statuses=[404]) + api.gitiles.download_archive(url, api.path['start_dir'].join('archive')) + + try: + api.gitiles.download_archive(url, api.path['start_dir'].join('archive2')) + assert False # pragma: no cover + except api.step.StepFailure as ex: + assert '/root' in ex.gitiles_skipped_files + def GenTests(api): yield ( @@ -65,4 +75,19 @@ def GenTests(api): 'fetch master:NONEXISTENT', api.json.output({'value': None}) ) + + api.step_data( + ('download https://chromium.googlesource.com/chromium/src @ ' + 'refs/heads/master (2)'), + api.json.output({ + 'extracted': { + 'filecount': 10, + 'bytes': 14925, + }, + 'skipped': { + 'filecount': 4, + 'bytes': 7192345, + 'names': ['/root', '../relative', 'sneaky/../../relative'], + }, + }) + ) ) diff --git a/recipes/recipe_modules/gitiles/resources/gerrit_client.py b/recipes/recipe_modules/gitiles/resources/gerrit_client.py index a34f23ce0..c38fc301d 100755 --- a/recipes/recipe_modules/gitiles/resources/gerrit_client.py +++ b/recipes/recipe_modules/gitiles/resources/gerrit_client.py @@ -15,6 +15,7 @@ import json import logging import os import sys +import tarfile import time import urllib import urlparse @@ -100,6 +101,16 @@ def 
main(arguments): parser = create_argparser() args = parser.parse_args(arguments) + if args.extract_to and args.format != "archive": + parser.error('--extract-to requires --format=archive') + if not args.extract_to and args.format == "archive": + parser.error('--format=archive requires --extract-to') + + if args.extract_to: + # make sure it is absolute and ends with '/' + args.extract_to = os.path.join(os.path.abspath(args.extract_to), '') + os.makedirs(args.extract_to) + parsed_url = urlparse.urlparse(args.url) if not parsed_url.scheme.startswith('http'): parser.error('Invalid URI scheme (expected http or https): %s' % args.url) @@ -125,11 +136,49 @@ def main(arguments): elif args.format == 'text': # Text fetching will pack the text into structured JSON. def handler(conn): - result = ReadHttpResponse(conn, **kwargs).read() # Wrap in a structured JSON for export to recipe module. return { - 'value': result or None, + 'value': ReadHttpResponse(conn, **kwargs).read() or None, } + elif args.format == 'archive': + # Archive fetching hooks result to tarfile extraction. This implementation + # is able to do a streaming extraction operation without having to buffer + # the entire tarfile. + def handler(conn): + ret = { + 'extracted': { + 'filecount': 0, + 'bytes': 0, + }, + 'skipped': { + 'filecount': 0, + 'bytes': 0, + 'names': [], + } + } + fileobj = ReadHttpResponse(conn, **kwargs) + with tarfile.open(mode='r|*', fileobj=fileobj) as tf: + # monkeypatch the TarFile object to allow printing messages and + # collecting stats for each extracted file. extractall makes a single + # linear pass over the tarfile, which is compatible with + # ReadHttpResponse; other naive implementations (such as `getmembers`) + # do random access over the file and would require buffering the whole + # thing (!!). 
+ em = tf._extract_member + def _extract_member(tarinfo, targetpath): + if not os.path.abspath(targetpath).startswith(args.extract_to): + print 'Skipping %s' % (tarinfo.name,) + ret['skipped']['filecount'] += 1 + ret['skipped']['bytes'] += tarinfo.size + ret['skipped']['names'].append(tarinfo.name) + return + print 'Extracting %s' % (tarinfo.name,) + ret['extracted']['filecount'] += 1 + ret['extracted']['bytes'] += tarinfo.size + return em(tarinfo, targetpath) + tf._extract_member = _extract_member + tf.extractall(args.extract_to) + return ret if args.log_start: query_params['s'] = args.log_start @@ -158,10 +207,13 @@ def main(arguments): def create_argparser(): parser = argparse.ArgumentParser() parser.add_argument( - '-j', '--json-file', required=True, + '-j', '--json-file', help='Path to json file for output.') parser.add_argument( - '-f', '--format', required=True, choices=('json', 'text')) + '--extract-to', + help='Local path to extract archive url. Must not exist.') + parser.add_argument( + '-f', '--format', required=True, choices=('json', 'text', 'archive')) parser.add_argument( '-u', '--url', required=True, help='Url of gitiles. For example, '