diff --git a/recipes/README.recipes.md b/recipes/README.recipes.md
index 7d3b461c6..ad00a47fc 100644
--- a/recipes/README.recipes.md
+++ b/recipes/README.recipes.md
@@ -516,13 +516,13 @@ DEPRECATED. Consider using gerrit.get_change_description instead.
— **def [upload](/recipes/recipe_modules/git_cl/api.py#47)(self, message, upload_args=None, \*\*kwargs):**
### *recipe_modules* / [gitiles](/recipes/recipe_modules/gitiles)
-[DEPS](/recipes/recipe_modules/gitiles/__init__.py#1): [recipe\_engine/json][recipe_engine/recipe_modules/json], [recipe\_engine/path][recipe_engine/recipe_modules/path], [recipe\_engine/python][recipe_engine/recipe_modules/python], [recipe\_engine/raw\_io][recipe_engine/recipe_modules/raw_io], [recipe\_engine/url][recipe_engine/recipe_modules/url]
+[DEPS](/recipes/recipe_modules/gitiles/__init__.py#5): [recipe\_engine/json][recipe_engine/recipe_modules/json], [recipe\_engine/path][recipe_engine/recipe_modules/path], [recipe\_engine/python][recipe_engine/recipe_modules/python], [recipe\_engine/raw\_io][recipe_engine/recipe_modules/raw_io], [recipe\_engine/step][recipe_engine/recipe_modules/step], [recipe\_engine/url][recipe_engine/recipe_modules/url]
#### **class [Gitiles](/recipes/recipe_modules/gitiles/api.py#10)([RecipeApi][recipe_engine/wkt/RecipeApi]):**
Module for polling a git repository using the Gitiles web interface.
-— **def [commit\_log](/recipes/recipe_modules/gitiles/api.py#102)(self, url, commit, step_name=None, attempts=None):**
+— **def [commit\_log](/recipes/recipe_modules/gitiles/api.py#113)(self, url, commit, step_name=None, attempts=None):**
Returns: (dict) the Gitiles commit log structure for a given commit.
@@ -532,7 +532,23 @@ Args:
step_name (str): If not None, override the step name.
attempts (int): Number of times to try the request before failing.
-— **def [download\_file](/recipes/recipe_modules/gitiles/api.py#118)(self, repository_url, file_path, branch='master', step_name=None, attempts=None, \*\*kwargs):**
+— **def [download\_archive](/recipes/recipe_modules/gitiles/api.py#155)(self, repository_url, destination, revision='refs/heads/master'):**
+
+Downloads an archive of the repo and extracts it to `destination`.
+
+If the gitiles server attempts to provide a tarball with paths which escape
+`destination`, this function will extract all valid files and then
+raise StepFailure with an attribute `StepFailure.gitiles_skipped_files`
+containing the names of the files that were skipped.
+
+Args:
+ repository_url (str): Full URL to the repository
+ destination (Path): Local path to extract the archive to. Must not exist
+ prior to this call.
+ revision (str): The ref or revision in the repo to download. Defaults to
+ 'refs/heads/master'.
+
+— **def [download\_file](/recipes/recipe_modules/gitiles/api.py#129)(self, repository_url, file_path, branch='master', step_name=None, attempts=None, \*\*kwargs):**
Downloads raw file content from a Gitiles repository.
@@ -546,7 +562,7 @@ Args:
Returns:
Raw file content.
-— **def [log](/recipes/recipe_modules/gitiles/api.py#56)(self, url, ref, limit=0, cursor=None, step_name=None, attempts=None, \*\*kwargs):**
+— **def [log](/recipes/recipe_modules/gitiles/api.py#67)(self, url, ref, limit=0, cursor=None, step_name=None, attempts=None, \*\*kwargs):**
Returns the most recent commits under the given ref with properties.
@@ -569,7 +585,7 @@ Returns:
Cursor can be used for subsequent calls to log for paging. If None,
signals that there are no more commits to fetch.
-— **def [refs](/recipes/recipe_modules/gitiles/api.py#44)(self, url, step_name='refs', attempts=None):**
+— **def [refs](/recipes/recipe_modules/gitiles/api.py#55)(self, url, step_name='refs', attempts=None):**
Returns a list of refs in the remote repository.
### *recipe_modules* / [gsutil](/recipes/recipe_modules/gsutil)
@@ -778,9 +794,9 @@ like checkout or compile), and some of these tests have failed.
— **def [RunSteps](/recipes/recipe_modules/git_cl/examples/full.py#17)(api):**
### *recipes* / [gitiles:examples/full](/recipes/recipe_modules/gitiles/examples/full.py)
-[DEPS](/recipes/recipe_modules/gitiles/examples/full.py#5): [gitiles](#recipe_modules-gitiles), [recipe\_engine/json][recipe_engine/recipe_modules/json], [recipe\_engine/properties][recipe_engine/recipe_modules/properties]
+[DEPS](/recipes/recipe_modules/gitiles/examples/full.py#5): [gitiles](#recipe_modules-gitiles), [recipe\_engine/json][recipe_engine/recipe_modules/json], [recipe\_engine/path][recipe_engine/recipe_modules/path], [recipe\_engine/properties][recipe_engine/recipe_modules/properties], [recipe\_engine/step][recipe_engine/recipe_modules/step]
-— **def [RunSteps](/recipes/recipe_modules/gitiles/examples/full.py#12)(api):**
+— **def [RunSteps](/recipes/recipe_modules/gitiles/examples/full.py#14)(api):**
### *recipes* / [gsutil:examples/full](/recipes/recipe_modules/gsutil/examples/full.py)
[DEPS](/recipes/recipe_modules/gsutil/examples/full.py#5): [gsutil](#recipe_modules-gsutil), [recipe\_engine/path][recipe_engine/recipe_modules/path]
diff --git a/recipes/recipe_modules/gitiles/__init__.py b/recipes/recipe_modules/gitiles/__init__.py
index 16991642a..f8e8ca517 100644
--- a/recipes/recipe_modules/gitiles/__init__.py
+++ b/recipes/recipe_modules/gitiles/__init__.py
@@ -1,7 +1,12 @@
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
DEPS = [
'recipe_engine/json',
'recipe_engine/path',
'recipe_engine/python',
'recipe_engine/raw_io',
+ 'recipe_engine/step',
'recipe_engine/url',
]
diff --git a/recipes/recipe_modules/gitiles/api.py b/recipes/recipe_modules/gitiles/api.py
index c99957067..42c4086da 100644
--- a/recipes/recipe_modules/gitiles/api.py
+++ b/recipes/recipe_modules/gitiles/api.py
@@ -1,4 +1,4 @@
-# Copyright 2014 The Chromium Authors. All rights reserved.
+# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -11,21 +11,33 @@ class Gitiles(recipe_api.RecipeApi):
"""Module for polling a git repository using the Gitiles web interface."""
def _fetch(self, url, step_name, fmt, attempts=None, add_json_log=True,
- log_limit=None, log_start=None, **kwargs):
+ log_limit=None, log_start=None, extract_to=None, **kwargs):
"""Fetches information from Gitiles.
Arguments:
+ fmt (str): one of ('text', 'json', 'archive'). Instructs the underlying
+ gitiles_client tool how to process the HTTP response.
+ * text - implies the response is base64 encoded
+ * json - implies the response is JSON
+ * archive - implies the response is a compressed tarball; requires
+ `extract_to`.
+ extract_to (Path): When fmt=='archive', instructs gitiles_client to
+ extract the archive to this non-existent folder.
log_limit: for log URLs, limit number of results. None implies 1 page,
as returned by Gitiles.
log_start: for log URLs, the start cursor for paging.
add_json_log: if True, will spill out json into log.
"""
- assert fmt in ('json', 'text')
+ assert fmt in ('json', 'text', 'archive')
+
args = [
'--json-file', self.m.json.output(add_json_log=add_json_log),
'--url', url,
'--format', fmt,
]
+ if fmt == 'archive':
+ assert extract_to is not None, 'archive format requires extract_to'
+ args.extend(['--extract-to', extract_to])
if attempts:
args.extend(['--attempts', attempts])
if log_limit is not None:
@@ -37,9 +49,8 @@ class Gitiles(recipe_api.RecipeApi):
args.extend([
'--accept-statuses',
','.join([str(s) for s in accept_statuses])])
- a = self.m.python(
+ return self.m.python(
step_name, self.resource('gerrit_client.py'), args, **kwargs)
- return a
def refs(self, url, step_name='refs', attempts=None):
"""Returns a list of refs in the remote repository."""
@@ -140,3 +151,52 @@ class Gitiles(recipe_api.RecipeApi):
if step_result.json.output['value'] is None:
return None
return base64.b64decode(step_result.json.output['value'])
+
+ def download_archive(self, repository_url, destination,
+ revision='refs/heads/master'):
+ """Downloads an archive of the repo and extracts it to `destination`.
+
+ If the gitiles server attempts to provide a tarball with paths which escape
+ `destination`, this function will extract all valid files and then
+ raise StepFailure with an attribute `StepFailure.gitiles_skipped_files`
+ containing the names of the files that were skipped.
+
+ Args:
+ repository_url (str): Full URL to the repository
+ destination (Path): Local path to extract the archive to. Must not exist
+ prior to this call.
+ revision (str): The ref or revision in the repo to download. Defaults to
+ 'refs/heads/master'.
+ """
+ step_name = 'download %s @ %s' % (repository_url, revision)
+ fetch_url = self.m.url.join(repository_url, '+archive/%s.tgz' % (revision,))
+ step_result = self._fetch(
+ fetch_url,
+ step_name,
+ fmt='archive',
+ add_json_log=False,
+ extract_to=destination,
+ step_test_data=lambda: self.m.json.test_api.output({
+ 'extracted': {
+ 'filecount': 1337,
+ 'bytes': 7192345,
+ },
+ })
+ )
+ self.m.path.mock_add_paths(destination)
+ j = step_result.json.output
+ if j['extracted']['filecount']:
+ stat = j['extracted']
+ step_result.presentation.step_text += (
+ '<br/>extracted %s files - %.02f MB' % (
+ stat['filecount'], stat['bytes'] / (1000.0**2)))
+ if j.get('skipped', {}).get('filecount'):
+ stat = j['skipped']
+ step_result.presentation.step_text += (
+ '<br/>SKIPPED %s files - %.02f MB' % (
+ stat['filecount'], stat['bytes'] / (1000.0**2)))
+ step_result.presentation.logs['skipped files'] = stat['names']
+ step_result.presentation.status = self.m.step.FAILURE
+ ex = self.m.step.StepFailure(step_name)
+ ex.gitiles_skipped_files = stat['names']
+ raise ex
diff --git a/recipes/recipe_modules/gitiles/examples/full.expected/basic.json b/recipes/recipe_modules/gitiles/examples/full.expected/basic.json
index 568ee7643..17aba30dc 100644
--- a/recipes/recipe_modules/gitiles/examples/full.expected/basic.json
+++ b/recipes/recipe_modules/gitiles/examples/full.expected/basic.json
@@ -547,6 +547,49 @@
],
"name": "fetch master:NONEXISTENT"
},
+ {
+ "cmd": [
+ "python",
+ "-u",
+ "RECIPE_MODULE[depot_tools::gitiles]/resources/gerrit_client.py",
+ "--json-file",
+ "/path/to/tmp/json",
+ "--url",
+ "https://chromium.googlesource.com/chromium/src/+archive/refs/heads/master.tgz",
+ "--format",
+ "archive",
+ "--extract-to",
+ "[START_DIR]/archive"
+ ],
+ "name": "download https://chromium.googlesource.com/chromium/src @ refs/heads/master",
+ "~followup_annotations": [
+ "@@@STEP_TEXT@<br/>extracted 1337 files - 7.19 MB@@@"
+ ]
+ },
+ {
+ "cmd": [
+ "python",
+ "-u",
+ "RECIPE_MODULE[depot_tools::gitiles]/resources/gerrit_client.py",
+ "--json-file",
+ "/path/to/tmp/json",
+ "--url",
+ "https://chromium.googlesource.com/chromium/src/+archive/refs/heads/master.tgz",
+ "--format",
+ "archive",
+ "--extract-to",
+ "[START_DIR]/archive2"
+ ],
+ "name": "download https://chromium.googlesource.com/chromium/src @ refs/heads/master (2)",
+ "~followup_annotations": [
+ "@@@STEP_TEXT@<br/>extracted 10 files - 0.01 MB<br/>SKIPPED 4 files - 7.19 MB@@@",
+ "@@@STEP_LOG_LINE@skipped files@/root@@@",
+ "@@@STEP_LOG_LINE@skipped files@../relative@@@",
+ "@@@STEP_LOG_LINE@skipped files@sneaky/../../relative@@@",
+ "@@@STEP_LOG_END@skipped files@@@",
+ "@@@STEP_FAILURE@@@"
+ ]
+ },
{
"name": "$result",
"recipe_result": null,
diff --git a/recipes/recipe_modules/gitiles/examples/full.py b/recipes/recipe_modules/gitiles/examples/full.py
index 6a249e54c..bb9df32bb 100644
--- a/recipes/recipe_modules/gitiles/examples/full.py
+++ b/recipes/recipe_modules/gitiles/examples/full.py
@@ -5,6 +5,8 @@
DEPS = [
'gitiles',
'recipe_engine/json',
+ 'recipe_engine/step',
+ 'recipe_engine/path',
'recipe_engine/properties',
]
@@ -22,6 +24,14 @@ def RunSteps(api):
data = api.gitiles.download_file(url, 'NONEXISTENT', attempts=1,
accept_statuses=[404])
+ api.gitiles.download_archive(url, api.path['start_dir'].join('archive'))
+
+ try:
+ api.gitiles.download_archive(url, api.path['start_dir'].join('archive2'))
+ assert False # pragma: no cover
+ except api.step.StepFailure as ex:
+ assert '/root' in ex.gitiles_skipped_files
+
def GenTests(api):
yield (
@@ -65,4 +75,19 @@ def GenTests(api):
'fetch master:NONEXISTENT',
api.json.output({'value': None})
)
+ + api.step_data(
+ ('download https://chromium.googlesource.com/chromium/src @ '
+ 'refs/heads/master (2)'),
+ api.json.output({
+ 'extracted': {
+ 'filecount': 10,
+ 'bytes': 14925,
+ },
+ 'skipped': {
+ 'filecount': 4,
+ 'bytes': 7192345,
+ 'names': ['/root', '../relative', 'sneaky/../../relative'],
+ },
+ })
+ )
)
diff --git a/recipes/recipe_modules/gitiles/resources/gerrit_client.py b/recipes/recipe_modules/gitiles/resources/gerrit_client.py
index a34f23ce0..c38fc301d 100755
--- a/recipes/recipe_modules/gitiles/resources/gerrit_client.py
+++ b/recipes/recipe_modules/gitiles/resources/gerrit_client.py
@@ -15,6 +15,7 @@ import json
import logging
import os
import sys
+import tarfile
import time
import urllib
import urlparse
@@ -100,6 +101,16 @@ def main(arguments):
parser = create_argparser()
args = parser.parse_args(arguments)
+ if args.extract_to and args.format != "archive":
+ parser.error('--extract-to requires --format=archive')
+ if not args.extract_to and args.format == "archive":
+ parser.error('--format=archive requires --extract-to')
+
+ if args.extract_to:
+ # make sure it is absolute and ends with '/'
+ args.extract_to = os.path.join(os.path.abspath(args.extract_to), '')
+ os.makedirs(args.extract_to)
+
parsed_url = urlparse.urlparse(args.url)
if not parsed_url.scheme.startswith('http'):
parser.error('Invalid URI scheme (expected http or https): %s' % args.url)
@@ -125,11 +136,49 @@ def main(arguments):
elif args.format == 'text':
# Text fetching will pack the text into structured JSON.
def handler(conn):
- result = ReadHttpResponse(conn, **kwargs).read()
# Wrap in a structured JSON for export to recipe module.
return {
- 'value': result or None,
+ 'value': ReadHttpResponse(conn, **kwargs).read() or None,
}
+ elif args.format == 'archive':
+ # Archive fetching hooks result to tarfile extraction. This implementation
+ # is able to do a streaming extraction operation without having to buffer
+ # the entire tarfile.
+ def handler(conn):
+ ret = {
+ 'extracted': {
+ 'filecount': 0,
+ 'bytes': 0,
+ },
+ 'skipped': {
+ 'filecount': 0,
+ 'bytes': 0,
+ 'names': [],
+ }
+ }
+ fileobj = ReadHttpResponse(conn, **kwargs)
+ with tarfile.open(mode='r|*', fileobj=fileobj) as tf:
+ # monkeypatch the TarFile object to allow printing messages and
+ # collecting stats for each extracted file. extractall makes a single
+ # linear pass over the tarfile, which is compatible with
+ # ReadHttpResponse; other naive implementations (such as `getmembers`)
+ # do random access over the file and would require buffering the whole
+ # thing (!!).
+ em = tf._extract_member
+ def _extract_member(tarinfo, targetpath):
+ if not os.path.abspath(targetpath).startswith(args.extract_to):
+ print 'Skipping %s' % (tarinfo.name,)
+ ret['skipped']['filecount'] += 1
+ ret['skipped']['bytes'] += tarinfo.size
+ ret['skipped']['names'].append(tarinfo.name)
+ return
+ print 'Extracting %s' % (tarinfo.name,)
+ ret['extracted']['filecount'] += 1
+ ret['extracted']['bytes'] += tarinfo.size
+ return em(tarinfo, targetpath)
+ tf._extract_member = _extract_member
+ tf.extractall(args.extract_to)
+ return ret
if args.log_start:
query_params['s'] = args.log_start
@@ -158,10 +207,13 @@ def main(arguments):
def create_argparser():
parser = argparse.ArgumentParser()
parser.add_argument(
- '-j', '--json-file', required=True,
+ '-j', '--json-file',
help='Path to json file for output.')
parser.add_argument(
- '-f', '--format', required=True, choices=('json', 'text'))
+ '--extract-to',
+ help='Local path to extract archive url. Must not exist.')
+ parser.add_argument(
+ '-f', '--format', required=True, choices=('json', 'text', 'archive'))
parser.add_argument(
'-u', '--url', required=True,
help='Url of gitiles. For example, '