# Copyright 2018 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. from __future__ import division import base64 import sys try: import urlparse except ImportError: # pragma: no cover import urllib.parse as urlparse from recipe_engine import recipe_api class Gitiles(recipe_api.RecipeApi): """Module for polling a git repository using the Gitiles web interface.""" def _fetch(self, url, step_name, fmt, attempts=None, add_json_log=True, log_limit=None, log_start=None, extract_to=None, **kwargs): """Fetches information from Gitiles. Args: * fmt (str): one of ('text', 'json', 'archive'). Instructs the underlying gitiles_client tool how to process the HTTP response. * text - implies the response is base64 encoded * json - implies the response is JSON * archive - implies the response is a compressed tarball; requires `extract_to`. * extract_to (Path): When fmt=='archive', instructs gitiles_client to extract the archive to this non-existent folder. * log_limit: for log URLs, limit number of results. None implies 1 page, as returned by Gitiles. * log_start: for log URLs, the start cursor for paging. * add_json_log: if True, will spill out json into log. """ assert fmt in ('json', 'text', 'archive') args = [ '--json-file', self.m.json.output(add_json_log=add_json_log), '--url', url, '--format', fmt, ] if fmt == 'archive': assert extract_to is not None, 'archive format requires extract_to' args.extend(['--extract-to', extract_to]) if attempts: args.extend(['--attempts', attempts]) if log_limit is not None: args.extend(['--log-limit', log_limit]) if log_start is not None: args.extend(['--log-start', log_start]) accept_statuses = kwargs.pop('accept_statuses', None) if accept_statuses: args.extend([ '--accept-statuses', ','.join([str(s) for s in accept_statuses])]) cmd = ['vpython3', '-u', self.resource('gerrit_client.py')] + args return self.m.step(step_name, cmd, **kwargs) def refs(self, url, step_name='refs', attempts=None): """Returns a list of refs in the remote repository.""" step_result = self._fetch( self.m.url.join(url, '+refs'), step_name, fmt='json', attempts=attempts) refs = sorted(str(ref) for ref in step_result.json.output) step_result.presentation.logs['refs'] = refs return refs def log(self, url, ref, limit=0, cursor=None, step_name=None, attempts=None, **kwargs): """Returns the most recent commits under the given ref with properties. Args: * url (str): URL of the remote repository. * ref (str): Name of the desired ref (see Gitiles.refs). * limit (int): Number of commits to limit the fetching to. Gitiles does not return all commits in one call; instead paging is used. 0 implies to return whatever first gerrit responds with. Otherwise, paging will be used to fetch at least this many commits, but all fetched commits will be returned. * cursor (str or None): The paging cursor used to fetch the next page. * step_name (str): Custom name for this step (optional). Returns: A tuple of (commits, cursor). Commits are a list of commits (as Gitiles dict structure) in reverse chronological order. The number of commits may be higher than limit argument. Cursor can be used for subsequent calls to log for paging. If None, signals that there are no more commits to fetch. """ assert limit >= 0 step_name = step_name or 'gitiles log: %s%s' % ( ref, ' from %s' % cursor if cursor else '') step_result = self._fetch( self.m.url.join(url, '+log/%s' % ref), step_name, log_limit=limit, log_start=cursor, attempts=attempts, fmt='json', add_json_log=True, **kwargs) # The output is formatted as a JSON dict with a "log" key. The "log" key # is a list of commit dicts, which contain information about the commit. commits = step_result.json.output['log'] cursor = step_result.json.output.get('next') step_result.presentation.step_text = ( '
%d commits fetched' % len(commits)) return commits, cursor def commit_log(self, url, commit, step_name=None, attempts=None): """Returns: (dict) the Gitiles commit log structure for a given commit. Args: * url (str): The base repository URL. * commit (str): The commit hash. * step_name (str): If not None, override the step name. * attempts (int): Number of times to try the request before failing. """ step_name = step_name or 'commit log: %s' % commit commit_url = '%s/+/%s' % (url, commit) step_result = self._fetch(commit_url, step_name, attempts=attempts, fmt='json') return step_result.json.output def download_file(self, repository_url, file_path, branch='main', step_name=None, attempts=None, **kwargs): """Downloads raw file content from a Gitiles repository. Args: * repository_url (str): Full URL to the repository. * branch (str): Branch of the repository. * file_path (str): Relative path to the file from the repository root. * step_name (str): Custom name for this step (optional). * attempts (int): Number of times to try the request before failing. Returns: Raw file content. """ fetch_url = self.m.url.join(repository_url, '+/%s/%s' % (branch, file_path)) step_result = self._fetch( fetch_url, step_name or 'fetch %s:%s' % (branch, file_path,), attempts=attempts, fmt='text', add_json_log=False, **kwargs) if step_result.json.output['value'] is None: return None # TODO(crbug.com/1227140): Clean up when py2 is no longer supported. value = base64.b64decode(step_result.json.output['value']) if sys.version_info >= (3,): return value.decode('utf-8') return value def download_archive(self, repository_url, destination, revision='refs/heads/main'): """Downloads an archive of the repo and extracts it to `destination`. If the gitiles server attempts to provide a tarball with paths which escape `destination`, this function will extract all valid files and then raise StepFailure with an attribute `StepFailure.gitiles_skipped_files` containing the names of the files that were skipped. Args: * repository_url (str): Full URL to the repository * destination (Path): Local path to extract the archive to. Must not exist prior to this call. * revision (str): The ref or revision in the repo to download. Defaults to 'refs/heads/main'. """ step_name = 'download %s @ %s' % (repository_url, revision) fetch_url = self.m.url.join(repository_url, '+archive/%s.tgz' % (revision,)) step_result = self._fetch( fetch_url, step_name, fmt='archive', add_json_log=False, extract_to=destination, step_test_data=lambda: self.m.json.test_api.output({ 'extracted': { 'filecount': 1337, 'bytes': 7192345, }, }) ) self.m.path.mock_add_paths(destination) j = step_result.json.output if j['extracted']['filecount']: stat = j['extracted'] step_result.presentation.step_text += ( '
extracted %s files - %.02f MB' % ( stat['filecount'], stat['bytes'] / (1000.0**2))) if j.get('skipped', {}).get('filecount'): stat = j['skipped'] step_result.presentation.step_text += ( '
SKIPPED %s files - %.02f MB' % ( stat['filecount'], stat['bytes'] / (1000.0**2))) step_result.presentation.logs['skipped files'] = stat['names'] step_result.presentation.status = self.m.step.FAILURE ex = self.m.step.StepFailure(step_name) ex.gitiles_skipped_files = stat['names'] raise ex def parse_repo_url(self, repo_url): """Returns (host, project) pair. Returns (None, None) if repo_url is not recognized. """ return parse_repo_url(repo_url) def unparse_repo_url(self, host, project): """Generates a Gitiles repo URL. See also parse_repo_url.""" return unparse_repo_url(host, project) def canonicalize_repo_url(self, repo_url): """Returns a canonical form of repo_url. If not recognized, returns as is. """ if repo_url: host, project = parse_repo_url(repo_url) if host and project: repo_url = unparse_repo_url(host, project) return repo_url def parse_http_host_and_path(url): # Copied from https://chromium.googlesource.com/infra/luci/recipes-py/+/809e57935211b3fcb802f74a7844d4f36eff6b87/recipe_modules/buildbucket/util.py parsed = urlparse.urlparse(url) if not parsed.scheme: parsed = urlparse.urlparse('https://' + url) if (parsed.scheme in ('http', 'https') and not parsed.params and not parsed.query and not parsed.fragment): return parsed.netloc, parsed.path return None, None def parse_repo_url(repo_url): """Returns (host, project) pair. Returns (None, None) if repo_url is not recognized. """ # Adapted from https://chromium.googlesource.com/infra/luci/recipes-py/+/809e57935211b3fcb802f74a7844d4f36eff6b87/recipe_modules/buildbucket/util.py host, project = parse_http_host_and_path(repo_url) if not host or not project or '+' in project.split('/'): return None, None project = project.strip('/') if project.startswith('a/'): project = project[len('a/'):] if project.endswith('.git'): project = project[:-len('.git')] return host, project def unparse_repo_url(host, project): return 'https://%s/%s' % (host, project)