# depot_tools/recipes/recipe_modules/gitiles/api.py

# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import base64
import urlparse

from recipe_engine import recipe_api


class Gitiles(recipe_api.RecipeApi):
  """Module for polling a git repository using the Gitiles web interface."""

  def _fetch(self, url, step_name, fmt, attempts=None, add_json_log=True,
             log_limit=None, log_start=None, extract_to=None, **kwargs):
    """Runs the bundled `gerrit_client.py` resource against a Gitiles URL.

    Args:
      url (str): the URL passed to the client via `--url`.
      step_name (str): name of the step that runs the client.
      fmt (str): one of ('text', 'json', 'archive'). Tells the client how to
        process the HTTP response.
          * text - implies the response is base64 encoded
          * json - implies the response is JSON
          * archive - implies the response is a compressed tarball; requires
            `extract_to`.
      attempts: if truthy, passed to the client via `--attempts`.
      add_json_log: if True, will spill out json into log.
      log_limit: for log URLs, limit number of results. None implies 1 page,
        as returned by Gitiles.
      log_start: for log URLs, the start cursor for paging.
      extract_to (Path): when fmt == 'archive', instructs the client to
        extract the archive to this non-existent folder.
      **kwargs: forwarded to `self.m.python`, except `accept_statuses`, which
        (when non-empty) is passed to the client as a comma-separated
        `--accept-statuses` value.

    Returns:
      The result of the `self.m.python` call that ran the client.
    """
    assert fmt in ('json', 'text', 'archive')

    # The order of the flags is kept stable: it is part of the step's command
    # line.
    cmd = ['--json-file', self.m.json.output(add_json_log=add_json_log)]
    cmd += ['--url', url]
    cmd += ['--format', fmt]
    if fmt == 'archive':
      assert extract_to is not None, 'archive format requires extract_to'
      cmd += ['--extract-to', extract_to]
    if attempts:
      cmd += ['--attempts', attempts]
    if log_limit is not None:
      cmd += ['--log-limit', log_limit]
    if log_start is not None:
      cmd += ['--log-start', log_start]

    # `accept_statuses` is consumed here so that it is not forwarded to the
    # python step below.
    statuses = kwargs.pop('accept_statuses', None)
    if statuses:
      cmd += ['--accept-statuses', ','.join(map(str, statuses))]

    script = self.resource('gerrit_client.py')
    return self.m.python(step_name, script, cmd, **kwargs)

  def refs(self, url, step_name='refs', attempts=None):
    """Lists the refs of the remote repository.

    Args:
      url (str): URL of the remote repository; '+refs' is joined onto it.
      step_name (str): name of the fetching step.
      attempts: forwarded to `_fetch`.

    Returns:
      A sorted list of ref names as `str`. The same list is also attached to
      the step presentation as the 'refs' log.
    """
    refs_url = self.m.url.join(url, '+refs')
    result = self._fetch(refs_url, step_name, fmt='json', attempts=attempts)

    ref_names = [str(name) for name in result.json.output]
    ref_names.sort()
    result.presentation.logs['refs'] = ref_names
    return ref_names

  def log(self, url, ref, limit=0, cursor=None,
          step_name=None, attempts=None, **kwargs):
    """Fetches the most recent commits under the given ref.

    Args:
      url (str): URL of the remote repository.
      ref (str): Name of the desired ref (see Gitiles.refs).
      limit (int): Number of commits to limit the fetching to. Must be >= 0.
        Gitiles does not return all commits in one call; instead paging is
        used. 0 implies to return whatever the first response contains.
        Otherwise, paging will be used to fetch at least this many
        commits, but all fetched commits will be returned.
      cursor (str or None): The paging cursor used to fetch the next page.
      step_name (str): Custom name for this step (optional). When not given,
        a name is derived from `ref` and, if set, `cursor`.
      attempts: forwarded to `_fetch`.
      **kwargs: forwarded to `_fetch`.

    Returns:
      A tuple of (commits, cursor).
      Commits are a list of commits (as Gitiles dict structure) in reverse
      chronological order. The number of commits may be higher than the limit
      argument.
      Cursor is the 'next' value of the response and can be used for
      subsequent calls to log for paging. If None, signals that there are no
      more commits to fetch.
    """
    assert limit >= 0
    if not step_name:
      cursor_suffix = ' from %s' % cursor if cursor else ''
      step_name = 'gitiles log: %s%s' % (ref, cursor_suffix)

    log_url = self.m.url.join(url, '+log/%s' % ref)
    result = self._fetch(
        log_url,
        step_name,
        log_limit=limit,
        log_start=cursor,
        attempts=attempts,
        fmt='json',
        add_json_log=True,
        **kwargs)

    # The response is a JSON dict: "log" holds the list of commit dicts and
    # the optional "next" holds the cursor of the following page.
    payload = result.json.output
    commits = payload['log']
    next_cursor = payload.get('next')

    result.presentation.step_text = (
        '<br />%d commits fetched' % len(commits))
    return commits, next_cursor

  def commit_log(self, url, commit, step_name=None, attempts=None):
    """Fetches the Gitiles commit log structure for a single commit.

    Args:
      url (str): The base repository URL.
      commit (str): The commit hash.
      step_name (str): If not None, override the step name.
      attempts (int): Number of times to try the request before failing.

    Returns:
      (dict) the JSON output of the fetch step.
    """
    if not step_name:
      step_name = 'commit log: %s' % commit

    result = self._fetch(
        '%s/+/%s' % (url, commit),
        step_name,
        attempts=attempts,
        fmt='json')
    return result.json.output

  def download_file(self, repository_url, file_path, branch='master',
                    step_name=None, attempts=None, **kwargs):
    """Downloads raw file content from a Gitiles repository.

    Args:
      repository_url (str): Full URL to the repository.
      file_path (str): Relative path to the file from the repository root.
      branch (str): Branch of the repository.
      step_name (str): Custom name for this step (optional).
      attempts (int): Number of times to try the request before failing.
      **kwargs: forwarded to `_fetch`.

    Returns:
      Raw (base64-decoded) file content, or None when the step output's
      'value' is None.
    """
    file_url = self.m.url.join(
        repository_url, '+/%s/%s' % (branch, file_path))
    if not step_name:
      step_name = 'fetch %s:%s' % (branch, file_path,)

    result = self._fetch(
        file_url,
        step_name,
        attempts=attempts,
        fmt='text',
        add_json_log=False,
        **kwargs)

    # With fmt='text' the content is delivered base64 encoded under 'value'.
    encoded = result.json.output['value']
    return None if encoded is None else base64.b64decode(encoded)

  def download_archive(self, repository_url, destination,
                       revision='refs/heads/master'):
    """Downloads an archive of the repo and extracts it to `destination`.

    If the gitiles server attempts to provide a tarball with paths which escape
    `destination`, all valid files are extracted and the skipped ones are
    reported by the client. In that case the step is marked as failed, the
    skipped names are attached as the 'skipped files' log, and StepFailure is
    raised with an attribute `StepFailure.gitiles_skipped_files` containing
    the names of the files that were skipped.

    Args:
      repository_url (str): Full URL to the repository
      destination (Path): Local path to extract the archive to. Must not exist
        prior to this call.
      revision (str): The ref or revision in the repo to download. Defaults to
        'refs/heads/master'.

    Raises:
      StepFailure: when the client reports a non-zero count of skipped files.
    """
    step_name = 'download %s @ %s' % (repository_url, revision)
    archive_url = self.m.url.join(
        repository_url, '+archive/%s.tgz' % (revision,))
    result = self._fetch(
        archive_url,
        step_name,
        fmt='archive',
        add_json_log=False,
        extract_to=destination,
        step_test_data=lambda: self.m.json.test_api.output({
            'extracted': {
                'filecount': 1337,
                'bytes': 7192345,
            },
        }))
    # NOTE(review): presumably makes `destination` look existing to the path
    # module in simulation runs -- confirm against the path module.
    self.m.path.mock_add_paths(destination)

    report = result.json.output
    bytes_per_mb = 1000.0 ** 2

    extracted = report['extracted']
    if extracted['filecount']:
      result.presentation.step_text += (
          '<br/>extracted %s files - %.02f MB' % (
              extracted['filecount'], extracted['bytes'] / bytes_per_mb))

    # 'skipped' is optional in the report; nothing more to do without it.
    skipped = report.get('skipped', {})
    if not skipped.get('filecount'):
      return

    result.presentation.step_text += (
        '<br/>SKIPPED %s files - %.02f MB' % (
            skipped['filecount'], skipped['bytes'] / bytes_per_mb))
    result.presentation.logs['skipped files'] = skipped['names']
    result.presentation.status = self.m.step.FAILURE
    failure = self.m.step.StepFailure(step_name)
    failure.gitiles_skipped_files = skipped['names']
    raise failure

  def parse_repo_url(self, repo_url):
    """Splits a repository URL into a (host, project) pair.

    Thin wrapper that delegates to the module-level `parse_repo_url`.

    Returns (None, None) if repo_url is not recognized.
    """
    host_and_project = parse_repo_url(repo_url)
    return host_and_project


def parse_http_host_and_path(url):
  """Returns the (netloc, path) pair of an http(s) URL.

  A URL without a scheme is parsed as if it were prefixed with 'https://'.

  Returns (None, None) if the scheme is neither 'http' nor 'https', or if the
  URL carries params, a query or a fragment.
  """
  # Copied from https://chromium.googlesource.com/infra/luci/recipes-py/+/809e57935211b3fcb802f74a7844d4f36eff6b87/recipe_modules/buildbucket/util.py
  parts = urlparse.urlparse(url)
  if not parts.scheme:
    parts = urlparse.urlparse('https://' + url)

  if parts.scheme not in ('http', 'https'):
    return None, None
  if parts.params or parts.query or parts.fragment:
    return None, None
  return parts.netloc, parts.path


def parse_repo_url(repo_url):
  """Returns (host, project) pair.

  The project is the URL path with surrounding slashes, one leading 'a/'
  component and a trailing '.git' suffix removed.

  Returns (None, None) if repo_url is not recognized. This includes URLs
  whose path contains a '+' component and URLs for which no project name
  remains after normalization (e.g. 'https://host/' or 'https://host/.git').
  """
  # Adapted from https://chromium.googlesource.com/infra/luci/recipes-py/+/809e57935211b3fcb802f74a7844d4f36eff6b87/recipe_modules/buildbucket/util.py
  host, project = parse_http_host_and_path(repo_url)
  if not host or not project or '+' in project.split('/'):
    return None, None
  project = project.strip('/')
  if project.startswith('a/'):
    project = project[len('a/'):]
  if project.endswith('.git'):
    project = project[:-len('.git')]
  if not project:
    # The path consisted only of slashes and/or a '.git' suffix, so it does
    # not name a project; report it as unrecognized instead of (host, '').
    return None, None
  return host, project