Fetch file contents from Gitiles in parallel

This makes it much faster to get a diff for a set of multiple files.

Bug: b/323243527
Change-Id: I6c54a4fa8782d4a0bbb187b8bed4fbd52be47344
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5384647
Reviewed-by: Josip Sokcevic <sokcevic@chromium.org>
Commit-Queue: Gavin Mak <gavinmak@google.com>
changes/47/5384647/4
Gavin Mak 1 year ago committed by LUCI CQ
parent d76c4d6045
commit 6551a7eb43

@ -13,9 +13,11 @@ import base64
import os
import platform
import sys
import concurrent.futures
import gclient_utils
from gerrit_util import CreateHttpConn, ReadHttpResponse
from gerrit_util import (CreateHttpConn, ReadHttpResponse,
MAX_CONCURRENT_CONNECTION)
import subprocess2
DEV_NULL = "/dev/null"
@ -89,31 +91,12 @@ def _process_diff(diff: str, src_root: str, dst_root: str) -> str:
return header + HEADER_DELIMITER + body
def create_diffs(host: str, repo: str, ref: str, root: str,
files: list[str]) -> dict[str, str]:
"""Calculates diffs of files in a directory against a commit.
Args:
host: Gerrit host.
repo: Gerrit repo.
ref: Gerrit commit.
root: Path of local directory containing modified files.
files: List of file paths relative to root.
Returns:
A dict mapping file paths to diffs.
Raises:
RuntimeError: If a file is missing in both the root and the repo.
"""
diffs = {}
with gclient_utils.temporary_directory() as tmp_root:
# TODO(gavinmak): Parallelize fetching content.
for file in files:
def _create_diff(host: str, repo: str, ref: str, root: str, file: str) -> str:
new_file = os.path.join(root, file)
if not os.path.exists(new_file):
new_file = None
with gclient_utils.temporary_directory() as tmp_root:
old_file = None
old_content = fetch_content(host, repo, ref, file)
if old_content:
@ -127,8 +110,36 @@ def create_diffs(host: str, repo: str, ref: str, root: str,
f"or from {host}/{repo}:{ref}.")
diff = git_diff(old_file, new_file)
diffs[file] = _process_diff(diff, tmp_root, root)
return _process_diff(diff, tmp_root, root)
def create_diffs(host: str, repo: str, ref: str, root: str,
                 files: list[str]) -> dict[str, str]:
    """Calculates diffs of files in a directory against a commit.

    Each file is handled by _create_diff on a worker thread so that the
    per-file fetches overlap instead of running one after another.

    Args:
        host: Gerrit host.
        repo: Gerrit repo.
        ref: Gerrit commit.
        root: Path of local directory containing modified files.
        files: List of file paths relative to root.

    Returns:
        A dict mapping file paths to diffs. Keys appear in the same order as
        they do in files, regardless of which fetch finishes first.

    Raises:
        RuntimeError: If a file is missing in both the root and the repo.
    """
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=MAX_CONCURRENT_CONNECTION) as executor:
        # Submit every file up front so all of them are in flight before we
        # start waiting on any single result.
        pending = [
            (file, executor.submit(_create_diff, host, repo, ref, root, file))
            for file in files
        ]
        # Collect in submission order rather than completion order: the work
        # still runs in parallel, but the resulting dict (and anything built
        # by iterating over it) is deterministic from run to run.
        # future.result() re-raises any exception raised by _create_diff.
        return {file: future.result() for file, future in pending}

Loading…
Cancel
Save