`git cl split`: Save and load splittings from files

This CL implements the proposal (from the design doc in the cited bug)
to allow `git cl split` to save the generated splitting to a
human-readable and editable text file. This allows users to save the
splitting in case something goes wrong and they need to retry, and also
lets them tweak a generated splitting if they get something mostly, but
not quite, usable.

It also allows users to edit the generated splitting interactively, like
they do for e.g. CL descriptions during `git cl upload`.

Bug: 389069356
Change-Id: I8ae21f2eb7c022ba181ae5af273ce09605b5acec
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/6235727
Commit-Queue: Devon Loehr <dloehr@google.com>
Reviewed-by: Josip Sokcevic <sokcevic@chromium.org>
changes/27/6235727/7
Devon Loehr 9 months ago committed by LUCI CQ
parent 7c4bd3f94e
commit 0822fcc141

@ -5854,6 +5854,12 @@ def CMDsplit(parser, args):
parser.add_option('--topic', parser.add_option('--topic',
default=None, default=None,
help='Topic to specify when uploading') help='Topic to specify when uploading')
parser.add_option('--from-file',
type='str',
default=None,
help='If present, load the split CLs from the given file '
'instead of computing a splitting. These file are '
'generated each time the script is run.')
options, _ = parser.parse_args(args) options, _ = parser.parse_args(args)
if not options.description_file and not options.dry_run: if not options.description_file and not options.dry_run:
@ -5865,7 +5871,7 @@ def CMDsplit(parser, args):
return split_cl.SplitCl(options.description_file, options.comment_file, return split_cl.SplitCl(options.description_file, options.comment_file,
Changelist, WrappedCMDupload, options.dry_run, Changelist, WrappedCMDupload, options.dry_run,
options.cq_dry_run, options.enable_auto_submit, options.cq_dry_run, options.enable_auto_submit,
options.max_depth, options.topic, options.max_depth, options.topic, options.from_file,
settings.GetRoot()) settings.GetRoot())

@ -10,6 +10,7 @@ import os
import re import re
import subprocess2 import subprocess2
import sys import sys
import tempfile
from typing import List, Set, Tuple, Dict, Any from typing import List, Set, Tuple, Dict, Any
import gclient_utils import gclient_utils
@ -76,7 +77,7 @@ class CLInfo:
# Don't quote the reviewer emails in the output # Don't quote the reviewer emails in the output
reviewers_str = ", ".join(self.reviewers) reviewers_str = ", ".join(self.reviewers)
lines = [ lines = [
f"Reviewers: [{reviewers_str}]", f"Directories: {self.directories}" f"Reviewers: [{reviewers_str}]", f"Description: {self.directories}"
] + [f"{action}, {file}" for (action, file) in self.files] ] + [f"{action}, {file}" for (action, file) in self.files]
return "\n".join(lines) return "\n".join(lines)
@ -346,7 +347,8 @@ def PrintSummary(cl_infos, refactor_branch):
def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run, def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
cq_dry_run, enable_auto_submit, max_depth, topic, repository_root): cq_dry_run, enable_auto_submit, max_depth, topic, from_file,
repository_root):
""""Splits a branch into smaller branches and uploads CLs. """"Splits a branch into smaller branches and uploads CLs.
Args: Args:
@ -394,16 +396,28 @@ def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
if not dry_run and not CheckDescriptionBugLink(description): if not dry_run and not CheckDescriptionBugLink(description):
return 0 return 0
files_split_by_reviewers = SelectReviewersForFiles( if from_file:
cl, author, files, max_depth) cl_infos = LoadSplittingFromFile(from_file, files_on_disk=files)
cl_infos = CLInfoFromFilesAndOwnersDirectoriesDict( else:
files_split_by_reviewers) files_split_by_reviewers = SelectReviewersForFiles(
cl, author, files, max_depth)
cl_infos = CLInfoFromFilesAndOwnersDirectoriesDict(
files_split_by_reviewers)
if not dry_run: if not dry_run:
PrintSummary(cl_infos, refactor_branch) PrintSummary(cl_infos, refactor_branch)
answer = gclient_utils.AskForData('Proceed? (y/N):') answer = gclient_utils.AskForData(
if answer.lower() != 'y': 'Proceed? (y/N, or i to edit interactively): ')
return 0 if answer.lower() == 'i':
cl_infos = EditSplittingInteractively(cl_infos,
files_on_disk=files)
else:
# Save even if we're continuing, so the user can safely resume an
# aborted upload with the same splitting
SaveSplittingToTempFile(cl_infos)
if answer.lower() != 'y':
return 0
cls_per_reviewer = collections.defaultdict(int) cls_per_reviewer = collections.defaultdict(int)
for cl_index, cl_info in enumerate(cl_infos, 1): for cl_index, cl_info in enumerate(cl_infos, 1):
@ -431,6 +445,11 @@ def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
for reviewer, count in reviewer_rankings[:CL_SPLIT_TOP_REVIEWERS]: for reviewer, count in reviewer_rankings[:CL_SPLIT_TOP_REVIEWERS]:
print(f' {reviewer}: {count} CLs') print(f' {reviewer}: {count} CLs')
if dry_run:
# Wait until now to save the splitting so the file name doesn't get
# washed away by the flood of dry-run printing.
SaveSplittingToTempFile(cl_infos)
# Go back to the original branch. # Go back to the original branch.
git.run('checkout', refactor_branch) git.run('checkout', refactor_branch)
@ -489,3 +508,217 @@ def SelectReviewersForFiles(cl, author, files, max_depth):
info_split_by_reviewers[reviewers].owners_directories.append(directory) info_split_by_reviewers[reviewers].owners_directories.append(directory)
return info_split_by_reviewers return info_split_by_reviewers
def SaveSplittingToFile(cl_infos: List[CLInfo], filename: str, silent=False):
"""
Writes the listed CLs to the designated file, in a human-readable and
editable format. Include an explanation of the file format at the top,
as well as instructions for how to use it.
"""
preamble = (
"# CLs in this file must have the following format:\n"
"# A 'Reviewers: [...]' line, where '...' is a (possibly empty) list "
"of reviewer emails.\n"
"# A 'Description: ...' line, where '...' is any string (by default, "
"the list of directories the files have been pulled from).\n"
"# One or more file lines, consisting of an <action>, <file> pair, in "
"the format output by `git status`.\n\n"
"# Each 'Reviewers' line begins a new CL.\n"
"# To use the splitting in this file, use the --from-file option.\n\n")
cl_string = "\n\n".join([info.FormatForPrinting() for info in cl_infos])
gclient_utils.FileWrite(filename, preamble + cl_string)
if not silent:
print(f"Saved splitting to {filename}")
def SaveSplittingToTempFile(cl_infos: List[CLInfo], silent=False):
"""
Create a file in the user's temp directory, and save the splitting there.
"""
# We can't use gclient_utils.temporary_file because it will be removed
temp_file, temp_name = tempfile.mkstemp(prefix="split_cl_")
os.close(temp_file) # Necessary for windows
SaveSplittingToFile(cl_infos, temp_name, silent)
return temp_name
class ClSplitParseError(Exception):
pass
# Matches 'Reviewers: [...]', extracts the ...
reviewers_re = re.compile(r'Reviewers:\s*\[([^\]]*)\]')
# Matches 'Description: ...', extracts the ...
description_re = re.compile(r'Description:\s*(.+)')
# Matches '<action>, <file>', and extracts both
# <action> must be a valid code (either 1 or 2 letters)
file_re = re.compile(r'([MTADRC]{1,2}),\s*(.+)')
# TODO(crbug.com/389069356): Replace the "Description" line with an optional
# "Description" line, and adjust the description variables accordingly, as well
# as all the places in the code that expect to get a directory list.
# We use regex parsing instead of e.g. json because it lets us use a much more
# human-readable format, similar to the summary printed in dry runs
def ParseSplittings(lines: List[str]) -> List[CLInfo]:
"""
Parse a splitting file. We expect to get a series of lines in the format
of CLInfo.FormatForPrinting. In the following order, we expect to see
- A 'Reviewers: ' line containing a list,
- A 'Description: ' line containing anything, and
- A list of <action>, <path> pairs, each on its own line
Note that this function only transforms the file into a list of CLInfo
(if possible). It does not validate the information; for that, see
ValidateSplitting.
"""
cl_infos = []
current_cl_info = None
for line in lines:
line = line.strip()
# Skip empty or commented lines
if not line or line.startswith('#'):
continue
# Start a new CL whenever we see a new Reviewers: line
m = re.fullmatch(reviewers_re, line)
if m:
reviewers_str = m.group(1)
reviewers = [r.strip() for r in reviewers_str.split(",")]
# Account for empty list or trailing comma
if not reviewers[-1]:
reviewers = reviewers[:-1]
if current_cl_info:
cl_infos.append(current_cl_info)
current_cl_info = CLInfo(reviewers=reviewers)
continue
if not current_cl_info:
# Make sure no nonempty lines appear before the first CL
raise ClSplitParseError(
f"Error: Line appears before the first 'Reviewers: ' line:\n{line}"
)
# Description is just used as a description, so any string is fine
m = re.fullmatch(description_re, line)
if m:
if current_cl_info.directories:
raise ClSplitParseError(
f"Error parsing line: CL already has a directories entry\n{line}"
)
current_cl_info.directories = m.group(1).strip()
continue
# Any other line is presumed to be an '<action>, <file>' pair
m = re.fullmatch(file_re, line)
if m:
action, path = m.groups()
current_cl_info.files.append((action, path))
continue
raise ClSplitParseError("Error parsing line: Does not look like\n"
"'Reviewers: [...]',\n"
"'Description: ...', or\n"
f"a pair of '<action>, <file>':\n{line}")
if (current_cl_info):
cl_infos.append(current_cl_info)
return cl_infos
def ValidateSplitting(cl_infos: List[CLInfo], filename: str,
files_on_disk: List[Tuple[str, str]]):
"""
Ensure that the provided list of CLs is a valid splitting.
Specifically, check that:
- Each file is in at most one CL
- Each file and action appear in the list of changed files reported by git
- Warn if some files don't appear in any CL
- Warn if a reviewer string looks wrong, or if a CL is empty
"""
# Validate the parsed information
if not cl_infos:
EmitWarning("No CLs listed in file. No action will be taken.")
return []
files_in_loaded_cls = set()
# Collect all files, ensuring no duplicates
# Warn on empty CLs or invalid reviewer strings
for info in cl_infos:
if not info.files:
EmitWarning("CL has no files, and will be skipped:\n",
info.FormatForPrinting())
for file_info in info.files:
if file_info in files_in_loaded_cls:
raise ClSplitParseError(
f"File appears in multiple CLs in {filename}:\n{file_info}")
files_in_loaded_cls.add(file_info)
for reviewer in info.reviewers:
if not (re.fullmatch(r"[^@]+@[^.]+\..+", reviewer)):
EmitWarning("reviewer does not look like an email address: ",
reviewer)
# Strip empty CLs
cl_infos = [info for info in cl_infos if info.files]
# Ensure the files in the user-provided CL splitting match the files
# that git reports.
# Warn if not all the files git reports appear.
# Fail if the user mentions a file that isn't reported by git
files_on_disk = set(files_on_disk)
if not files_in_loaded_cls.issubset(files_on_disk):
extra_files = files_in_loaded_cls.difference(files_on_disk)
extra_files_str = "\n".join(f"{action}, {file}"
for (action, file) in extra_files)
raise ClSplitParseError(
f"Some files are listed in {filename} but do not match any files "
f"listed by git:\n{extra_files_str}")
unmentioned_files = files_on_disk.difference(files_in_loaded_cls)
if (unmentioned_files):
EmitWarning(
"the following files are not included in any CL in {filename}. "
"They will not be uploaded:\n", unmentioned_files)
def LoadSplittingFromFile(filename: str,
files_on_disk: List[Tuple[str, str]]) -> List[CLInfo]:
"""
Given a file and the list of <action>, <file> pairs reported by git,
read the file and return the list of CLInfos it contains.
"""
lines = gclient_utils.FileRead(filename).splitlines()
cl_infos = ParseSplittings(lines)
ValidateSplitting(cl_infos, filename, files_on_disk)
return cl_infos
def EditSplittingInteractively(
cl_infos: List[CLInfo],
files_on_disk: List[Tuple[str, str]]) -> List[CLInfo]:
"""
Allow the user to edit the generated splitting using their default editor.
Make sure the edited splitting is saved so they can retrieve it if needed.
"""
tmp_file = SaveSplittingToTempFile(cl_infos, silent=True)
splitting = gclient_utils.RunEditor(gclient_utils.FileRead(tmp_file), False)
cl_infos = ParseSplittings(splitting.splitlines())
# Save the edited splitting before validation, so the user can go back
# and edit it if there are any typos
SaveSplittingToFile(cl_infos, tmp_file)
ValidateSplitting(cl_infos, "the provided splitting", files_on_disk)
return cl_infos

@ -0,0 +1,12 @@
# CLs in this file must have the following format:
# A 'Reviewers: [...]' line, where '...' is a (possibly empty) list of reviewer emails.
# A 'Description: ...' line, where '...' is any string (by default, the list of directories the files have been pulled from).
# One or more file lines, consisting of an <action>, <file> pair, in the format output by `git status`.
# Each 'Reviewers' line begins a new CL.
# To use the splitting in this file, use the --from-file option.
Reviewers: [a@example.com]
Description: ['chrome/browser']
M, chrome/browser/a.cc
M, chrome/browser/b.cc

@ -0,0 +1,18 @@
# CLs in this file must have the following format:
# A 'Reviewers: [...]' line, where '...' is a (possibly empty) list of reviewer emails.
# A 'Description: ...' line, where '...' is any string (by default, the list of directories the files have been pulled from).
# One or more file lines, consisting of an <action>, <file> pair, in the format output by `git status`.
# Each 'Reviewers' line begins a new CL.
# To use the splitting in this file, use the --from-file option.
Reviewers: [a@example.com]
Description: ['chrome/browser']
M, chrome/browser/a.cc
M, chrome/browser/b.cc
Reviewers: [a@example.com, b@example.com]
Description: ['foo', 'bar/baz']
M, foo/browser/a.cc
M, bar/baz/b.cc
D, foo/bar/c.h

@ -0,0 +1,21 @@
Reviewers: [a@example.com]
Description: ['chrome/browser']
M, chrome/browser/a.cc
M, chrome/browser/b.cc
Reviewers: [a@example.com, b@example.com, ]
M, foo/browser/a.cc
Description: ['foo', 'bar/baz']
M, bar/baz/b.cc
D, foo/bar/c.h
Reviewers: []
Description: Custom string
A, a/b/c
Reviewers: [ ]
Description: Custom string
A, a/b/d
D, a/e

@ -0,0 +1,5 @@
Reviewers: [a@example.com]
Description: Whatever
M, a.cc
Reviewers: b@example.com

@ -0,0 +1,4 @@
Description: ['chrome/browser']
Reviewers: [a@example.com]
M, chrome/browser/a.cc
M, chrome/browser/b.cc

@ -0,0 +1,4 @@
Reviewers: [a@example.com]
Description: foo
Description: bar
M, a.cc

@ -0,0 +1,3 @@
Reviewers: [a@example.com]
Description: foo
a.cc

@ -0,0 +1,10 @@
Reviewers: [a@example.com]
Description: ['chrome/browser']
M, chrome/browser/a.cc
M, chrome/browser/b.cc
Reviewers: [a@example.com, b@example.com]
Description: ['foo', 'bar/baz']
M, chrome/browser/b.cc
M, bar/baz/b.cc
D, foo/bar/c.h

@ -0,0 +1,8 @@
Reviewers: [a@example.com]
Description: ['chrome/browser']
M, chrome/browser/a.cc
M, chrome/browser/b.cc
Reviewers: [b@example.com]
Description: ['chrome/']
A, c.h

@ -0,0 +1,3 @@
Reviewers: [John, Jane]
Description: Whatever
M, a.cc

@ -9,10 +9,21 @@ from unittest import mock
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import split_cl import split_cl
import gclient_utils
class SplitClTest(unittest.TestCase): class SplitClTest(unittest.TestCase):
@property
def _input_dir(self):
base = os.path.splitext(os.path.abspath(__file__))[0]
# Here _testMethodName is a string like "testCmdAssemblyFound"
# If the test doesn't have its own subdirectory, it uses a common one
path = os.path.join(base + ".inputs", self._testMethodName)
if not os.path.isdir(path):
path = os.path.join(base + ".inputs", "commonFiles")
return path
def testAddUploadedByGitClSplitToDescription(self): def testAddUploadedByGitClSplitToDescription(self):
description = """Convert use of X to Y in $directory description = """Convert use of X to Y in $directory
@ -244,6 +255,8 @@ class SplitClTest(unittest.TestCase):
self.mock_print_cl_info = self.StartPatcher("split_cl.PrintClInfo", self.mock_print_cl_info = self.StartPatcher("split_cl.PrintClInfo",
test) test)
self.mock_upload_cl = self.StartPatcher("split_cl.UploadCl", test) self.mock_upload_cl = self.StartPatcher("split_cl.UploadCl", test)
self.mock_save_splitting = self.StartPatcher(
"split_cl.SaveSplittingToTempFile", test)
# Suppress output for cleaner tests # Suppress output for cleaner tests
self.mock_print = self.StartPatcher("builtins.print", test) self.mock_print = self.StartPatcher("builtins.print", test)
@ -269,8 +282,8 @@ class SplitClTest(unittest.TestCase):
self.mock_get_reviewers.return_value = files_split_by_reviewers self.mock_get_reviewers.return_value = files_split_by_reviewers
self.mock_ask_for_data.return_value = proceed_response self.mock_ask_for_data.return_value = proceed_response
split_cl.SplitCl(description_file, None, mock.Mock(), None, dry_run, split_cl.SplitCl(description_file, None, mock.Mock(), mock.Mock(),
False, False, None, None, None) dry_run, False, False, None, None, None, None)
def testSplitClConfirm(self): def testSplitClConfirm(self):
split_cl_tester = self.SplitClTester(self) split_cl_tester = self.SplitClTester(self)
@ -315,6 +328,143 @@ class SplitClTest(unittest.TestCase):
len(files_split_by_reviewers)) len(files_split_by_reviewers))
split_cl_tester.mock_upload_cl.assert_not_called() split_cl_tester.mock_upload_cl.assert_not_called()
# Tests related to saving to and loading from files
# Sample CLInfos for testing
CLInfo_1 = split_cl.CLInfo(reviewers=["a@example.com"],
directories="['chrome/browser']",
files=[
("M", "chrome/browser/a.cc"),
("M", "chrome/browser/b.cc"),
])
CLInfo_2 = split_cl.CLInfo(reviewers=["a@example.com", "b@example.com"],
directories="['foo', 'bar/baz']",
files=[("M", "foo/browser/a.cc"),
("M", "bar/baz/b.cc"),
("D", "foo/bar/c.h")])
def testCLInfoFormat(self):
""" Make sure CLInfo printing works as expected """
def ReadAndStripPreamble(file):
""" Read the contents of a file and strip the automatically-added
preamble so we can do string comparison
"""
content = gclient_utils.FileRead(os.path.join(
self._input_dir, file))
# Strip preamble
stripped = [
line for line in content.splitlines()
if not line.startswith("#")
]
# Strip newlines in preamble
return "\n".join(stripped[2:])
# Direct string comparison
self.assertEqual(self.CLInfo_1.FormatForPrinting(),
ReadAndStripPreamble("1_cl.txt"))
self.assertEqual(
self.CLInfo_1.FormatForPrinting() +
"\n\n" + self.CLInfo_2.FormatForPrinting(),
ReadAndStripPreamble("2_cls.txt"))
@mock.patch("split_cl.EmitWarning")
def testParseCLInfo(self, mock_emit_warning):
""" Make sure we can parse valid files """
self.assertEqual([self.CLInfo_1],
split_cl.LoadSplittingFromFile(
os.path.join(self._input_dir, "1_cl.txt"),
self.CLInfo_1.files))
self.assertEqual([self.CLInfo_1, self.CLInfo_2],
split_cl.LoadSplittingFromFile(
os.path.join(self._input_dir, "2_cls.txt"),
self.CLInfo_1.files + self.CLInfo_2.files))
# Make sure everything in this file is valid to parse
split_cl.LoadSplittingFromFile(
os.path.join(self._input_dir, "odd_formatting.txt"),
self.CLInfo_1.files + self.CLInfo_2.files + [("A", "a/b/c"),
("A", "a/b/d"),
("D", "a/e")])
mock_emit_warning.assert_not_called()
def testParseBadFiles(self):
""" Make sure we don't parse invalid files """
for file in os.listdir(self._input_dir):
lines = gclient_utils.FileRead(os.path.join(self._input_dir,
file)).splitlines()
self.assertRaises(split_cl.ClSplitParseError,
split_cl.ParseSplittings, lines)
@mock.patch("split_cl.EmitWarning")
def testValidateBadFiles(self, mock_emit_warning):
""" Make sure we reject invalid CL lists """
# Warn on an empty file
split_cl.LoadSplittingFromFile(
os.path.join(self._input_dir, "warn_0_cls.txt"), [])
mock_emit_warning.assert_called_once()
mock_emit_warning.reset_mock()
# Warn if reviewers don't look like emails
split_cl.LoadSplittingFromFile(
os.path.join(self._input_dir, "warn_bad_reviewer_email.txt"),
[("M", "a.cc")])
self.assertEqual(mock_emit_warning.call_count, 2)
mock_emit_warning.reset_mock()
# Fail if a file appears in multiple CLs
self.assertRaises(
split_cl.ClSplitParseError, split_cl.LoadSplittingFromFile,
os.path.join(self._input_dir, "error_file_in_multiple_cls.txt"),
[("M", "chrome/browser/a.cc"), ("M", "chrome/browser/b.cc"),
("M", "bar/baz/b.cc"), ("D", "foo/bar/c.h")])
# Fail if a file is listed that doesn't appear on disk
self.assertRaises(
split_cl.ClSplitParseError, split_cl.LoadSplittingFromFile,
os.path.join(self._input_dir, "no_inherent_problems.txt"),
[("M", "chrome/browser/a.cc"), ("M", "chrome/browser/b.cc")])
self.assertRaises(
split_cl.ClSplitParseError,
split_cl.LoadSplittingFromFile,
os.path.join(self._input_dir, "no_inherent_problems.txt"),
[
("M", "chrome/browser/a.cc"),
("M", "chrome/browser/b.cc"),
("D", "c.h") # Wrong action, should still error
])
# Warn if not all files on disk are included
split_cl.LoadSplittingFromFile(
os.path.join(self._input_dir, "no_inherent_problems.txt"),
[("M", "chrome/browser/a.cc"), ("M", "chrome/browser/b.cc"),
("A", "c.h"), ("D", "d.h")])
mock_emit_warning.assert_called_once()
@mock.patch("builtins.print")
@mock.patch("gclient_utils.FileWrite")
def testParsingRoundTrip(self, mock_file_write, _):
""" Make sure that if we parse a file and save the result,
we get the same file. Only works on test files that are
nicely formatted. """
for file in os.listdir(self._input_dir):
if file == "odd_formatting.txt":
continue
contents = gclient_utils.FileRead(
os.path.join(self._input_dir, file))
parsed_contents = split_cl.ParseSplittings(contents.splitlines())
split_cl.SaveSplittingToFile(parsed_contents, "file.txt")
written_lines = [
args[0][1] for args in mock_file_write.call_args_list
]
self.assertEqual(contents, "".join(written_lines))
mock_file_write.reset_mock()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

Loading…
Cancel
Save