You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
depot_tools/split_cl.py

397 lines
14 KiB
Python

#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Splits a branch into smaller branches and uploads CLs."""
from __future__ import print_function
import collections
import os
import random
import re
import subprocess2
import sys
import tempfile
import git_footers
import owners
import owners_finder
import git_common as git
import third_party.pygtrie as trie
# If a call to `git cl split` will generate more than this number of CLs, the
# command will prompt the user to make sure they know what they're doing. Large
# numbers of CLs generated by `git cl split` have caused infrastructure issues
# in the past.
CL_SPLIT_FORCE_LIMIT = 10
def ReadFile(file_path):
"""Returns the content of |file_path|."""
with open(file_path) as f:
content = f.read()
return content
def EnsureInGitRepository():
"""Throws an exception if the current directory is not a git repository."""
git.run('rev-parse')
def CreateBranchForDirectory(prefix, cl_index, directory, upstream):
"""Creates a branch named |prefix| + "_" + |cl_index| + "_" + |directory|.
Return false if the branch already exists. |upstream| is used as upstream for
the created branch.
"""
existing_branches = set(git.branches(use_limit = False))
branch_name = '_'.join([prefix, str(cl_index), directory])
if branch_name in existing_branches:
return False
git.run('checkout', '-t', upstream, '-b', branch_name)
return True
def FormatDescriptionOrComment(txt, directory, cl_index, num_cls):
"""Replaces $directory with |directory|, $cl_index with |cl_index|, and
$num_cls with |num_cls| in |txt|."""
return txt.replace('$directory', '/' + directory).replace(
'$cl_index', str(cl_index)).replace('$num_cls', str(num_cls))
def AddUploadedByGitClSplitToDescription(description):
"""Adds a 'This CL was uploaded by git cl split.' line to |description|.
The line is added before footers, or at the end of |description| if it has no
footers.
"""
split_footers = git_footers.split_footers(description)
lines = split_footers[0]
if not lines[-1] or lines[-1].isspace():
lines = lines + ['']
lines = lines + ['This CL was uploaded by git cl split.']
if split_footers[1]:
lines += [''] + split_footers[1]
return '\n'.join(lines)
def UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
directory, files, description, comment, reviewer, changelist,
cmd_upload, cq_dry_run, enable_auto_submit):
"""Uploads a CL with all changes to |files| in |refactor_branch|.
Args:
cl_index: The index of this CL in the list of CLs to upload.
num_cls: The total number of CLs that will be uploaded.
refactor_branch: Name of the branch that contains the changes to upload.
refactor_branch_upstream: Name of the upstream of |refactor_branch|.
directory: Path to the directory that contains the OWNERS file for which
to upload a CL.
files: List of AffectedFile instances to include in the uploaded CL.
description: Description of the uploaded CL.
comment: Comment to post on the uploaded CL.
reviewer: The reviewer for the CL.
changelist: The Changelist class.
cmd_upload: The function associated with the git cl upload command.
cq_dry_run: If CL uploads should also do a cq dry run.
enable_auto_submit: If CL uploads should also enable auto submit.
"""
# Create a branch.
if not CreateBranchForDirectory(refactor_branch, cl_index, directory,
refactor_branch_upstream):
print('Skipping CL ' + cl_index + ' for directory "' + directory +
'" for which a branch already exists.')
return
# Checkout all changes to files in |files|.
deleted_files = [f.AbsoluteLocalPath() for f in files if f.Action() == 'D']
if deleted_files:
git.run(*['rm'] + deleted_files)
modified_files = [f.AbsoluteLocalPath() for f in files if f.Action() != 'D']
if modified_files:
git.run(*['checkout', refactor_branch, '--'] + modified_files)
# Commit changes. The temporary file is created with delete=False so that it
# can be deleted manually after git has read it rather than automatically
# when it is closed.
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
tmp_file.write(
FormatDescriptionOrComment(description, directory, cl_index, num_cls))
# Close the file to let git open it at the next line.
tmp_file.close()
git.run('commit', '-F', tmp_file.name)
os.remove(tmp_file.name)
# Upload a CL.
upload_args = ['-f', '-r', reviewer]
if cq_dry_run:
upload_args.append('--cq-dry-run')
if not comment:
upload_args.append('--send-mail')
if enable_auto_submit:
upload_args.append('--enable-auto-submit')
print('Uploading CL for ' + directory + '.')
cmd_upload(upload_args)
if comment:
changelist().AddComment(
FormatDescriptionOrComment(comment, directory, cl_index, num_cls),
publish=True)
class ChangeList(object):
"""Representation of a CL and the files affected by it."""
def __init__(self, path, owners_db, author, files):
self._path = path
self._files = files
self._owners_db = owners_db
self._author = author
self._owners = None
def _EnsureOwners(self):
if not self._owners:
self._owners = set()
files = [f.LocalPath() for f in self.GetFiles()]
if not files:
files = [self.GetPath()]
possible_owners = self._owners_db.all_possible_owners(
files, self._author).keys()
for owner in possible_owners:
if 0 == len(self._owners_db.files_not_covered_by(files, [owner])):
self._owners |= set([owner])
assert len(self._owners)
def Merge(self, other):
self._owners = self.GetCommonOwners(other)
self._files |= other.GetFiles()
def GetPath(self):
return self._path
def GetFiles(self):
return self._files
def GetOwners(self):
self._EnsureOwners()
return self._owners
def GetCommonOwners(self, other):
return self.GetOwners() & other.GetOwners()
def HaveCommonOwners(self, other):
return len(self.GetCommonOwners(other)) > 0
def GetChangeSizeInBytes(self):
return sum(
[c[0] + c[1] for f in self._files for c in f.ChangeSizeInBytes()])
def SplitCLs(owners_database, author, files):
"""Returns a map of files split by OWNERS file.
Returns:
A map where keys are paths to directories containing an OWNERS file and
values are lists of files sharing an OWNERS file.
"""
# The target CL size in # of changed bytes.
# TODO(yannic): Use # of changed lines instead and make this configurable.
max_cl_size = 1000
candidates = trie.Trie()
# Enable sorting so dry-run will split the CL the same way the CL is uploaded.
candidates.enable_sorting()
# 1. Create one CL candidate for every affected file.
for f in files:
path = f.LocalPath()
candidates[path] = ChangeList(path, owners_database, author, set([f]))
change_lists = []
# 2. Try to merge CL in common directories up to a maximum size of
# |max_cl_size|.
# This is O( len(files) * max([len(f.path) for f in files]) ).
edited = True
while edited:
edited = False
# 2.1. Iterate over all candidates and merge candidates into the candidate
# for their parent directory if the resulting CL doesn't exceed
# |max_cl_size|.
for item in candidates.items():
path = ''.join(item[0])
candidate = item[1]
# The number of CL candidates in subdirectories is equivalent to the
# number of nodes with prefix |path| in the Trie.
# Only try to merge |candidate| with the candidate for the parent
# directory if there are no more CLs for subdirectories.
sub_cls = len([''.join(k) for k in candidates.keys(path)]) - 1
if not sub_cls:
parent_path = os.path.dirname(path)
if len(parent_path) < 1:
# Don't create CLs for more than one top-level directory.
continue
if parent_path not in candidates:
candidates[parent_path] = ChangeList(parent_path, owners_database,
author, set())
parent_cl = candidates[parent_path]
if not parent_cl.HaveCommonOwners(candidate):
# Don't merge if the resulting CL would need more than one reviewer.
continue
# Merge |candidate| into the CL for it's parent directory and remove
# candidate.
edited = True
del candidates[path]
parent_cl.Merge(candidate)
# Add |parent_cl| to list of CLs to submit if the CL is larger than
# |max_cl_size|.
# TODO(yannic): Doing it this way, we might end up with CLs of size
# 2 * max_cl_size if we merged two candidates that just don't exceed
# the maximal size.
if parent_cl.GetChangeSizeInBytes() > max_cl_size:
change_lists.append(parent_cl)
del candidates[parent_path]
# 3. Add all remaining candidates to the list of CLs.
for item in candidates.items():
change_lists.append(item[1])
return change_lists
def PrintClInfo(cl_index, num_cls, directory, file_paths, description,
reviewer):
"""Prints info about a CL.
Args:
cl_index: The index of this CL in the list of CLs to upload.
num_cls: The total number of CLs that will be uploaded.
directory: Path to the directory that contains the OWNERS file for which
to upload a CL.
file_paths: A list of files in this CL.
description: The CL description.
reviewer: The reviewer for this CL.
"""
description_lines = FormatDescriptionOrComment(
description, directory, cl_index, num_cls).splitlines()
indented_description = '\n'.join([' ' + l for l in description_lines])
print('CL {}/{}'.format(cl_index, num_cls))
print('Path: {}'.format(directory))
print('Reviewers: {}'.format(reviewer))
print('\n' + indented_description + '\n')
print('\n'.join(file_paths))
print()
def _SelectReviewer(possible_owners, used_reviewers):
"""Select a reviewer from |owners| and adds them to the set of used reviewers.
Returns:
The reviewer.
"""
# It's debatable whether we want to avoid reusing reviewers. It could be
# easier to ask the smallest possible amount of reviewers to become familiar
# with the change being split. However, doing so would mean we send all CLs to
# top-level owners, which might be too much to ask from them.
# We may revisit this decicion later.
unused_reviewers = possible_owners.difference(used_reviewers)
if len(unused_reviewers) < 1:
unused_reviewers = possible_owners
# Pick a random reviwer from the set of owners so we don't prefer owners
# with emails of low lexical order.
reviewer = random.choice(tuple(unused_reviewers))
used_reviewers.add(reviewer)
return reviewer
def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
cq_dry_run, enable_auto_submit):
""""Splits a branch into smaller branches and uploads CLs.
Args:
description_file: File containing the description of uploaded CLs.
comment_file: File containing the comment of uploaded CLs.
changelist: The Changelist class.
cmd_upload: The function associated with the git cl upload command.
dry_run: Whether this is a dry run (no branches or CLs created).
cq_dry_run: If CL uploads should also do a cq dry run.
enable_auto_submit: If CL uploads should also enable auto submit.
Returns:
0 in case of success. 1 in case of error.
"""
description = AddUploadedByGitClSplitToDescription(ReadFile(description_file))
comment = ReadFile(comment_file) if comment_file else None
try:
EnsureInGitRepository()
cl = changelist()
change = cl.GetChange(cl.GetCommonAncestorWithUpstream())
files = change.AffectedFiles()
if not files:
print('Cannot split an empty CL.')
return 1
author = git.run('config', 'user.email').strip() or None
refactor_branch = git.current_branch()
assert refactor_branch, "Can't run from detached branch."
refactor_branch_upstream = git.upstream(refactor_branch)
assert refactor_branch_upstream, \
"Branch %s must have an upstream." % refactor_branch
owners_database = owners.Database(change.RepositoryRoot(), file, os.path)
owners_database.load_data_needed_for([f.LocalPath() for f in files])
change_lists = SplitCLs(owners_database, author, set(files))
num_cls = len(change_lists)
if cq_dry_run and num_cls > CL_SPLIT_FORCE_LIMIT:
print(
'This will generate "%r" CLs. This many CLs can potentially generate'
' too much load on the build infrastructure. Please email'
' infra-dev@chromium.org to ensure that this won\'t break anything.'
' The infra team reserves the right to cancel your jobs if they are'
' overloading the CQ.' % num_cls)
answer = raw_input('Proceed? (y/n):')
if answer.lower() != 'y':
return 0
reviewers = set()
for cl_index, cl in enumerate(change_lists, 1):
# Use '/' as a path separator in the branch name and the CL description
# and comment.
directory = cl.GetPath().replace(os.path.sep, '/')
file_paths = [f.LocalPath() for f in cl.GetFiles()]
reviewer = _SelectReviewer(cl.GetOwners(), reviewers)
if dry_run:
PrintClInfo(cl_index, num_cls, directory, file_paths, description,
reviewer)
else:
UploadCl(cl_index, num_cls, refactor_branch, refactor_branch_upstream,
directory, files, description, comment, reviewer, changelist,
cmd_upload, cq_dry_run, enable_auto_submit)
# Go back to the original branch.
git.run('checkout', refactor_branch)
except subprocess2.CalledProcessError as cpe:
sys.stderr.write(cpe.stderr)
return 1
return 0