From 367c6f720da20e9eb848ce86039138ddca942736 Mon Sep 17 00:00:00 2001 From: Patrick Monette Date: Tue, 2 Jul 2024 15:03:58 +0000 Subject: [PATCH] Add git squash-branch-tree This script allows squashing a branch and all of its downstream branches in one command. This is useful for squashing stacked branches that have a lot of commits each just before using `git rebase-update`, as this is highly likely to run into a lot of rebase conflicts, repeatedly. This should only be used if the user is okay with losing their git commit history. Change-Id: Id28a941d898def73f785d791499c7ee0784e0136 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5659262 Reviewed-by: Gavin Mak Reviewed-by: Josip Sokcevic Commit-Queue: Patrick Monette --- git-squash-branch-tree | 6 ++ git_squash_branch_tree.py | 132 +++++++++++++++++++++++++++ testing_support/git_test_utils.py | 5 +- tests/git_squash_branch_tree_test.py | 130 ++++++++++++++++++++++++++ 4 files changed, 272 insertions(+), 1 deletion(-) create mode 100755 git-squash-branch-tree create mode 100755 git_squash_branch_tree.py create mode 100755 tests/git_squash_branch_tree_test.py diff --git a/git-squash-branch-tree b/git-squash-branch-tree new file mode 100755 index 000000000..947be7eec --- /dev/null +++ b/git-squash-branch-tree @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +# Copyright 2024 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +. "$(type -P python_runner.sh)" diff --git a/git_squash_branch_tree.py b/git_squash_branch_tree.py new file mode 100755 index 000000000..724f7ff5b --- /dev/null +++ b/git_squash_branch_tree.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +# Copyright 2024 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +''' +Tool to squash all branches and their downstream branches. 
Useful to avoid +potential conflicts during a git rebase-update with multiple stacked CLs. +''' + +import argparse +import collections +import git_common as git +import sys + + +# Returns the list of branches that have diverged from their respective upstream +# branch. +def get_diverged_branches(tree): + diverged_branches = [] + for branch, upstream_branch in tree.items(): + # If the merge base of a branch and its upstream is not equal to the + # upstream, then it means that both branches have diverged. + upstream_branch_hash = git.hash_one(upstream_branch) + merge_base_hash = git.hash_one(git.get_or_create_merge_base(branch)) + if upstream_branch_hash != merge_base_hash: + diverged_branches.append(branch) + return diverged_branches + + +# Returns a dictionary that contains the hash of every branch before the +# squashing started. +def get_initial_hashes(tree): + initial_hashes = {} + for branch, upstream_branch in tree.items(): + initial_hashes[branch] = git.hash_one(branch) + initial_hashes[upstream_branch] = git.hash_one(upstream_branch) + return initial_hashes + + +# Returns a dictionary that contains the downstream branches of every branch. +def get_downstream_branches(tree): + downstream_branches = collections.defaultdict(list) + for branch, upstream_branch in tree.items(): + downstream_branches[upstream_branch].append(branch) + return downstream_branches + + +# Squash a branch, taking care to rebase the branch on top of the new commit +# position of its upstream branch. +def squash_branch(branch, initial_hashes): + print('Squashing branch %s.' % branch) + assert initial_hashes[branch] == git.hash_one(branch) + + upstream_branch = git.upstream(branch) + old_upstream_branch = initial_hashes[upstream_branch] + + # Because the branch's upstream has potentially changed from squashing it, + # the current branch is rebased on top of the new upstream. + git.run('rebase', '--onto', upstream_branch, old_upstream_branch, branch, + '--update-refs') + + # Now do the squashing. 
+ git.run('checkout', branch) + git.squash_current_branch() + + +# Squashes all branches that are part of the subtree starting at `branch`. +def squash_subtree(branch, initial_hashes, downstream_branches): + # The upstream default never has to be squashed (e.g. origin/main). + if branch != git.upstream_default(): + squash_branch(branch, initial_hashes) + + # Recurse on downstream branches, if any. + for downstream_branch in downstream_branches[branch]: + squash_subtree(downstream_branch, initial_hashes, downstream_branches) + + +def main(args=None): + parser = argparse.ArgumentParser() + parser.add_argument('--ignore-no-upstream', + action='store_true', + help='Allows proceeding if any branch has no ' + 'upstreams.') + parser.add_argument('--branch', + '-b', + type=str, + default=git.current_branch(), + help='The name of the branch who\'s subtree must be ' + 'squashed. Defaults to the current branch.') + opts = parser.parse_args(args) + + if git.is_dirty_git_tree('squash-branch-tree'): + return 1 + + branches_without_upstream, tree = git.get_branch_tree() + + if not opts.ignore_no_upstream and branches_without_upstream: + print('Cannot use `git squash-branch-tree` since the following\n' + 'branches don\'t have an upstream:') + for branch in branches_without_upstream: + print(f' - {branch}') + print('Use --ignore-no-upstream to ignore this check and proceed.') + return 1 + + diverged_branches = get_diverged_branches(tree) + if diverged_branches: + print('Cannot use `git squash-branch-tree` since the following\n' + 'branches have diverged from their upstream and could cause\n' + 'conflicts:') + for diverged_branch in diverged_branches: + print(f' - {diverged_branch}') + return 1 + + # Before doing the squashing, save the currently checked out branch so + # we can go back to it at the end. 
+ return_branch = git.current_branch() + + initial_hashes = get_initial_hashes(tree) + downstream_branches = get_downstream_branches(tree) + squash_subtree(opts.branch, initial_hashes, downstream_branches) + + git.run('checkout', return_branch) + + return 0 + + +if __name__ == '__main__': # pragma: no cover + try: + sys.exit(main(sys.argv[1:])) + except KeyboardInterrupt: + sys.stderr.write('interrupted\n') + sys.exit(1) diff --git a/testing_support/git_test_utils.py b/testing_support/git_test_utils.py index e398ae342..5b8b6aedf 100644 --- a/testing_support/git_test_utils.py +++ b/testing_support/git_test_utils.py @@ -289,6 +289,8 @@ class GitRepo(object): Args: schema - An instance of GitRepoSchema """ + self.last_commit = None + self.repo_path = os.path.realpath( tempfile.mkdtemp(dir=self.BASE_TEMP_DIR)) self.commit_map = {} @@ -521,7 +523,8 @@ class GitRepoReadOnlyTestBase(GitRepoSchemaTestBase): cls.repo = cls.r_schema.reify() def setUp(self): - self.repo.git('checkout', '-f', self.repo.last_commit) + if self.repo.last_commit is not None: + self.repo.git('checkout', '-f', self.repo.last_commit) @classmethod def tearDownClass(cls): diff --git a/tests/git_squash_branch_tree_test.py b/tests/git_squash_branch_tree_test.py new file mode 100755 index 000000000..d04642aed --- /dev/null +++ b/tests/git_squash_branch_tree_test.py @@ -0,0 +1,130 @@ +#!/usr/bin/env vpython3 +# coding=utf-8 +# Copyright 2024 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Tests for git_squash_branch_tree.""" + +import os +import sys +import unittest + +DEPOT_TOOLS_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, DEPOT_TOOLS_ROOT) + +from testing_support import git_test_utils + +import git_squash_branch_tree +import git_common + +git_common.TEST_MODE = True + + +class GitSquashBranchTreeTest(git_test_utils.GitRepoReadWriteTestBase): + # Empty repo. 
+ REPO_SCHEMA = """ + """ + + def setUp(self): + super(GitSquashBranchTreeTest, self).setUp() + + # Note: Using the REPO_SCHEMA wouldn't simplify this test so it is not + # used. + # + # Create a repo with the following schema + # + # main <- branchA <- branchB + # ^ + # \ branchC + # + # where each branch has 2 commits. + + # The repo is empty. Add the first commit or else most commands don't + # work, including `git branch`, which doesn't even show the main branch. + self.repo.git('commit', '-m', 'First commit', '--allow-empty') + + # Create the first branch downstream from `main` with 2 commits. + self.repo.git('checkout', '-B', 'branchA', '--track', 'main') + self._createFileAndCommit('fileA1') + self._createFileAndCommit('fileA2') + + # Create a branch downstream from `branchA` with 2 commits. + self.repo.git('checkout', '-B', 'branchB', '--track', 'branchA') + self._createFileAndCommit('fileB1') + self._createFileAndCommit('fileB2') + + # Create another branch downstream from `branchA` with 2 commits. + self.repo.git('checkout', '-B', 'branchC', '--track', 'branchA') + self._createFileAndCommit('fileC1') + self._createFileAndCommit('fileC2') + + def testGitSquashBranchTreeDefaultCurrent(self): + self.assertEqual(self._getCountAheadOfUpstream('branchA'), 2) + self.assertEqual(self._getCountAheadOfUpstream('branchB'), 2) + self.assertEqual(self._getCountAheadOfUpstream('branchC'), 2) + + # Note: Passing --ignore-no-upstream as this repo has no remote and so + # the `main` branch can't have an upstream. 
+ self.repo.git('checkout', 'branchB') + self.repo.run(git_squash_branch_tree.main, ['--ignore-no-upstream']) + + self.assertEqual(self._getCountAheadOfUpstream('branchA'), 2) + self.assertEqual(self._getCountAheadOfUpstream('branchB'), 1) + self.assertEqual(self._getCountAheadOfUpstream('branchC'), 2) + + def testGitSquashBranchTreeAll(self): + self.assertEqual(self._getCountAheadOfUpstream('branchA'), 2) + self.assertEqual(self._getCountAheadOfUpstream('branchB'), 2) + self.assertEqual(self._getCountAheadOfUpstream('branchC'), 2) + + self.repo.run(git_squash_branch_tree.main, + ['--branch', 'branchA', '--ignore-no-upstream']) + + self.assertEqual(self._getCountAheadOfUpstream('branchA'), 1) + self.assertEqual(self._getCountAheadOfUpstream('branchB'), 1) + self.assertEqual(self._getCountAheadOfUpstream('branchC'), 1) + + def testGitSquashBranchTreeSingle(self): + self.assertEqual(self._getCountAheadOfUpstream('branchA'), 2) + self.assertEqual(self._getCountAheadOfUpstream('branchB'), 2) + self.assertEqual(self._getCountAheadOfUpstream('branchC'), 2) + + self.repo.run(git_squash_branch_tree.main, + ['--branch', 'branchB', '--ignore-no-upstream']) + + self.assertEqual(self._getCountAheadOfUpstream('branchA'), 2) + self.assertEqual(self._getCountAheadOfUpstream('branchB'), 1) + self.assertEqual(self._getCountAheadOfUpstream('branchC'), 2) + + self.repo.run(git_squash_branch_tree.main, + ['--branch', 'branchC', '--ignore-no-upstream']) + + self.assertEqual(self._getCountAheadOfUpstream('branchA'), 2) + self.assertEqual(self._getCountAheadOfUpstream('branchB'), 1) + self.assertEqual(self._getCountAheadOfUpstream('branchC'), 1) + + self.repo.run(git_squash_branch_tree.main, + ['--branch', 'branchA', '--ignore-no-upstream']) + + self.assertEqual(self._getCountAheadOfUpstream('branchA'), 1) + self.assertEqual(self._getCountAheadOfUpstream('branchB'), 1) + self.assertEqual(self._getCountAheadOfUpstream('branchC'), 1) + + # Creates a file with arbitrary contents and commit 
it to the current + # branch. + def _createFileAndCommit(self, filename): + with self.repo.open(filename, 'w') as f: + f.write('content') + self.repo.git('add', filename) + self.repo.git_commit('Added file ' + filename) + + # Returns the count of how many commits `branch` is ahead of its upstream. + def _getCountAheadOfUpstream(self, branch): + upstream = branch + '@{u}' + output = self.repo.git('rev-list', '--count', + upstream + '..' + branch).stdout + return int(output) + + +if __name__ == '__main__': + unittest.main()