From f0f34c47f753ada4da0be46eedea01ae3c4c86c8 Mon Sep 17 00:00:00 2001
From: Allen Li
Date: Fri, 9 Aug 2024 01:40:20 +0000
Subject: [PATCH] Revert "[git-number] Remove unused git-number script."

This reverts commit c92e85abb7cec53e5827f5484d2dab9f90a4339a.

Reason for revert: broke a builder

Original change's description:
> [git-number] Remove unused git-number script.
>
> R=ayatane, yiwzhang
>
> Change-Id: I53430ba4cf4ff83e222b4d9ffdeddfd6299e3330
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5766853
> Reviewed-by: Yiwei Zhang
> Reviewed-by: Allen Li
> Commit-Queue: Robbie Iannucci

Change-Id: Iaec18d6e688343ef1276166ff9e6bac033c6de56
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5773571
Reviewed-by: Josip Sokcevic
Bot-Commit: Rubber Stamper
Auto-Submit: Allen Li
Commit-Queue: Josip Sokcevic
---
 git_number.py            | 314 +++++++++++++++++++++++++++++++++++++++
 tests/git_number_test.py |  84 +++++++++++
 2 files changed, 398 insertions(+)
 create mode 100755 git_number.py
 create mode 100755 tests/git_number_test.py

diff --git a/git_number.py b/git_number.py
new file mode 100755
index 000000000..541a15841
--- /dev/null
+++ b/git_number.py
@@ -0,0 +1,314 @@
+#!/usr/bin/env vpython3
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Usage: %prog [options] [<commitref>]*
+
+If no <commitref>'s are supplied, it defaults to HEAD.
+
+Calculates the generation number for one or more commits in a git repo.
+
+Generation number of a commit C with parents P is defined as:
+  generation_number(C, []) = 0
+  generation_number(C, P) = max(map(generation_number, P)) + 1
+
+This number can be used to order commits relative to each other, as long as for
+any pair of the commits, one is an ancestor of the other.
+
+Since calculating the generation number of a commit requires walking that
+commit's entire history, this script caches all calculated data inside the git
+repo that it operates on in the ref 'refs/number/commits'.
+"""
+
+import binascii
+import collections
+import logging
+import optparse
+import os
+import struct
+import sys
+import tempfile
+
+import git_common as git
+import subprocess2
+
+CHUNK_FMT = '!20sL'
+CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
+DIRTY_TREES = collections.defaultdict(int)
+REF = 'refs/number/commits'
+AUTHOR_NAME = 'git-number'
+AUTHOR_EMAIL = 'chrome-infrastructure-team@google.com'
+
+# Number of bytes to use for the prefix on our internal number structure.
+# 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would
+# need to reimplement cache data structures to be a bit more sophisticated than
+# dicts). 1 seems to be just right.
+PREFIX_LEN = 1
+
+# Set this to 'threads' to gather coverage data while testing.
+POOL_KIND = 'procs'
+
+
+def pathlify(hash_prefix):
+    """Converts a binary object hash prefix into a posix path, one folder per
+    byte.
+
+    >>> pathlify(b'\xDE\xAD')
+    'de/ad'
+    """
+    return '/'.join('%02x' % b for b in hash_prefix)
+
+
+@git.memoize_one(threadsafe=False)
+def get_number_tree(prefix_bytes):
+    """Returns a dictionary of the git-number registry specified by
+    |prefix_bytes|.
+
+    This is in the form of {<full binary ref>: <gen num> ...}
+
+    >>> get_number_tree(b'\x83\xb4')
+    {b'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
+    """
+    ref = '%s:%s' % (REF, pathlify(prefix_bytes))
+
+    try:
+        raw = git.run('cat-file', 'blob', ref, autostrip=False, decode=False)
+        return dict(
+            struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
+            for i in range(len(raw) // CHUNK_SIZE))
+    except subprocess2.CalledProcessError:
+        return {}
+
+
+@git.memoize_one(threadsafe=False)
+def get_num(commit_hash):
+    """Returns the generation number for a commit.
+
+    Returns None if the generation number for this commit hasn't been calculated
+    yet (see load_generation_numbers()).
+    """
+    return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
+
+
+def clear_caches(on_disk=False):
+    """Clears in-process caches for e.g. unit testing."""
+    get_number_tree.clear()
+    get_num.clear()
+    if on_disk:
+        git.run('update-ref', '-d', REF)
+
+
+def intern_number_tree(tree):
+    """Transforms a number tree (in the form returned by |get_number_tree|) into
+    a git blob.
+
+    Returns the git blob id as hex-encoded string.
+
+    >>> d = {b'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
+    >>> intern_number_tree(d)
+    'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
+    """
+    with tempfile.TemporaryFile() as f:
+        for k, v in sorted(tree.items()):
+            f.write(struct.pack(CHUNK_FMT, k, v))
+        f.seek(0)
+        return git.intern_f(f)
+
+
+def leaf_map_fn(pre_tree):
+    """Converts a prefix and number tree into a git index line."""
+    pre, tree = pre_tree
+    return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
+
+
+def finalize(targets):
+    """Saves all cache data to the git repository.
+
+    After calculating the generation number for |targets|, call finalize() to
+    save all the work to the git repository.
+
+    This in particular saves the trees referred to by DIRTY_TREES.
+    """
+    if not DIRTY_TREES:
+        return
+
+    msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.values())
+
+    idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
+    env = os.environ.copy()
+    env['GIT_INDEX_FILE'] = str(idx)
+
+    progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
+    with git.ProgressPrinter(progress_message) as inc:
+        git.run('read-tree', REF, env=env)
+
+        prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
+        updater = subprocess2.Popen(
+            ['git', 'update-index', '-z', '--index-info'],
+            stdin=subprocess2.PIPE,
+            env=env)
+
+        with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
+            for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
+                updater.stdin.write(item.encode())
+                inc()
+
+        updater.stdin.close()
+        updater.wait()
+        assert updater.returncode == 0
+
+        tree_id = git.run('write-tree', env=env)
+        commit_cmd = [
+            # Git user.name and/or user.email may not be configured, so
+            # specifying them explicitly. They are not used, but required by
+            # Git.
+            '-c',
+            'user.name=%s' % AUTHOR_NAME,
+            '-c',
+            'user.email=%s' % AUTHOR_EMAIL,
+            'commit-tree',
+            '-m',
+            msg,
+            '-p'
+        ] + git.hash_multi(REF)
+        for t in targets:
+            commit_cmd.extend(['-p', binascii.hexlify(t).decode()])
+        commit_cmd.append(tree_id)
+        commit_hash = git.run(*commit_cmd)
+        git.run('update-ref', REF, commit_hash)
+    DIRTY_TREES.clear()
+
+
+def preload_tree(prefix):
+    """Returns the prefix and parsed tree object for the specified prefix."""
+    return prefix, get_number_tree(prefix)
+
+
+def all_prefixes(depth=PREFIX_LEN):
+    """Yields every possible binary hash prefix that is |depth| bytes long."""
+    prefixes = [bytes([i]) for i in range(256)]  # 0x00..0xff inclusive
+    for x in prefixes:
+        # This isn't covered because PREFIX_LEN currently == 1
+        if depth > 1:  # pragma: no cover
+            for r in all_prefixes(depth - 1):
+                yield x + r
+        else:
+            yield x
+
+
+def load_generation_numbers(targets):
+    """Populates the caches of get_num and get_number_tree so they contain
+    the results for |targets|.
+
+    Loads cached numbers from disk, and calculates missing numbers if one or
+    more of |targets| is newer than the cached calculations.
+
+    Args:
+        targets - An iterable of binary-encoded full git commit hashes.
+    """
+    # In case they pass us a generator, listify targets.
+    targets = list(targets)
+
+    if all(get_num(t) is not None for t in targets):
+        return
+
+    if git.tree(REF) is None:
+        empty = git.mktree({})
+        commit_hash = git.run(
+            # Git user.name and/or user.email may not be configured, so
+            # specifying them explicitly. They are not used, but required by
+            # Git.
+            '-c',
+            'user.name=%s' % AUTHOR_NAME,
+            '-c',
+            'user.email=%s' % AUTHOR_EMAIL,
+            'commit-tree',
+            '-m',
+            'Initial commit from git-number',
+            empty)
+        git.run('update-ref', REF, commit_hash)
+
+    with git.ScopedPool(kind=POOL_KIND) as pool:
+        preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
+
+        rev_list = []
+
+        with git.ProgressPrinter('Loading commits: %(count)d') as inc:
+            # Curiously, buffering the list into memory seems to be the fastest
+            # approach in python (as opposed to iterating over the lines in the
+            # stdout as they're produced). GIL strikes again :/
+            cmd = [
+                'rev-list',
+                '--topo-order',
+                '--parents',
+                '--reverse',
+                '^' + REF,
+            ] + [binascii.hexlify(target).decode() for target in targets]
+            for line in git.run(*cmd).splitlines():
+                tokens = [binascii.unhexlify(token) for token in line.split()]
+                rev_list.append((tokens[0], tokens[1:]))
+                inc()
+
+        get_number_tree.update(preload_iter)
+
+    with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
+        for commit_hash, pars in rev_list:
+            num = max(map(get_num, pars)) + 1 if pars else 0
+
+            prefix = commit_hash[:PREFIX_LEN]
+            get_number_tree(prefix)[commit_hash] = num
+            DIRTY_TREES[prefix] += 1
+            get_num.set(commit_hash, num)
+
+            inc()
+
+
+def main():  # pragma: no cover
+    parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
+    parser.add_option('--no-cache',
+                      action='store_true',
+                      help='Do not actually cache anything we calculate.')
+    parser.add_option('--reset',
+                      action='store_true',
+                      help='Reset the generation number cache and quit.')
+    parser.add_option('-v',
+                      '--verbose',
+                      action='count',
+                      default=0,
+                      help='Be verbose. Use more times for more verbosity.')
+    opts, args = parser.parse_args()
+
+    levels = [logging.ERROR, logging.INFO, logging.DEBUG]
+    logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)])
+
+    # 'git number' should only be used on bots.
+    if os.getenv('CHROME_HEADLESS') != '1':
+        logging.error(
+            "'git-number' is an infrastructure tool that is only "
+            "intended to be used internally by bots. Developers should "
+            "use the 'Cr-Commit-Position' value in the commit's message.")
+        return 1
+
+    if opts.reset:
+        clear_caches(on_disk=True)
+        return
+
+    try:
+        targets = git.parse_commitrefs(*(args or ['HEAD']))
+    except git.BadCommitRefException as e:
+        parser.error(e)
+
+    load_generation_numbers(targets)
+    if not opts.no_cache:
+        finalize(targets)
+
+    print('\n'.join(map(str, map(get_num, targets))))
+    return 0
+
+
+if __name__ == '__main__':  # pragma: no cover
+    try:
+        sys.exit(main())
+    except KeyboardInterrupt:
+        sys.stderr.write('interrupted\n')
+        sys.exit(1)
diff --git a/tests/git_number_test.py b/tests/git_number_test.py
new file mode 100755
index 000000000..c97d40449
--- /dev/null
+++ b/tests/git_number_test.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env vpython3
+# Copyright 2013 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Unit tests for git_number.py"""
+
+import binascii
+import os
+import sys
+
+DEPOT_TOOLS_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, DEPOT_TOOLS_ROOT)
+
+from testing_support import git_test_utils
+from testing_support import coverage_utils
+
+
+class Basic(git_test_utils.GitRepoReadWriteTestBase):
+    REPO_SCHEMA = """
+    A B C D E
+    B F E
+    X Y E
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        super(Basic, cls).setUpClass()
+        import git_number
+        cls.gn = git_number
+        cls.old_POOL_KIND = cls.gn.POOL_KIND
+        cls.gn.POOL_KIND = 'threads'
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.gn.POOL_KIND = cls.old_POOL_KIND
+        super(Basic, cls).tearDownClass()
+
+    def tearDown(self):
+        self.gn.clear_caches()
+        super(Basic, self).tearDown()
+
+    def _git_number(self, refs, cache=False):
+        refs = [binascii.unhexlify(ref) for ref in refs]
+        self.repo.run(self.gn.load_generation_numbers, refs)
+        if cache:
+            self.repo.run(self.gn.finalize, refs)
+        return [self.gn.get_num(ref) for ref in refs]
+
+    def testBasic(self):
+        self.assertEqual([0], self._git_number([self.repo['A']]))
+        self.assertEqual([2], self._git_number([self.repo['F']]))
+        self.assertEqual([0], self._git_number([self.repo['X']]))
+        self.assertEqual([4], self._git_number([self.repo['E']]))
+
+    def testInProcessCache(self):
+        self.assertEqual(
+            None,
+            self.repo.run(self.gn.get_num, binascii.unhexlify(self.repo['A'])))
+        self.assertEqual([4], self._git_number([self.repo['E']]))
+        self.assertEqual(
+            0, self.repo.run(self.gn.get_num,
+                             binascii.unhexlify(self.repo['A'])))
+
+    def testOnDiskCache(self):
+        self.assertEqual(
+            None,
+            self.repo.run(self.gn.get_num, binascii.unhexlify(self.repo['A'])))
+        self.assertEqual([4], self._git_number([self.repo['E']], cache=True))
+        self.assertEqual([4], self._git_number([self.repo['E']], cache=True))
+        self.gn.clear_caches()
+        self.assertEqual(
+            0, self.repo.run(self.gn.get_num,
+                             binascii.unhexlify(self.repo['A'])))
+        self.gn.clear_caches()
+        self.repo.run(self.gn.clear_caches, True)
+        self.assertEqual(
+            None,
+            self.repo.run(self.gn.get_num, binascii.unhexlify(self.repo['A'])))
+
+
+if __name__ == '__main__':
+    sys.exit(
+        coverage_utils.covered_main(
+            os.path.join(DEPOT_TOOLS_ROOT, 'git_number.py'), '3.7'))