diff --git a/git_number.py b/git_number.py deleted file mode 100755 index 541a15841..000000000 --- a/git_number.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/usr/bin/env vpython3 -# Copyright 2013 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. -"""Usage: %prog [options] []* - -If no 's are supplied, it defaults to HEAD. - -Calculates the generation number for one or more commits in a git repo. - -Generation number of a commit C with parents P is defined as: - generation_number(C, []) = 0 - generation_number(C, P) = max(map(generation_number, P)) + 1 - -This number can be used to order commits relative to each other, as long as for -any pair of the commits, one is an ancestor of the other. - -Since calculating the generation number of a commit requires walking that -commit's entire history, this script caches all calculated data inside the git -repo that it operates on in the ref 'refs/number/commits'. -""" - -import binascii -import collections -import logging -import optparse -import os -import struct -import sys -import tempfile - -import git_common as git -import subprocess2 - -CHUNK_FMT = '!20sL' -CHUNK_SIZE = struct.calcsize(CHUNK_FMT) -DIRTY_TREES = collections.defaultdict(int) -REF = 'refs/number/commits' -AUTHOR_NAME = 'git-number' -AUTHOR_EMAIL = 'chrome-infrastructure-team@google.com' - -# Number of bytes to use for the prefix on our internal number structure. -# 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would -# need to reimplement cache data structures to be a bit more sophisticated than -# dicts. 1 seems to be just right. -PREFIX_LEN = 1 - -# Set this to 'threads' to gather coverage data while testing. -POOL_KIND = 'procs' - - -def pathlify(hash_prefix): - """Converts a binary object hash prefix into a posix path, one folder per - byte. - - >>> pathlify('\xDE\xAD') - 'de/ad' - """ - return '/'.join('%02x' % b for b in hash_prefix) - - -@git.memoize_one(threadsafe=False) -def get_number_tree(prefix_bytes): - """Returns a dictionary of the git-number registry specified by - |prefix_bytes|. - - This is in the form of {: ...} - - >>> get_number_tree('\x83\xb4') - {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...} - """ - ref = '%s:%s' % (REF, pathlify(prefix_bytes)) - - try: - raw = git.run('cat-file', 'blob', ref, autostrip=False, decode=False) - return dict( - struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE) - for i in range(len(raw) // CHUNK_SIZE)) - except subprocess2.CalledProcessError: - return {} - - -@git.memoize_one(threadsafe=False) -def get_num(commit_hash): - """Returns the generation number for a commit. - - Returns None if the generation number for this commit hasn't been calculated - yet (see load_generation_numbers()). - """ - return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash) - - -def clear_caches(on_disk=False): - """Clears in-process caches for e.g. unit testing.""" - get_number_tree.clear() - get_num.clear() - if on_disk: - git.run('update-ref', '-d', REF) - - -def intern_number_tree(tree): - """Transforms a number tree (in the form returned by |get_number_tree|) into - a git blob. - - Returns the git blob id as hex-encoded string. - - >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169} - >>> intern_number_tree(d) - 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce' - """ - with tempfile.TemporaryFile() as f: - for k, v in sorted(tree.items()): - f.write(struct.pack(CHUNK_FMT, k, v)) - f.seek(0) - return git.intern_f(f) - - -def leaf_map_fn(pre_tree): - """Converts a prefix and number tree into a git index line.""" - pre, tree = pre_tree - return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre)) - - -def finalize(targets): - """Saves all cache data to the git repository. - - After calculating the generation number for |targets|, call finalize() to - save all the work to the git repository. - - This in particular saves the trees referred to by DIRTY_TREES. - """ - if not DIRTY_TREES: - return - - msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.values()) - - idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx') - env = os.environ.copy() - env['GIT_INDEX_FILE'] = str(idx) - - progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES) - with git.ProgressPrinter(progress_message) as inc: - git.run('read-tree', REF, env=env) - - prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES)) - updater = subprocess2.Popen( - ['git', 'update-index', '-z', '--index-info'], - stdin=subprocess2.PIPE, - env=env) - - with git.ScopedPool(kind=POOL_KIND) as leaf_pool: - for item in leaf_pool.imap(leaf_map_fn, prefixes_trees): - updater.stdin.write(item.encode()) - inc() - - updater.stdin.close() - updater.wait() - assert updater.returncode == 0 - - tree_id = git.run('write-tree', env=env) - commit_cmd = [ - # Git user.name and/or user.email may not be configured, so - # specifying them explicitly. They are not used, but required by - # Git. - '-c', - 'user.name=%s' % AUTHOR_NAME, - '-c', - 'user.email=%s' % AUTHOR_EMAIL, - 'commit-tree', - '-m', - msg, - '-p' - ] + git.hash_multi(REF) - for t in targets: - commit_cmd.extend(['-p', binascii.hexlify(t).decode()]) - commit_cmd.append(tree_id) - commit_hash = git.run(*commit_cmd) - git.run('update-ref', REF, commit_hash) - DIRTY_TREES.clear() - - -def preload_tree(prefix): - """Returns the prefix and parsed tree object for the specified prefix.""" - return prefix, get_number_tree(prefix) - - -def all_prefixes(depth=PREFIX_LEN): - prefixes = [bytes([i]) for i in range(255)] - for x in prefixes: - # This isn't covered because PREFIX_LEN currently == 1 - if depth > 1: # pragma: no cover - for r in all_prefixes(depth - 1): - yield x + r - else: - yield x - - -def load_generation_numbers(targets): - """Populates the caches of get_num and get_number_tree so they contain - the results for |targets|. - - Loads cached numbers from disk, and calculates missing numbers if one or - more of |targets| is newer than the cached calculations. - - Args: - targets - An iterable of binary-encoded full git commit hashes. - """ - # In case they pass us a generator, listify targets. - targets = list(targets) - - if all(get_num(t) is not None for t in targets): - return - - if git.tree(REF) is None: - empty = git.mktree({}) - commit_hash = git.run( - # Git user.name and/or user.email may not be configured, so - # specifying them explicitly. They are not used, but required by - # Git. - '-c', - 'user.name=%s' % AUTHOR_NAME, - '-c', - 'user.email=%s' % AUTHOR_EMAIL, - 'commit-tree', - '-m', - 'Initial commit from git-number', - empty) - git.run('update-ref', REF, commit_hash) - - with git.ScopedPool(kind=POOL_KIND) as pool: - preload_iter = pool.imap_unordered(preload_tree, all_prefixes()) - - rev_list = [] - - with git.ProgressPrinter('Loading commits: %(count)d') as inc: - # Curiously, buffering the list into memory seems to be the fastest - # approach in python (as opposed to iterating over the lines in the - # stdout as they're produced). GIL strikes again :/ - cmd = [ - 'rev-list', - '--topo-order', - '--parents', - '--reverse', - '^' + REF, - ] + [binascii.hexlify(target).decode() for target in targets] - for line in git.run(*cmd).splitlines(): - tokens = [binascii.unhexlify(token) for token in line.split()] - rev_list.append((tokens[0], tokens[1:])) - inc() - - get_number_tree.update(preload_iter) - - with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc: - for commit_hash, pars in rev_list: - num = max(map(get_num, pars)) + 1 if pars else 0 - - prefix = commit_hash[:PREFIX_LEN] - get_number_tree(prefix)[commit_hash] = num - DIRTY_TREES[prefix] += 1 - get_num.set(commit_hash, num) - - inc() - - -def main(): # pragma: no cover - parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__) - parser.add_option('--no-cache', - action='store_true', - help='Do not actually cache anything we calculate.') - parser.add_option('--reset', - action='store_true', - help='Reset the generation number cache and quit.') - parser.add_option('-v', - '--verbose', - action='count', - default=0, - help='Be verbose. Use more times for more verbosity.') - opts, args = parser.parse_args() - - levels = [logging.ERROR, logging.INFO, logging.DEBUG] - logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)]) - - # 'git number' should only be used on bots. - if os.getenv('CHROME_HEADLESS') != '1': - logging.error( - "'git-number' is an infrastructure tool that is only " - "intended to be used internally by bots. Developers should " - "use the 'Cr-Commit-Position' value in the commit's message.") - return 1 - - if opts.reset: - clear_caches(on_disk=True) - return - - try: - targets = git.parse_commitrefs(*(args or ['HEAD'])) - except git.BadCommitRefException as e: - parser.error(e) - - load_generation_numbers(targets) - if not opts.no_cache: - finalize(targets) - - print('\n'.join(map(str, map(get_num, targets)))) - return 0 - - -if __name__ == '__main__': # pragma: no cover - try: - sys.exit(main()) - except KeyboardInterrupt: - sys.stderr.write('interrupted\n') - sys.exit(1) diff --git a/tests/git_number_test.py b/tests/git_number_test.py deleted file mode 100755 index c97d40449..000000000 --- a/tests/git_number_test.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env vpython3 -# Copyright 2013 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. -"""Unit tests for git_number.py""" - -import binascii -import os -import sys - -DEPOT_TOOLS_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, DEPOT_TOOLS_ROOT) - -from testing_support import git_test_utils -from testing_support import coverage_utils - - -class Basic(git_test_utils.GitRepoReadWriteTestBase): - REPO_SCHEMA = """ - A B C D E - B F E - X Y E - """ - - @classmethod - def setUpClass(cls): - super(Basic, cls).setUpClass() - import git_number - cls.gn = git_number - cls.old_POOL_KIND = cls.gn.POOL_KIND - cls.gn.POOL_KIND = 'threads' - - @classmethod - def tearDownClass(cls): - cls.gn.POOL_KIND = cls.old_POOL_KIND - super(Basic, cls).tearDownClass() - - def tearDown(self): - self.gn.clear_caches() - super(Basic, self).tearDown() - - def _git_number(self, refs, cache=False): - refs = [binascii.unhexlify(ref) for ref in refs] - self.repo.run(self.gn.load_generation_numbers, refs) - if cache: - self.repo.run(self.gn.finalize, refs) - return [self.gn.get_num(ref) for ref in refs] - - def testBasic(self): - self.assertEqual([0], self._git_number([self.repo['A']])) - self.assertEqual([2], self._git_number([self.repo['F']])) - self.assertEqual([0], self._git_number([self.repo['X']])) - self.assertEqual([4], self._git_number([self.repo['E']])) - - def testInProcessCache(self): - self.assertEqual( - None, - self.repo.run(self.gn.get_num, binascii.unhexlify(self.repo['A']))) - self.assertEqual([4], self._git_number([self.repo['E']])) - self.assertEqual( - 0, self.repo.run(self.gn.get_num, - binascii.unhexlify(self.repo['A']))) - - def testOnDiskCache(self): - self.assertEqual( - None, - self.repo.run(self.gn.get_num, binascii.unhexlify(self.repo['A']))) - self.assertEqual([4], self._git_number([self.repo['E']], cache=True)) - self.assertEqual([4], self._git_number([self.repo['E']], cache=True)) - self.gn.clear_caches() - self.assertEqual( - 0, self.repo.run(self.gn.get_num, - binascii.unhexlify(self.repo['A']))) - self.gn.clear_caches() - self.repo.run(self.gn.clear_caches, True) - self.assertEqual( - None, - self.repo.run(self.gn.get_num, binascii.unhexlify(self.repo['A']))) - - -if __name__ == '__main__': - sys.exit( - coverage_utils.covered_main( - os.path.join(DEPOT_TOOLS_ROOT, 'git_number.py'), '3.7'))