def wrapper(func):
  """Wraps a `next(self, timeout=None)`-style method to always pass a timeout.

  This is applied to multiprocessing.pool.IMapIterator.next (and __next__) by
  the module-level monkeypatch, whose stated purpose is to let Ctrl-C kill
  everything properly.

  Args:
    func - The original method. It is called as func(self, timeout=<value>).

  Returns a function with the signature wrap(self, timeout=None) which
  forwards to |func|, substituting a very large timeout (1e100 seconds) when
  the caller did not supply one.
  """
  def wrap(self, timeout=None):
    # Only replace a *missing* timeout. `timeout or 1e100` would also replace
    # an explicit timeout of 0 (a non-blocking poll) with an effectively
    # infinite wait, which is not what the caller asked for.
    return func(self, timeout=1e100 if timeout is None else timeout)
  return wrap
def memoize_one(**kwargs):
  """Builds a decorator which memoizes a single-argument pure function.

  Results of None are never stored, so a function returning None for some
  argument is re-invoked every time it is called with that argument.

  Kwargs:
    threadsafe (bool) - REQUIRED. When true, every cache access goes through a
      lock. It is a kwarg so that users of memoize_one are forced to explicitly
      and verbosely pick True or False.

  The decorated function gains four methods:
    * get(key, default=None) - Reads the cached value for this key.
    * set(key, value) - Stores the value for this key in the cache.
    * clear() - Drops the entire contents of the cache. Useful for unittests.
    * update(other) - Updates the contents of the cache from another dict.
  """
  assert 'threadsafe' in kwargs, 'Must specify threadsafe={True,False}'
  use_lock = kwargs['threadsafe']

  def guard(lock, fn):
    # Without locking the cache's bound methods are exposed untouched.
    if not use_lock:
      return fn

    def locked(*a, **kw):
      with lock:
        return fn(*a, **kw)
    return locked

  def decorator(f):
    # The lock and the cache are created per decorated function (not per
    # memoize_one() call), so that one memoizer object can safely be reused:
    #
    #   memoizer = memoize_one(threadsafe=True)
    #
    #   @memoizer
    #   def fn1(val): ...
    #
    #   @memoizer
    #   def fn2(val): ...
    lock = threading.Lock() if use_lock else None
    cache = {}
    lookup = guard(lock, cache.get)
    store = guard(lock, cache.__setitem__)

    @functools.wraps(f)
    def memoized(arg):
      cached = lookup(arg)
      if cached is not None:
        return cached
      computed = f(arg)
      if computed is not None:
        store(arg, computed)
      return computed

    memoized.get = lookup
    memoized.set = store
    memoized.clear = guard(lock, cache.clear)
    memoized.update = guard(lock, cache.update)
    return memoized

  return decorator
def parse_commitrefs(*commitrefs):
  """Resolves one or more commitrefs to binary encoded commit hashes.

  A commitref is anything which can resolve to a commit. Popular examples:
    * 'HEAD'
    * 'origin/master'
    * 'cool_branch~2'

  Returns a list with one binary (unhexlified) hash per line printed by
  hashes().

  Raises BadCommitRefException if the underlying git command fails.
  """
  try:
    hex_hashes = hashes(*commitrefs)
  except subprocess2.CalledProcessError:
    raise BadCommitRefException(commitrefs)
  return [binascii.unhexlify(hex_hash) for hex_hash in hex_hashes]
def intern_f(f, kind='blob'):
  """Interns a file object into the git object store.

  The file object is always closed before this function returns, whether or
  not git succeeded.

  Args:
    f (file-like object) - The file-like object to intern. It is handed to git
      as stdin.
    kind (git object type) - One of 'blob', 'commit', 'tree', 'tag'.

  Returns the git hash of the interned object (hex encoded).

  Propagates whatever run() raises if the git command fails.
  """
  try:
    return run('hash-object', '-t', kind, '-w', '--stdin', stdin=f)
  finally:
    # Close in a finally so the handle is not leaked when git fails.
    f.close()
+ + Args: + treedict - { name: (mode, type, ref) } + """ + with tempfile.TemporaryFile() as f: + for name, (mode, typ, ref) in treedict.iteritems(): + f.write('%s %s %s\t%s\0' % (mode, typ, ref, name)) + f.seek(0) + return run('mktree', '-z', stdin=f) diff --git a/git_number.py b/git_number.py new file mode 100755 index 000000000..04f676c65 --- /dev/null +++ b/git_number.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python +# Copyright 2013 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Usage: %prog [options] []* + +If no 's are supplied, it defaults to HEAD. + +Calculates the generation number for one or more commits in a git repo. + +Generation number of a commit C with parents P is defined as: + generation_number(C, []) = 0 + generation_number(C, P) = max(map(generation_number, P)) + 1 + +This number can be used to order commits relative to each other, as long as for +any pair of the commits, one is an ancestor of the other. + +Since calculating the generation number of a commit requires walking that +commit's entire history, this script caches all calculated data inside the git +repo that it operates on in the ref 'refs/number/commits'. +""" + +import binascii +import collections +import logging +import optparse +import os +import struct +import sys +import tempfile + +import git_common as git +import subprocess2 + +CHUNK_FMT = '!20sL' +CHUNK_SIZE = struct.calcsize(CHUNK_FMT) +DIRTY_TREES = collections.defaultdict(int) +REF = 'refs/number/commits' + +# Number of bytes to use for the prefix on our internal number structure. +# 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would +# need to reimplement cache data structures to be a bit more sophisticated than +# dicts. 1 seems to be just right. +PREFIX_LEN = 1 + +# Set this to 'threads' to gather coverage data while testing. 
def pathlify(hash_prefix):
  """Turns a binary object hash prefix into a posix path.

  Every byte of the prefix becomes one two-digit, lower-case hex path
  component.

  >>> pathlify('\xDE\xAD')
  'de/ad'
  """
  components = ['%02x' % ord(byte) for byte in hash_prefix]
  return '/'.join(components)
def finalize(targets):
  """Saves all cache data to the git repository.

  After calculating the generation number for |targets|, call finalize() to
  save all the work to the git repository.

  This in particular saves the trees referred to by DIRTY_TREES, commits the
  result on top of REF (with each of |targets| as an additional parent) and
  then empties DIRTY_TREES. It is a no-op when DIRTY_TREES is empty.

  Args:
    targets - An iterable of binary-encoded full git commit hashes.
  """
  if not DIRTY_TREES:
    return

  msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())

  # All index manipulation below goes through a dedicated index file inside
  # the git dir, selected via GIT_INDEX_FILE.
  idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
  env = os.environ.copy()
  env['GIT_INDEX_FILE'] = idx

  progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
  with git.ProgressPrinter(progress_message) as inc:
    git.run('read-tree', REF, env=env)

    prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
    # Use the same git executable name as git.run() does, rather than a
    # hard-coded 'git'.
    updater = subprocess2.Popen(
        [git.GIT_EXE, 'update-index', '-z', '--index-info'],
        stdin=subprocess2.PIPE, env=env)

    try:
      with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
        for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
          updater.stdin.write(item)
          inc()
    finally:
      # Always release the pipe and reap the child, even if interning the
      # leaves failed or was interrupted part-way through.
      updater.stdin.close()
      updater.wait()
    assert updater.returncode == 0, 'git update-index failed'

    tree_id = git.run('write-tree', env=env)
    commit_cmd = ['commit-tree', '-m', msg, '-p'] + git.hashes(REF)
    for t in targets:
      commit_cmd.extend(['-p', binascii.hexlify(t)])
    commit_cmd.append(tree_id)
    commit_hash = git.run(*commit_cmd)
    git.run('update-ref', REF, commit_hash)
  DIRTY_TREES.clear()
def all_prefixes(depth=PREFIX_LEN):
  """Generates every possible binary hash prefix which is |depth| bytes long.

  Args:
    depth (int) - Number of bytes in each generated prefix. Defaults to
      PREFIX_LEN.

  Yields 256 ** depth byte strings, from '\\x00'... through '\\xff'...
  """
  # A byte has 256 distinct values (0x00 - 0xff); iterating over only 255 of
  # them would silently leave out every prefix containing 0xff.
  for x in (chr(i) for i in xrange(256)):
    # This isn't covered because PREFIX_LEN currently == 1
    if depth > 1:  # pragma: no cover
      for r in all_prefixes(depth - 1):
        yield x + r
    else:
      yield x
def main():  # pragma: no cover
  """Command line entry point for git-number.

  Parses the options, then either resets the on-disk cache (--reset) or
  prints one generation number per requested commitref, defaulting to HEAD.
  Calculated numbers are saved with finalize() unless --no-cache is given.

  Returns 0 after printing the numbers, 1 if interrupted with Ctrl-C, and None
  after a --reset.
  """
  parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
  parser.add_option('--no-cache', action='store_true',
                    help='Do not actually cache anything we calculate.')
  parser.add_option('--reset', action='store_true',
                    help='Reset the generation number cache and quit.')
  parser.add_option('-v', '--verbose', action='count', default=0,
                    help='Be verbose. Use more times for more verbosity.')
  opts, args = parser.parse_args()

  # Each -v steps one level further down this list; extra -v's are clamped to
  # the most verbose level.
  verbosity_levels = [logging.ERROR, logging.INFO, logging.DEBUG]
  level_index = min(opts.verbose, len(verbosity_levels) - 1)
  logging.basicConfig(level=verbosity_levels[level_index])

  try:
    if opts.reset:
      clear_caches(on_disk=True)
      return

    commitrefs = args if args else ['HEAD']
    try:
      targets = git.parse_commitrefs(*commitrefs)
    except git.BadCommitRefException as e:
      parser.error(e)

    load_generation_numbers(targets)
    if not opts.no_cache:
      finalize(targets)

    numbers = [str(get_num(target)) for target in targets]
    print('\n'.join(numbers))
    return 0
  except KeyboardInterrupt:
    return 1
def native_error(msg, version):
  """Prints a 'native python-coverage is required' error and exits.

  Args:
    msg (str) - Extra detail which is printed after the generic explanation.
    version (str) - The python-coverage version which is required.

  Never returns; always exits the process with status 1.
  """
  template = textwrap.dedent("""\
  ERROR: Native python-coverage (version: %s) is required to be
  installed on your PYTHONPATH to run this test. Recommendation:
    sudo pip install python-coverage
  %s""")
  print(template % (version, msg))
  sys.exit(1)
def git_hash_data(data, typ='blob'):
  """Calculate the git-style SHA1 for some data.

  Only supports 'blob' type data at the moment.

  Args:
    data - The content to hash. Byte strings are hashed as-is; text which is
      not already a byte string is encoded as UTF-8 first.
    typ (str) - The git object type. Must be 'blob'.

  Returns the hex encoded SHA1 of 'blob <byte length>\\0<data>'.
  """
  assert typ == 'blob', 'Only support blobs for now'
  # Hash bytes, and put the *byte* length in the header. Building the header
  # and payload with text formatting breaks for byte strings on Python 3 and
  # for non-ASCII unicode on Python 2.
  if not isinstance(data, bytes):
    data = data.encode('utf-8')
  header = ('blob %d' % len(data)).encode('ascii') + b'\0'
  return hashlib.sha1(header + data).hexdigest()
self.data: + end = self.end + curr = end[1] + curr[2] = end[1] = self.data[key] = [key, curr, end] + + def difference_update(self, *others): + for other in others: + for i in other: + self.discard(i) + + def discard(self, key): + if key in self.data: + key, prev, nxt = self.data.pop(key) + prev[2] = nxt + nxt[1] = prev + + def pop(self, last=True): # pylint: disable=W0221 + if not self: + raise KeyError('set is empty') + key = self.end[1][0] if last else self.end[2][0] + self.discard(key) + return key + + +class GitRepoSchema(object): + """A declarative git testing repo. + + Pass a schema to __init__ in the form of: + A B C D + B E D + + This is the repo + + A - B - C - D + \ E / + + Whitespace doesn't matter. Each line is a declaration of which commits come + before which other commits. + + Every commit gets a tag 'tag_%(commit)s' + Every unique terminal commit gets a branch 'branch_%(commit)s' + Last commit in First line is the branch 'master' + Root commits get a ref 'root_%(commit)s' + + Timestamps are in topo order, earlier commits (as indicated by their presence + in the schema) get earlier timestamps. Stamps start at the Unix Epoch, and + increment by 1 day each. + """ + COMMIT = collections.namedtuple('COMMIT', 'name parents is_branch is_root') + + def __init__(self, repo_schema='', + content_fn=lambda v: {v: {'data': v}}): + """Builds a new GitRepoSchema. + + Args: + repo_schema (str) - Initial schema for this repo. See class docstring for + info on the schema format. + content_fn ((commit_name) -> commit_data) - A function which will be + lazily called to obtain data for each commit. The results of this + function are cached (i.e. it will never be called twice for the same + commit_name). See the docstring on the GitRepo class for the format of + the data returned by this function. 
class GitRepo(object):
  """Creates a real git repo for a GitRepoSchema.

  Obtains schema and content information from the GitRepoSchema.

  The format for the commit data supplied by GitRepoSchema.data_for is:
    {
      SPECIAL_KEY: special_value,
      ...
      "path/to/some/file": { 'data': "some data content for this file",
                              'mode': 0755 },
      ...
    }

  The SPECIAL_KEYs are the following attributes of the GitRepo class:
    * AUTHOR_NAME
    * AUTHOR_EMAIL
    * AUTHOR_DATE - must be a datetime.datetime instance
    * COMMITTER_NAME
    * COMMITTER_EMAIL
    * COMMITTER_DATE - must be a datetime.datetime instance

  For file content, if 'data' is None, then this commit will `git rm` that file.
  """
  # Every repo is created below this directory, which is removed at exit.
  BASE_TEMP_DIR = tempfile.mkdtemp(suffix='base', prefix='git_repo')
  atexit.register(shutil.rmtree, BASE_TEMP_DIR)

  # Singleton objects to specify specific data in a commit dictionary.
  AUTHOR_NAME = object()
  AUTHOR_EMAIL = object()
  AUTHOR_DATE = object()
  COMMITTER_NAME = object()
  COMMITTER_EMAIL = object()
  COMMITTER_DATE = object()

  DEFAULT_AUTHOR_NAME = 'Author McAuthorly'
  DEFAULT_AUTHOR_EMAIL = 'author@example.com'
  DEFAULT_COMMITTER_NAME = 'Charles Committish'
  DEFAULT_COMMITTER_EMAIL = 'commitish@example.com'

  COMMAND_OUTPUT = collections.namedtuple('COMMAND_OUTPUT', 'retcode stdout')

  def __init__(self, schema):
    """Makes new GitRepo.

    Automatically creates a temp folder under GitRepo.BASE_TEMP_DIR. It's
    recommended that you clean this repo up by calling nuke() on it, but if not,
    GitRepo will automatically clean up all allocated repos at the exit of the
    program (assuming a normal exit like with sys.exit)

    Args:
      schema - An instance of GitRepoSchema
    """
    self.repo_path = tempfile.mkdtemp(dir=self.BASE_TEMP_DIR)
    self.commit_map = {}
    # Timestamp handed to the next commit date which the schema data does not
    # specify; advanced by one day each time it is used.
    self._date = datetime.datetime(1970, 1, 1)

    self.git('init')
    for commit in schema.walk():
      self._add_schema_commit(commit, schema.data_for(commit.name))
    if schema.master:
      self.git('update-ref', 'master', self[schema.master])

  def __getitem__(self, commit_name):
    """Gets the hash of a commit by its schema name.

    >>> r = GitRepo(GitRepoSchema('A B C'))
    >>> r['B']
    '7381febe1da03b09da47f009963ab7998a974935'
    """
    return self.commit_map[commit_name]

  def _add_schema_commit(self, commit, data):
    """Creates the real git commit for one GitRepoSchema.COMMIT.

    Args:
      commit - The GitRepoSchema.COMMIT to create. All of its parents must
        already have been added to this repo.
      data - The commit data dict (see the class docstring), or None for a
        commit without any content.
    """
    data = data or {}

    if commit.parents:
      parents = list(commit.parents)
      self.git('checkout', '--detach', '-q', self[parents[0]])
      if len(parents) > 1:
        self.git('merge', '--no-commit', '-q', *[self[x] for x in parents[1:]])
    else:
      self.git('checkout', '--orphan', 'root_%s' % commit.name)
      self.git('rm', '-rf', '.')

    env = {}
    # Collected so the file loop below can tell metadata entries from files.
    special_keys = set()
    for prefix in ('AUTHOR', 'COMMITTER'):
      for suffix in ('NAME', 'EMAIL', 'DATE'):
        singleton = '%s_%s' % (prefix, suffix)
        key = getattr(self, singleton)
        special_keys.add(key)
        if key in data:
          val = data[key]
        else:
          if suffix == 'DATE':
            val = self._date
            self._date += datetime.timedelta(days=1)
          else:
            val = getattr(self, 'DEFAULT_%s' % singleton)
        env['GIT_%s' % singleton] = str(val)

    for fname, file_data in data.items():
      # The SPECIAL_KEY entries live in the same dict as the files, but they
      # are commit metadata (already consumed above), not file descriptions.
      if fname in special_keys:
        continue

      # Computed up front so that a 'mode'-only entry can chmod an existing
      # file.
      path = os.path.join(self.repo_path, fname)
      if 'data' in file_data:
        content = file_data['data']
        if content is None:
          # Deleted files have nothing left to chmod or add.
          self.git('rm', fname)
          continue
        pardir = os.path.dirname(path)
        if not os.path.exists(pardir):
          os.makedirs(pardir)
        with open(path, 'wb') as f:
          f.write(content)

      mode = file_data.get('mode')
      if mode:
        os.chmod(path, mode)

      self.git('add', fname)

    rslt = self.git('commit', '--allow-empty', '-m', commit.name, env=env)
    assert rslt.retcode == 0, 'Failed to commit %s' % str(commit)
    self.commit_map[commit.name] = self.git('rev-parse', 'HEAD').stdout.strip()
    self.git('tag', 'tag_%s' % commit.name, self[commit.name])
    if commit.is_branch:
      self.git('update-ref', 'branch_%s' % commit.name, self[commit.name])

  def git(self, *args, **kwargs):
    """Runs a git command specified by |args| in this repo.

    stderr is discarded. Extra |kwargs| are passed to subprocess.check_output.

    Returns a COMMAND_OUTPUT(retcode, stdout). A failing command does not
    raise; its exit code is reported in retcode instead.
    """
    assert self.repo_path is not None
    try:
      with open(os.devnull, 'wb') as devnull:
        output = subprocess.check_output(
          ('git',) + args, cwd=self.repo_path, stderr=devnull, **kwargs)
      return self.COMMAND_OUTPUT(0, output)
    except subprocess.CalledProcessError as e:
      return self.COMMAND_OUTPUT(e.returncode, e.output)

  def nuke(self):
    """Obliterates the git repo on disk.

    Causes this GitRepo to be unusable.
    """
    shutil.rmtree(self.repo_path)
    self.repo_path = None

  def run(self, fn, *args, **kwargs):
    """Run a python function with the given args and kwargs with the cwd set to
    the git repo.

    The previous working directory is restored afterwards, even if |fn|
    raises. Returns whatever |fn| returns.
    """
    assert self.repo_path is not None
    curdir = os.getcwd()
    try:
      os.chdir(self.repo_path)
      return fn(*args, **kwargs)
    finally:
      os.chdir(curdir)
class GitRepoReadWriteTestBase(GitRepoSchemaTestBase):
  """TestCase base which gives every test function its own GitRepo.

  The repo is built from the schema and content provided by
  GitRepoSchemaTestBase. It is available as self.repo, and is created afresh
  before and deleted after each test function in the subclass.
  """
  REPO = None

  def setUp(self):
    super(GitRepoReadWriteTestBase, self).setUp()
    fresh_repo = self.r_schema.reify()
    self.repo = fresh_repo

  def tearDown(self):
    # Remove the repo from disk before the base classes tear down.
    repo = self.repo
    repo.nuke()
    super(GitRepoReadWriteTestBase, self).tearDown()
"""Unit tests for git_common.py"""

import binascii
import collections
import os
import signal
import sys
import tempfile
import time
import unittest

DEPOT_TOOLS_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, DEPOT_TOOLS_ROOT)

from testing_support import coverage_utils
from testing_support import git_test_utils


class GitCommonTestBase(unittest.TestCase):
  """Base TestCase which exposes the git_common module as `cls.gc`."""

  @classmethod
  def setUpClass(cls):
    super(GitCommonTestBase, cls).setUpClass()
    # Imported lazily instead of at module level.
    # NOTE(review): presumably so coverage instrumentation is already active
    # when git_common is loaded -- confirm against coverage_utils.covered_main.
    import git_common as module_under_test
    cls.gc = module_under_test


class Support(GitCommonTestBase):
  """Tests for git_common.memoize_one."""

  def _testMemoizeOneBody(self, threadsafe):
    """Exercises memoize_one's caching plus its set() and clear() methods.

    Args:
      threadsafe (bool) - forwarded to memoize_one as its threadsafe kwarg.
    """
    call_counts = collections.defaultdict(int)

    def double_if_even(val):
      call_counts[val] += 1
      if val % 2:
        return None
      return val * 2

    # Wrap by hand rather than with decorator syntax. Otherwise pylint
    # crashes (!!)
    memoized = self.gc.memoize_one(threadsafe=threadsafe)(double_if_even)

    def check_two_and_one():
      # Look up 2 (-> 4) and 1 (-> None) twice each, back to back.
      for arg, want in ((2, 4), (2, 4), (1, None), (1, None)):
        self.assertEqual(want, memoized(arg))

    check_two_and_one()
    # None results are not cached: 1 was computed twice, 2 only once.
    self.assertDictEqual({1: 2, 2: 1}, call_counts)

    # A value injected with set() is served without calling the function.
    memoized.set(10, 20)
    self.assertEqual(20, memoized(10))
    self.assertDictEqual({1: 2, 2: 1}, call_counts)

    # After clear() everything, including 10, has to be recomputed.
    memoized.clear()
    check_two_and_one()
    self.assertEqual(20, memoized(10))
    self.assertDictEqual({1: 4, 2: 2, 10: 1}, call_counts)

  def testMemoizeOne(self):
    self._testMemoizeOneBody(threadsafe=False)

  def testMemoizeOneThreadsafe(self):
    self._testMemoizeOneBody(threadsafe=True)


def slow_square(i):
  """Returns i squared; the work function for ScopedPoolTest.

  Lives at module level because non top-level functions aren't pickleable.
  """
  return pow(i, 2)


class ScopedPoolTest(GitCommonTestBase):
  """Tests for git_common.ScopedPool using thread and process pools."""

  # Signal this process sends itself to simulate the user pressing Ctrl-C.
  if sys.platform == 'win32':
    CTRL_C = signal.CTRL_C_EVENT
  else:
    CTRL_C = signal.SIGINT

  _FIRST_TEN_SQUARES = [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

  def _checkImap(self, **pool_kwargs):
    """Maps slow_square over 0..9 in a ScopedPool and checks the output.

    Kwargs are passed straight through to ScopedPool.
    """
    with self.gc.ScopedPool(**pool_kwargs) as pool:
      squares = list(pool.imap(slow_square, xrange(10)))
    self.assertEqual(self._FIRST_TEN_SQUARES, squares)

  def _checkCtrlC(self, **pool_kwargs):
    """Interrupts a long imap and checks KeyboardInterrupt propagates.

    Kwargs are passed straight through to ScopedPool.
    """
    seen = []
    with self.assertRaises(KeyboardInterrupt):
      with self.gc.ScopedPool(**pool_kwargs) as pool:
        # Make sure this pool is interrupted in mid-swing
        for square in pool.imap(slow_square, xrange(1000000)):
          if square > 32:
            os.kill(os.getpid(), self.CTRL_C)
          seen.append(square)
    self.assertEqual([0, 1, 4, 9, 16, 25], seen)

  def testThreads(self):
    self._checkImap(kind='threads')

  def testThreadsCtrlC(self):
    self._checkCtrlC(kind='threads')

  def testProcs(self):
    self._checkImap()

  def testProcsCtrlC(self):
    self._checkCtrlC()


class ProgressPrinterTest(GitCommonTestBase):
  """Tests for git_common.ProgressPrinter."""

  class FakeStream(object):
    """Stream stand-in: remembers distinct writes and counts flushes."""

    def __init__(self):
      self.count = 0
      self.data = set()

    def flush(self):
      self.count += 1

    def write(self, line):
      self.data.add(line)

  @unittest.expectedFailure
  def testBasic(self):
    """This test is probably racy, but I don't have a better alternative."""
    fmt = '%(count)d/10'
    stream = self.FakeStream()

    printer = self.gc.ProgressPrinter(
        fmt, enabled=True, stream=stream, period=0.01)
    with printer as inc:
      for _ in xrange(10):
        time.sleep(0.02)
        inc()

    seen = {line.strip() for line in stream.data}
    expected = {fmt % {'count': n} for n in xrange(11)}
    self.assertSetEqual(seen, expected)
    self.assertGreaterEqual(stream.count, 10)
class GitReadOnlyFunctionsTest(git_test_utils.GitRepoReadOnlyTestBase,
                               GitCommonTestBase):
  """Tests for git_common functions which only read from the repo.

  The tests below rely on this shape of REPO: master~3 is A, E's first parent
  is B and D's second parent is E.
  """
  REPO = """
  A B C D
  B E D
  """

  COMMIT_A = {
    'some/files/file1': {'data': 'file1'},
    'some/files/file2': {'data': 'file2'},
    'some/files/file3': {'data': 'file3'},
    'some/other/file': {'data': 'otherfile'},
  }

  COMMIT_C = {
    'some/files/file2': {
      # 0o-prefixed octal is accepted by Python 2.6+ and is the only octal
      # spelling Python 3 can parse (a bare leading 0 is a SyntaxError there).
      'mode': 0o755,
      'data': 'file2 - vanilla'},
  }

  COMMIT_E = {
    'some/files/file2': {'data': 'file2 - merged'},
  }

  COMMIT_D = {
    'some/files/file2': {'data': 'file2 - vanilla\nfile2 - merged'},
  }

  def _commitrefs(self):
    """Returns the commit-ish strings resolved by the hash tests."""
    return [
      'master',
      'master~3',
      self.repo['E']+'~',
      self.repo['D']+'^2',
      'tag_C^{}',
    ]

  def _expected_hashes(self):
    """Returns the hex hashes _commitrefs() must resolve to, in order."""
    return [self.repo[name] for name in ('D', 'A', 'B', 'E', 'C')]

  @staticmethod
  def _blob_entry(data, mode='100644'):
    """Returns the (mode, type, hash) tuple expected for a blob of |data|."""
    return (mode, 'blob', git_test_utils.git_hash_data(data))

  def testHashes(self):
    ret = self.repo.run(self.gc.hashes, *self._commitrefs())
    self.assertEqual(self._expected_hashes(), ret)

  def testParseCommitrefs(self):
    ret = self.repo.run(self.gc.parse_commitrefs, *self._commitrefs())
    # Build a real list: where map() is lazy its result never equals a list.
    expected = [binascii.unhexlify(h) for h in self._expected_hashes()]
    self.assertEqual(ret, expected)

    # NOTE(review): assertRaisesRegexp is the Python 2.7 spelling; it is a
    # deprecated alias of assertRaisesRegex on Python 3.
    with self.assertRaisesRegexp(Exception, r"one of \('master', 'bananas'\)"):
      self.repo.run(self.gc.parse_commitrefs, 'master', 'bananas')

  def testTree(self):
    tree = self.repo.run(self.gc.tree, 'master:some/files')
    file1 = self.COMMIT_A['some/files/file1']['data']
    file2 = self.COMMIT_D['some/files/file2']['data']
    file3 = self.COMMIT_A['some/files/file3']['data']
    self.assertEqual(tree['file1'], self._blob_entry(file1))
    # file2 keeps the executable mode that COMMIT_C gave it.
    self.assertEqual(tree['file2'], self._blob_entry(file2, mode='100755'))
    self.assertEqual(tree['file3'], self._blob_entry(file3))

    tree = self.repo.run(self.gc.tree, 'master:some')
    self.assertEqual(len(tree), 2)
    # Don't check the tree hash because we're lazy :)
    self.assertEqual(tree['files'][:2], ('040000', 'tree'))

    # A treeish which does not exist yields None rather than raising.
    tree = self.repo.run(self.gc.tree, 'master:wat')
    self.assertIsNone(tree)

  def testTreeRecursive(self):
    tree = self.repo.run(self.gc.tree, 'master:some', recurse=True)
    file1 = self.COMMIT_A['some/files/file1']['data']
    file2 = self.COMMIT_D['some/files/file2']['data']
    file3 = self.COMMIT_A['some/files/file3']['data']
    other = self.COMMIT_A['some/other/file']['data']
    self.assertEqual(tree['files/file1'], self._blob_entry(file1))
    self.assertEqual(
        tree['files/file2'], self._blob_entry(file2, mode='100755'))
    self.assertEqual(tree['files/file3'], self._blob_entry(file3))
    self.assertEqual(tree['other/file'], self._blob_entry(other))


class GitMutableFunctionsTest(git_test_utils.GitRepoReadWriteTestBase,
                              GitCommonTestBase):
  """Tests for git_common functions which write objects into the repo."""
  REPO = ''

  def _intern_data(self, data):
    """Interns |data| via git_common.intern_f and returns what it returns.

    The data is staged in a temporary file because intern_f is handed a file
    object rather than a string.
    """
    with tempfile.TemporaryFile() as f:
      f.write(data)
      f.seek(0)
      return self.repo.run(self.gc.intern_f, f)

  def testInternF(self):
    data = 'CoolBobcatsBro'
    data_hash = self._intern_data(data)
    self.assertEqual(git_test_utils.git_hash_data(data), data_hash)
    self.assertEqual(data, self.repo.git('cat-file', 'blob', data_hash).stdout)

  def testMkTree(self):
    tree = {}
    for i in 1, 2, 3:
      name = 'file%d' % i
      # Each file's content is simply its own name.
      tree[name] = ('100644', 'blob', self._intern_data(name))
    tree_hash = self.repo.run(self.gc.mktree, tree)
    self.assertEqual('37b61866d6e061c4ba478e7eb525be7b5752737d', tree_hash)


if __name__ == '__main__':
  sys.exit(coverage_utils.covered_main(
    os.path.join(DEPOT_TOOLS_ROOT, 'git_common.py')
  ))

# --- patch boundary (header retained from the original text) ---
# diff --git a/tests/git_number_test.py b/tests/git_number_test.py
# new file mode 100755
# index 000000000..789eddd31
# --- /dev/null
# +++ b/tests/git_number_test.py
# @@ -0,0 +1,86 @@
#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Unit tests for git_number.py"""

import binascii
import os
import sys

DEPOT_TOOLS_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, DEPOT_TOOLS_ROOT)

from testing_support import git_test_utils
from testing_support import coverage_utils


class Basic(git_test_utils.GitRepoReadWriteTestBase):
  """Generation-number tests for git_number, run against a fresh repo per test.

  testBasic pins the numbers expected for this REPO: A and X are 0, F is 2
  and E is 4.
  """
  REPO = """
  A B C D E
  B F E
  X Y E
  """

  @classmethod
  def setUpClass(cls):
    super(Basic, cls).setUpClass()
    import git_number
    cls.gn = git_number
    # Switch git_number to thread pools for these tests; the previous value
    # is remembered so tearDownClass can put it back.
    cls.old_POOL_KIND = cls.gn.POOL_KIND
    cls.gn.POOL_KIND = 'threads'

  @classmethod
  def tearDownClass(cls):
    cls.gn.POOL_KIND = cls.old_POOL_KIND
    super(Basic, cls).tearDownClass()

  def tearDown(self):
    # Caches live on the git_number module, so they would otherwise leak
    # from one test into the next.
    self.gn.clear_caches()
    super(Basic, self).tearDown()

  def _git_number(self, refs, cache=False):
    """Loads generation numbers for |refs| and returns them as a list.

    Args:
      refs - iterable of hex commit hashes.
      cache (bool) - when True, also call git_number.finalize on the refs.
        NOTE(review): presumably this persists the numbers to the on-disk
        cache (testOnDiskCache depends on them surviving clear_caches()) --
        confirm against git_number.py.

    Returns:
      A list holding git_number.get_num(ref) for each ref, in input order.
    """
    # Materialize once: the binary refs are consumed up to three times below,
    # which a lazy map() iterator would not survive.
    refs = [binascii.unhexlify(ref) for ref in refs]
    self.repo.run(self.gn.load_generation_numbers, refs)
    if cache:
      self.repo.run(self.gn.finalize, refs)
    return [self.gn.get_num(ref) for ref in refs]

  def _get_num_in_repo(self, commit_name):
    """Returns get_num for the named schema commit, run with cwd in the repo."""
    return self.repo.run(
        self.gn.get_num, binascii.unhexlify(self.repo[commit_name]))

  def testBasic(self):
    self.assertEqual([0], self._git_number([self.repo['A']]))
    self.assertEqual([2], self._git_number([self.repo['F']]))
    self.assertEqual([0], self._git_number([self.repo['X']]))
    self.assertEqual([4], self._git_number([self.repo['E']]))

  def testInProcessCache(self):
    # Nothing has been loaded yet, so A has no number.
    self.assertEqual(None, self._get_num_in_repo('A'))
    # Numbering E fills in its ancestors, A included.
    self.assertEqual([4], self._git_number([self.repo['E']]))
    self.assertEqual(0, self._get_num_in_repo('A'))

  def testOnDiskCache(self):
    self.assertEqual(None, self._get_num_in_repo('A'))
    self.assertEqual([4], self._git_number([self.repo['E']], cache=True))
    self.assertEqual([4], self._git_number([self.repo['E']], cache=True))
    # A is still known after a plain clear_caches() ...
    self.gn.clear_caches()
    self.assertEqual(0, self._get_num_in_repo('A'))
    # ... but not once clear_caches(True) has been run inside the repo.
    self.gn.clear_caches()
    self.repo.run(self.gn.clear_caches, True)
    self.assertEqual(None, self._get_num_in_repo('A'))


if __name__ == '__main__':
  sys.exit(coverage_utils.covered_main(
    os.path.join(DEPOT_TOOLS_ROOT, 'git_number.py'),
    '3.7'
  ))