From 819375653b8ac2d747ec5dd246ab96ad6130f3e7 Mon Sep 17 00:00:00 2001
From: "mgiuca@chromium.org"
Date: Wed, 3 Feb 2016 08:00:53 +0000
Subject: [PATCH] Added git hyper-blame, a tool that skips unwanted commits in
git blame.
Currently, the script requires you to pass the unwanted commits on the
command line, but eventually, you could configure it with a file
(checked into the repo) that provides a fixed set of commits to always
skip (such as commits that do a huge amount of renaming and nothing
else).
BUG=574290
Review URL: https://codereview.chromium.org/1559943003
git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@298544 0039d316-1c4b-4281-b951-d872f2087c98
---
git-hyper-blame | 6 +
git_common.py | 39 +
git_dates.py | 62 ++
git_hyper_blame.py | 266 +++++++
man/html/depot_tools.html | 10 +-
man/html/git-hyper-blame.html | 871 +++++++++++++++++++++++
man/man1/git-hyper-blame.1 | 142 ++++
man/man7/depot_tools.7 | 13 +-
man/src/_git-hyper-blame_desc.helper.txt | 1 +
man/src/git-hyper-blame.demo.1.sh | 3 +
man/src/git-hyper-blame.demo.2.sh | 4 +
man/src/git-hyper-blame.demo.common.sh | 57 ++
man/src/git-hyper-blame.txt | 72 ++
tests/git_common_test.py | 51 +-
tests/git_dates_test.py | 78 ++
tests/git_hyper_blame_test.py | 367 ++++++++++
16 files changed, 2034 insertions(+), 8 deletions(-)
create mode 100755 git-hyper-blame
create mode 100644 git_dates.py
create mode 100755 git_hyper_blame.py
create mode 100644 man/html/git-hyper-blame.html
create mode 100644 man/man1/git-hyper-blame.1
create mode 100644 man/src/_git-hyper-blame_desc.helper.txt
create mode 100755 man/src/git-hyper-blame.demo.1.sh
create mode 100755 man/src/git-hyper-blame.demo.2.sh
create mode 100755 man/src/git-hyper-blame.demo.common.sh
create mode 100644 man/src/git-hyper-blame.txt
create mode 100755 tests/git_dates_test.py
create mode 100755 tests/git_hyper_blame_test.py
diff --git a/git-hyper-blame b/git-hyper-blame
new file mode 100755
index 000000000..4abb969a8
--- /dev/null
+++ b/git-hyper-blame
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Source python_runner.sh, located through a PATH lookup. The substitution is
+# quoted so that a resolved path containing whitespace is not word-split.
+. "$(type -P python_runner.sh)"
diff --git a/git_common.py b/git_common.py
index 506365257..d571f820b 100644
--- a/git_common.py
+++ b/git_common.py
@@ -281,6 +281,16 @@ def once(function):
## Git functions
+def blame(filename, revision=None, porcelain=False, *args):
+  """Runs `git blame` on a file and returns the result of run().
+
+  Args:
+    filename: Path of the file to blame; passed after a '--' separator.
+    revision: Optional revision to blame from. Left out of the command when
+        None.
+    porcelain: If True, '-p' is passed to git blame.
+    *args: Accepted for call compatibility; not used by this function.
+
+  Returns:
+    Whatever run() returns for the assembled blame command.
+  """
+  porcelain_flag = ['-p'] if porcelain else []
+  revision_arg = [] if revision is None else [revision]
+  return run(*(['blame'] + porcelain_flag + revision_arg + ['--', filename]))
+
+
def branch_config(branch, option, default=None):
return config('branch.%s.%s' % (branch, option), default=default)
@@ -546,10 +556,39 @@ def remove_merge_base(branch):
del_branch_config(branch, 'base-upstream')
+def repo_root():
+  """Returns the absolute path to the repository root.
+
+  The value is the output of `git rev-parse --show-toplevel` as returned by
+  run().
+  """
+  toplevel = run('rev-parse', '--show-toplevel')
+  return toplevel
+
+
def root():
return config('depot-tools.upstream', 'origin/master')
+@contextlib.contextmanager
+def less():  # pragma: no cover
+  """Runs 'less' as context manager yielding its stdin as a PIPE.
+
+  Automatically checks if sys.stdout is a non-TTY stream. If so, it avoids
+  running less and just yields sys.stdout.
+
+  Yields:
+    A writable file object: sys.stdout when it is not a TTY, otherwise the
+    stdin pipe of the spawned pager. On exit the pipe is closed and the pager
+    process is waited for.
+  """
+  if not sys.stdout.isatty():
+    yield sys.stdout
+    return
+
+  # Run with the same options that git uses (see setup_pager in git repo).
+  # -F: Automatically quit if the output is less than one screen.
+  # -R: Don't escape ANSI color codes.
+  # -X: Don't clear the screen before starting.
+  cmd = ('less', '-FRX')
+  # Start the pager before entering the try block. If Popen itself raises
+  # there is no process to clean up, and touching `proc` in the finally clause
+  # would replace the real error with an UnboundLocalError.
+  proc = subprocess2.Popen(cmd, stdin=subprocess2.PIPE)
+  try:
+    yield proc.stdin
+  finally:
+    proc.stdin.close()
+    proc.wait()
+
+
def run(*cmd, **kwargs):
"""The same as run_with_stderr, except it only returns stdout."""
return run_with_stderr(*cmd, **kwargs)[0]
diff --git a/git_dates.py b/git_dates.py
new file mode 100644
index 000000000..41cf3acf9
--- /dev/null
+++ b/git_dates.py
@@ -0,0 +1,62 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Utility module for dealing with Git timestamps."""
+
+import datetime
+
+
+def timestamp_offset_to_datetime(timestamp, offset):
+  """Builds a tz-aware datetime.datetime from a timestamp and an offset.
+
+  Handy for the output of porcelain commands, which report times as separate
+  timestamp and offset strings.
+
+  Args:
+    timestamp: An int UTC timestamp, or a string containing decimal digits.
+    offset: A str timezone offset. e.g., '-0800'.
+
+  Returns:
+    A tz-aware datetime.datetime for this timestamp.
+  """
+  return datetime.datetime.fromtimestamp(
+      int(timestamp), FixedOffsetTZ.from_offset_string(offset))
+
+
+def datetime_string(dt):
+  """Formats a tz-aware datetime.datetime the way git prints dates.
+
+  Args:
+    dt: A tz-aware datetime.datetime.
+
+  Returns:
+    A string such as '2016-02-03 08:00:53 +0000'.
+  """
+  git_format = '%Y-%m-%d %H:%M:%S %z'
+  return dt.strftime(git_format)
+
+
+# Adapted from: https://docs.python.org/2/library/datetime.html#tzinfo-objects
+class FixedOffsetTZ(datetime.tzinfo):
+  """A tzinfo with a fixed UTC offset and no daylight-saving adjustment."""
+
+  def __init__(self, offset, name):
+    """Stores the offset (a datetime.timedelta) and the timezone name."""
+    datetime.tzinfo.__init__(self)
+    self.__offset = offset
+    self.__name = name
+
+  def __repr__(self):  # pragma: no cover
+    return '{}({!r}, {!r})'.format(type(self).__name__, self.__offset,
+                                   self.__name)
+
+  @classmethod
+  def from_offset_string(cls, offset):
+    """Creates a FixedOffsetTZ from a '+HHMM' / '-HHMM' offset string.
+
+    Args:
+      offset: A str timezone offset, e.g. '-0800' or '+0530'. The last two
+          characters are the minutes; everything before them is the signed
+          hours.
+
+    Returns:
+      A FixedOffsetTZ named after the offset string. If either part is not a
+      valid integer, a zero-offset timezone named 'UTC' is returned instead.
+    """
+    try:
+      hours = int(offset[:-2])
+      minutes = int(offset[-2:])
+    except ValueError:
+      return cls(datetime.timedelta(0), 'UTC')
+
+    # The sign belongs to the whole offset, but int() only attaches it to the
+    # hours. Read it from the string itself so that '-0830' is -8:30 rather
+    # than -7:30, and '-0030' (whose hours parse as 0) is -0:30.
+    if offset.startswith('-'):
+      minutes = -minutes
+
+    delta = datetime.timedelta(hours=hours, minutes=minutes)
+    return cls(delta, offset)
+
+  def utcoffset(self, dt):
+    return self.__offset
+
+  def tzname(self, dt):
+    return self.__name
+
+  def dst(self, dt):
+    # Fixed offsets never observe daylight saving time.
+    return datetime.timedelta(0)
diff --git a/git_hyper_blame.py b/git_hyper_blame.py
new file mode 100755
index 000000000..17424511a
--- /dev/null
+++ b/git_hyper_blame.py
@@ -0,0 +1,266 @@
+#!/usr/bin/env python
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Wrapper around git blame that ignores certain commits.
+"""
+
+from __future__ import print_function
+
+import argparse
+import collections
+import logging
+import os
+import subprocess2
+import sys
+
+import git_common
+import git_dates
+
+
+logging.getLogger().setLevel(logging.INFO)
+
+
+class Commit(object):
+  """Info about a commit.
+
+  Every attribute except commithash starts as None. parse_blame() fills them
+  in with setattr(), using the header keys of `git blame -p` output with '-'
+  replaced by '_'.
+  """
+  def __init__(self, commithash):
+    self.commithash = commithash
+    self.author = None
+    self.author_mail = None
+    self.author_time = None
+    self.author_tz = None
+    self.committer = None
+    self.committer_mail = None
+    self.committer_time = None
+    self.committer_tz = None
+    self.summary = None
+    self.boundary = None
+    # '<commit> <filename>' string; split on the first space by hyper_blame().
+    self.previous = None
+    self.filename = None
+
+  def __repr__(self):  # pragma: no cover
+    # The format string needs a %s placeholder; with an empty string the %
+    # operator raises TypeError ("not all arguments converted").
+    return '<Commit %s>' % self.commithash
+
+
+BlameLine = collections.namedtuple(
+ 'BlameLine',
+ 'commit context lineno_then lineno_now modified')
+
+
+def parse_blame(blameoutput):
+  """Parses the output of git blame -p into a data structure.
+
+  Args:
+    blameoutput: The full text printed by `git blame -p`.
+
+  Yields:
+    One BlameLine per blamed line, with `modified` set to False. Lines that
+    name the same commit hash share a single Commit object.
+  """
+  commits = {}
+  remaining = iter(blameoutput.split('\n'))
+
+  for header in remaining:
+    # A group starts with '<hash> <lineno_then> <lineno_now> ...'.
+    if not header.strip():
+      continue
+    fields = header.split()
+    commithash = fields[0]
+    lineno_then = int(fields[1])
+    lineno_now = int(fields[2])
+
+    commit = commits.get(commithash)
+    if commit is None:
+      commit = Commit(commithash)
+      commits[commithash] = commit
+
+    # Consume 'key value' detail lines until the tab-prefixed context line.
+    # A key with no value (no space on the line) is stored as True.
+    line = header
+    for line in remaining:
+      if line.startswith('\t'):
+        break
+      key, separator, value = line.partition(' ')
+      if not separator:
+        value = True
+      setattr(commit, key.replace('-', '_'), value)
+
+    # Drop the leading tab from the context line.
+    yield BlameLine(commit, line[1:], lineno_then, lineno_now, False)
+
+
+def print_table(table, colsep=' ', rowsep='\n', align=None, out=sys.stdout):
+  """Prints a 2D rectangular array, padding columns with spaces to align them.
+
+  Args:
+    table: Sequence of rows; each row is a sequence of strings, and all rows
+        have the same number of cells.
+    colsep: String printed between adjacent cells of a row.
+    rowsep: String printed after each row.
+    align: Optional string of 'l' and 'r', designating whether each column is
+        left- or right-aligned. Defaults to left aligned.
+    out: File-like object that receives the output.
+  """
+  if not len(table):
+    return
+
+  # Widest cell seen in each column.
+  widths = [len(cell) for cell in table[0]]
+  for row in table[1:]:
+    widths = [max(widths[col], len(cell)) for col, cell in enumerate(row)]
+
+  if align is None:  # pragma: no cover
+    align = 'l' * len(widths)
+
+  for row in table:
+    last_col = len(row) - 1
+    cells = []
+    for col, cell in enumerate(row):
+      width = widths[col]
+      if align[col] == 'r':
+        cell = cell.rjust(width)
+      elif col < last_col:
+        # Do not pad the final column if left-aligned.
+        cell = cell.ljust(width)
+      cells.append(cell)
+    try:
+      print(*cells, sep=colsep, end=rowsep, file=out)
+    except IOError:  # pragma: no cover
+      # Can happen on Windows if the pipe is closed early.
+      pass
+
+
+def pretty_print(parsedblame, show_filenames=False, out=sys.stdout):
+  """Pretty-prints the output of parse_blame.
+
+  Args:
+    parsedblame: Iterable of BlameLine tuples.
+    show_filenames: If True, each commit's filename is printed as an extra
+        column right after the abbreviated hash.
+    out: File-like object handed to print_table.
+  """
+  rows = []
+  for entry in parsedblame:
+    commit = entry.commit
+    authored = git_dates.timestamp_offset_to_datetime(
+        commit.author_time, commit.author_tz)
+    # A '*' after the line number marks a line whose blame was redirected.
+    marker = '*' if entry.modified else ''
+
+    cells = [commit.commithash[:8]]
+    if show_filenames:
+      cells.append(commit.filename)
+    cells.extend(['(' + commit.author,
+                  git_dates.datetime_string(authored),
+                  str(entry.lineno_now) + marker + ')',
+                  entry.context])
+    rows.append(cells)
+
+  # Only the line-number column is right-aligned.
+  alignment = 'llllrl' if show_filenames else 'lllrl'
+  print_table(rows, align=alignment, out=out)
+
+
+def get_parsed_blame(filename, revision='HEAD'):
+  """Blames `filename` at `revision` and returns a list of BlameLine tuples.
+
+  The list is produced by feeding git_common.blame()'s porcelain output to
+  parse_blame().
+  """
+  porcelain_output = git_common.blame(filename, revision=revision,
+                                      porcelain=True)
+  return list(parse_blame(porcelain_output))
+
+
+def hyper_blame(ignored, filename, revision='HEAD', out=sys.stdout,
+                err=sys.stderr):
+  """Blames a file, looking past any commits in `ignored`.
+
+  Args:
+    ignored: Collection of commit hashes that must not be blamed.
+    filename: File to blame; compared against the filenames git reports.
+    revision: Revision to start the blame from.
+    out: File-like object that receives the blame table.
+    err: File-like object that receives git's stderr if the first blame fails.
+
+  Returns:
+    0 on success, or the return code of the failed git command.
+  """
+  # Map from commit to parsed blame from that commit.
+  blame_cache = {}
+
+  def blame_at(path, commithash):
+    if commithash not in blame_cache:
+      blame_cache[commithash] = get_parsed_blame(path, commithash)
+    return blame_cache[commithash]
+
+  try:
+    starting_blame = blame_at(filename, git_common.hash_one(revision))
+  except subprocess2.CalledProcessError as e:
+    err.write(e.stderr)
+    return e.returncode
+
+  result = []
+
+  for entry in starting_blame:
+    # If a line references an ignored commit, blame that commit's parent
+    # repeatedly until we find a non-ignored commit.
+    while entry.commit.commithash in ignored:
+      previous = entry.commit.previous
+      if previous is None:
+        # You can't ignore the commit that added this file.
+        break
+
+      parent_hash, parent_path = previous.split(' ', 1)
+      parent_blame = blame_at(parent_path, parent_hash)
+      if not parent_blame:
+        # The previous version of this file was empty, therefore, you can't
+        # ignore this commit.
+        break
+
+      # entry.lineno_then is the line number in question at entry.commit.
+      # TODO(mgiuca): This will be incorrect if entry.commit added or removed
+      # lines. Translate that line number so that it refers to the position of
+      # the same line on the parent commit.
+      lineno_previous = entry.lineno_then
+      logging.debug('ignore commit %s on line p%d/t%d/n%d',
+                    entry.commit.commithash, lineno_previous,
+                    entry.lineno_then, entry.lineno_now)
+
+      # Get the line at lineno_previous in the parent commit.
+      assert lineno_previous > 0
+      try:
+        replacement = parent_blame[lineno_previous - 1]
+      except IndexError:
+        # lineno_previous is a guess, so it may be past the end of the file.
+        # Just grab the last line in the file.
+        replacement = parent_blame[-1]
+
+      # Replace the commit and lineno_then, but not the lineno_now or context.
+      logging.debug(' replacing with %r', replacement)
+      entry = entry._replace(commit=replacement.commit,
+                             lineno_then=lineno_previous, modified=True)
+
+    result.append(entry)
+
+  # We don't show filenames in blame output unless we have to: if any line has
+  # a different filename to the file's current name, turn on filename display
+  # for the entire blame output.
+  show_filenames = any(item.commit.filename != filename for item in result)
+
+  pretty_print(result, show_filenames=show_filenames, out=out)
+
+  return 0
+
+def main(args, stdout=sys.stdout, stderr=sys.stderr):
+  """Command-line entry point for `git hyper-blame`.
+
+  Args:
+    args: List of command-line arguments, without the program name.
+    stdout: File-like object that receives the blame output.
+    stderr: File-like object that receives error messages.
+
+  Returns:
+    0 on success, or the return code of the git command that failed.
+  """
+  parser = argparse.ArgumentParser(
+      prog='git hyper-blame',
+      description='git blame with support for ignoring certain commits.')
+  parser.add_argument('-i', metavar='REVISION', action='append', dest='ignored',
+                      default=[], help='a revision to ignore')
+  parser.add_argument('revision', nargs='?', default='HEAD', metavar='REVISION',
+                      help='revision to look at')
+  parser.add_argument('filename', metavar='FILE', help='filename to blame')
+  opts = parser.parse_args(args)
+
+  try:
+    toplevel = git_common.repo_root()
+  except subprocess2.CalledProcessError as e:
+    stderr.write(e.stderr)
+    return e.returncode
+
+  # Make filename relative to the repository root, and cd to the root dir (so
+  # all filenames throughout this script are relative to the root). relpath
+  # resolves a relative argument against the current directory, so it has to
+  # run before the chdir.
+  relative = os.path.relpath(opts.filename, toplevel)
+  os.chdir(toplevel)
+
+  # Normalize filename so we can compare it to other filenames git gives us.
+  filename = os.path.normcase(os.path.normpath(relative))
+
+  ignored = set()
+  for rev in opts.ignored:
+    try:
+      commithash = git_common.hash_one(rev)
+    except subprocess2.CalledProcessError as e:
+      # Custom error message (the message from git-rev-parse is inappropriate).
+      stderr.write('fatal: unknown revision \'%s\'.\n' % rev)
+      return e.returncode
+    ignored.add(commithash)
+
+  return hyper_blame(ignored, filename, opts.revision, out=stdout, err=stderr)
+
+
+if __name__ == '__main__': # pragma: no cover
+ with git_common.less() as less_input:
+ sys.exit(main(sys.argv[1:], stdout=less_input))
diff --git a/man/html/depot_tools.html b/man/html/depot_tools.html
index d690e14a4..8676c0c3d 100644
--- a/man/html/depot_tools.html
+++ b/man/html/depot_tools.html
@@ -804,6 +804,14 @@ Freeze all changes on a branch (indexed and unindexed).
+git-hyper-blame(1)
+
+
+
+Like git blame, but with the ability to ignore or bypass certain commits.
+
+
+
git-map-branches(1)
@@ -919,7 +927,7 @@ Print a diff of the current branch, compared to its upstream.
+
+
+
+
SYNOPSIS
+
+
+
git hyper-blame [-i <rev> [-i <rev> …]] [<rev>] [--] <file>
+
+
+
+
+
+
DESCRIPTION
+
+
git hyper-blame
is like git blame
but it can ignore or "look through" a
+given set of commits, to find the real culprit.
+
This is useful if you have a commit that makes sweeping changes that are
+unlikely to be what you are looking for in a blame, such as mass reformatting or
+renaming. By adding these commits to the hyper-blame ignore list, git
+hyper-blame
will look past these commits to find the previous commit that
+touched a given line.
+
Follows the normal blame
syntax: annotates <file>
with the revision that
+last modified each line. Optional <rev>
specifies the revision of <file>
to
+start from.
+
+
+
+
OPTIONS
+
+
+-
+-i <rev>
+
+-
+
+ A revision to ignore. Can be specified as many times as needed.
+
+
+
+
+
+
+
EXAMPLE
+
+
Let’s run git blame
on a file:
+
$ git blame ipsum.txt
+c6eb3bfa (lorem 2014-08-11 23:15:57 +0000 1) LOREM IPSUM DOLOR SIT AMET, CONSECTETUR
+3ddda43c (auto-uppercaser 2014-07-05 02:05:18 +0000 2) ADIPISCING ELIT, SED DO EIUSMOD TEMPOR
+3ddda43c (auto-uppercaser 2014-07-05 02:05:18 +0000 3) INCIDIDUNT UT LABORE ET DOLORE MAGNA
+3ddda43c (auto-uppercaser 2014-07-05 02:05:18 +0000 4) ALIQUA. UT ENIM AD MINIM VENIAM, QUIS
+c6eb3bfa (lorem 2014-08-11 23:15:57 +0000 5) NOSTRUD EXERCITATION ULLAMCO LABORIS
+3ddda43c (auto-uppercaser 2014-07-05 02:05:18 +0000 6) NISI UT ALIQUIP EX EA COMMODO CONSEQUAT.
+
+
Notice that almost the entire file has been blamed on a formatting change? You
+aren’t interested in the uppercasing of the file. You want to know who
+wrote/modified those lines in the first place. Just tell hyper-blame
to ignore
+that commit:
+
$ git hyper-blame -i 3ddda43c ipsum.txt
+c6eb3bfa (lorem 2014-08-11 23:15:57 +0000 1) LOREM IPSUM DOLOR SIT AMET, CONSECTETUR
+134200d1 (lorem 2014-04-10 08:54:46 +0000 2*) ADIPISCING ELIT, SED DO EIUSMOD TEMPOR
+a34a1d0d (ipsum 2014-04-11 11:25:04 +0000 3*) INCIDIDUNT UT LABORE ET DOLORE MAGNA
+134200d1 (lorem 2014-04-10 08:54:46 +0000 4*) ALIQUA. UT ENIM AD MINIM VENIAM, QUIS
+c6eb3bfa (lorem 2014-08-11 23:15:57 +0000 5) NOSTRUD EXERCITATION ULLAMCO LABORIS
+0f0d17bd (dolor 2014-06-02 11:31:48 +0000 6*) NISI UT ALIQUIP EX EA COMMODO CONSEQUAT.
+
+
hyper-blame
places a *
next to any line where it has skipped over an ignored
+commit, so you know that the line in question has been changed (by an ignored
+commit) since the given person wrote it.
+
+
+
+
BUGS
+
+
+-
+
+When a commit is ignored, hyper-blame currently just blames the same line in
+ the previous version of the file. This can be wildly inaccurate if the ignored
+ commit adds or removes lines, resulting in a completely wrong commit being
+ blamed.
+
+
+-
+
+There is currently no way to pass the ignore list as a file.
+
+
+-
+
+It should be possible for a git repository to configure an automatic list of
+ commits to ignore (like .gitignore
), so that project owners can maintain a
+ list of "big change" commits that are ignored by hyper-blame by default.
+
+
+
+
+
+
+
+
+
+
Part of the chromium depot_tools(7) suite. These tools are meant to
+assist with the development of chromium and related projects. Download the tools
+from here.
+
+
+
+
+
+
diff --git a/man/html/git-hyper-blame.html b/man/html/git-hyper-blame.html
new file mode 100644
index 000000000..a08de12f5
--- /dev/null
+++ b/man/html/git-hyper-blame.html
@@ -0,0 +1,871 @@
+
+
+