presubmit_support: Call git once per change, to get a diff for all files. Parse this result to generate per-file diffs, which go into a cache that's shared between the AffectedFile instances.

Greatly improves presubmit performance on Blink where there may be rule violations (my test case went from 50s to 5s).

BUG=236206
TEST=new test in presubmit_unittest.py; manual performance test on Mac and Windows after touching hundreds of files in webkit; testing included add/move/edit/delete on both binary and text files.

Review URL: https://chromiumcodereview.appspot.com/15898005

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@205275 0039d316-1c4b-4281-b951-d872f2087c98
experimental/szager/collated-output
nick@chromium.org 12 years ago
parent 0bdc2650d4
commit ff526198c4

@ -318,7 +318,7 @@ class FilePatchDiff(FilePatchBase):
Expects the following format: Expects the following format:
<garbagge> <garbage>
diff --git (|a/)<filename> (|b/)<filename> diff --git (|a/)<filename> (|b/)<filename>
<similarity> <similarity>
<filemode changes> <filemode changes>

@ -328,7 +328,7 @@ def CheckLongLines(input_api, output_api, maxlen, source_file_filter=None):
SPECIAL_JAVA_STARTS = ('package ', 'import ') SPECIAL_JAVA_STARTS = ('package ', 'import ')
def no_long_lines(file_extension, line): def no_long_lines(file_extension, line):
# Allow special java statements to be as long as neccessary. # Allow special java statements to be as long as necessary.
if file_extension == 'java' and line.startswith(SPECIAL_JAVA_STARTS): if file_extension == 'java' and line.startswith(SPECIAL_JAVA_STARTS):
return True return True
@ -487,7 +487,7 @@ def GetUnitTestsInDirectory(
input_api, output_api, directory, whitelist=None, blacklist=None): input_api, output_api, directory, whitelist=None, blacklist=None):
"""Lists all files in a directory and runs them. Doesn't recurse. """Lists all files in a directory and runs them. Doesn't recurse.
It's mainly a wrapper for RunUnitTests. USe whitelist and blacklist to filter It's mainly a wrapper for RunUnitTests. Use whitelist and blacklist to filter
tests accordingly. tests accordingly.
""" """
unit_tests = [] unit_tests = []
@ -547,6 +547,7 @@ def GetUnitTests(input_api, output_api, unit_tests):
message=message_type)) message=message_type))
return results return results
def GetPythonUnitTests(input_api, output_api, unit_tests): def GetPythonUnitTests(input_api, output_api, unit_tests):
"""Run the unit tests out of process, capture the output and use the result """Run the unit tests out of process, capture the output and use the result
code to determine success. code to determine success.

@ -489,11 +489,76 @@ class InputApi(object):
return [m for m in msgs if m] return [m for m in msgs if m]
class _DiffCache(object):
"""Caches diffs retrieved from a particular SCM."""
def GetDiff(self, path, local_root):
"""Get the diff for a particular path."""
raise NotImplementedError()
class _SvnDiffCache(_DiffCache):
"""DiffCache implementation for subversion."""
def __init__(self):
super(_SvnDiffCache, self).__init__()
self._diffs_by_file = {}
def GetDiff(self, path, local_root):
if path not in self._diffs_by_file:
self._diffs_by_file[path] = scm.SVN.GenerateDiff([path], local_root,
False, None)
return self._diffs_by_file[path]
class _GitDiffCache(_DiffCache):
"""DiffCache implementation for git; gets all file diffs at once."""
def __init__(self):
super(_GitDiffCache, self).__init__()
self._diffs_by_file = None
def GetDiff(self, path, local_root):
if not self._diffs_by_file:
# Compute a single diff for all files and parse the output; should
# with git this is much faster than computing one diff for each file.
diffs = {}
# Don't specify any filenames below, because there are command line length
# limits on some platforms and GenerateDiff would fail.
unified_diff = scm.GIT.GenerateDiff(local_root, files=[], full_move=True)
# This regex matches the path twice, separated by a space. Note that
# filename itself may contain spaces.
file_marker = re.compile('^diff --git (?P<filename>.*) (?P=filename)$')
current_diff = []
keep_line_endings = True
for x in unified_diff.splitlines(keep_line_endings):
match = file_marker.match(x)
if match:
# Marks the start of a new per-file section.
diffs[match.group('filename')] = current_diff = [x]
elif x.startswith('diff --git'):
raise PresubmitFailure('Unexpected diff line: %s' % x)
else:
current_diff.append(x)
self._diffs_by_file = dict(
(normpath(path), ''.join(diff)) for path, diff in diffs.items())
if path not in self._diffs_by_file:
raise PresubmitFailure(
'Unified diff did not contain entry for file %s' % path)
return self._diffs_by_file[path]
class AffectedFile(object): class AffectedFile(object):
"""Representation of a file in a change.""" """Representation of a file in a change."""
DIFF_CACHE = _DiffCache
# Method could be a function # Method could be a function
# pylint: disable=R0201 # pylint: disable=R0201
def __init__(self, path, action, repository_root): def __init__(self, path, action, repository_root, diff_cache=None):
self._path = path self._path = path
self._action = action self._action = action
self._local_root = repository_root self._local_root = repository_root
@ -501,6 +566,10 @@ class AffectedFile(object):
self._properties = {} self._properties = {}
self._cached_changed_contents = None self._cached_changed_contents = None
self._cached_new_contents = None self._cached_new_contents = None
if diff_cache:
self._diff_cache = diff_cache
else:
self._diff_cache = self.DIFF_CACHE()
logging.debug('%s(%s)' % (self.__class__.__name__, self._path)) logging.debug('%s(%s)' % (self.__class__.__name__, self._path))
def ServerPath(self): def ServerPath(self):
@ -508,7 +577,7 @@ class AffectedFile(object):
Returns the empty string if the file does not exist in SCM. Returns the empty string if the file does not exist in SCM.
""" """
return "" return ''
def LocalPath(self): def LocalPath(self):
"""Returns the path of this file on the local disk relative to client root. """Returns the path of this file on the local disk relative to client root.
@ -565,22 +634,6 @@ class AffectedFile(object):
pass # File not found? That's fine; maybe it was deleted. pass # File not found? That's fine; maybe it was deleted.
return self._cached_new_contents[:] return self._cached_new_contents[:]
def OldContents(self):
"""Returns an iterator over the lines in the old version of file.
The old version is the file in depot, i.e. the "left hand side".
"""
raise NotImplementedError() # Implement when needed
def OldFileTempPath(self):
"""Returns the path on local disk where the old contents resides.
The old version is the file in depot, i.e. the "left hand side".
This is a read-only cached copy of the old contents. *DO NOT* try to
modify this file.
"""
raise NotImplementedError() # Implement if/when needed.
def ChangedContents(self): def ChangedContents(self):
"""Returns a list of tuples (line number, line text) of all new lines. """Returns a list of tuples (line number, line text) of all new lines.
@ -612,7 +665,7 @@ class AffectedFile(object):
return self.LocalPath() return self.LocalPath()
def GenerateScmDiff(self): def GenerateScmDiff(self):
raise NotImplementedError() # Implemented in derived classes. return self._diff_cache.GetDiff(self.LocalPath(), self._local_root)
class SvnAffectedFile(AffectedFile): class SvnAffectedFile(AffectedFile):
@ -620,11 +673,12 @@ class SvnAffectedFile(AffectedFile):
# Method 'NNN' is abstract in class 'NNN' but is not overridden # Method 'NNN' is abstract in class 'NNN' but is not overridden
# pylint: disable=W0223 # pylint: disable=W0223
DIFF_CACHE = _SvnDiffCache
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
AffectedFile.__init__(self, *args, **kwargs) AffectedFile.__init__(self, *args, **kwargs)
self._server_path = None self._server_path = None
self._is_text_file = None self._is_text_file = None
self._diff = None
def ServerPath(self): def ServerPath(self):
if self._server_path is None: if self._server_path is None:
@ -664,23 +718,18 @@ class SvnAffectedFile(AffectedFile):
self._is_text_file = (not mime_type or mime_type.startswith('text/')) self._is_text_file = (not mime_type or mime_type.startswith('text/'))
return self._is_text_file return self._is_text_file
def GenerateScmDiff(self):
if self._diff is None:
self._diff = scm.SVN.GenerateDiff(
[self.LocalPath()], self._local_root, False, None)
return self._diff
class GitAffectedFile(AffectedFile): class GitAffectedFile(AffectedFile):
"""Representation of a file in a change out of a git checkout.""" """Representation of a file in a change out of a git checkout."""
# Method 'NNN' is abstract in class 'NNN' but is not overridden # Method 'NNN' is abstract in class 'NNN' but is not overridden
# pylint: disable=W0223 # pylint: disable=W0223
DIFF_CACHE = _GitDiffCache
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
AffectedFile.__init__(self, *args, **kwargs) AffectedFile.__init__(self, *args, **kwargs)
self._server_path = None self._server_path = None
self._is_text_file = None self._is_text_file = None
self._diff = None
def ServerPath(self): def ServerPath(self):
if self._server_path is None: if self._server_path is None:
@ -714,12 +763,6 @@ class GitAffectedFile(AffectedFile):
self._is_text_file = os.path.isfile(self.AbsoluteLocalPath()) self._is_text_file = os.path.isfile(self.AbsoluteLocalPath())
return self._is_text_file return self._is_text_file
def GenerateScmDiff(self):
if self._diff is None:
self._diff = scm.GIT.GenerateDiff(
self._local_root, files=[self.LocalPath(),])
return self._diff
class Change(object): class Change(object):
"""Describe a change. """Describe a change.
@ -728,7 +771,7 @@ class Change(object):
tested. tested.
Instance members: Instance members:
tags: Dictionnary of KEY=VALUE pairs found in the change description. tags: Dictionary of KEY=VALUE pairs found in the change description.
self.KEY: equivalent to tags['KEY'] self.KEY: equivalent to tags['KEY']
""" """
@ -769,9 +812,10 @@ class Change(object):
assert all( assert all(
(isinstance(f, (list, tuple)) and len(f) == 2) for f in files), files (isinstance(f, (list, tuple)) and len(f) == 2) for f in files), files
diff_cache = self._AFFECTED_FILES.DIFF_CACHE()
self._affected_files = [ self._affected_files = [
self._AFFECTED_FILES(info[1], info[0].strip(), self._local_root) self._AFFECTED_FILES(path, action.strip(), self._local_root, diff_cache)
for info in files for action, path in files
] ]
def Name(self): def Name(self):

@ -147,6 +147,7 @@ def GetPreferredTrySlaves(project):
self.mox.StubOutWithMock(presubmit.gclient_utils, 'FileRead') self.mox.StubOutWithMock(presubmit.gclient_utils, 'FileRead')
self.mox.StubOutWithMock(presubmit.gclient_utils, 'FileWrite') self.mox.StubOutWithMock(presubmit.gclient_utils, 'FileWrite')
self.mox.StubOutWithMock(presubmit.scm.SVN, 'GenerateDiff') self.mox.StubOutWithMock(presubmit.scm.SVN, 'GenerateDiff')
self.mox.StubOutWithMock(presubmit.scm.GIT, 'GenerateDiff')
class PresubmitUnittest(PresubmitTestsBase): class PresubmitUnittest(PresubmitTestsBase):
@ -385,6 +386,193 @@ class PresubmitUnittest(PresubmitTestsBase):
self.assertEquals(rhs_lines[13][1], 49) self.assertEquals(rhs_lines[13][1], 49)
self.assertEquals(rhs_lines[13][2], 'this is line number 48.1') self.assertEquals(rhs_lines[13][2], 'this is line number 48.1')
def testGitChange(self):
description_lines = ('Hello there',
'this is a change',
'BUG=123',
' STORY =http://foo/ \t',
'and some more regular text \t')
unified_diff = [
'diff --git binary_a.png binary_a.png',
'new file mode 100644',
'index 0000000..6fbdd6d',
'Binary files /dev/null and binary_a.png differ',
'diff --git binary_d.png binary_d.png',
'deleted file mode 100644',
'index 6fbdd6d..0000000',
'Binary files binary_d.png and /dev/null differ',
'diff --git binary_md.png binary_md.png',
'index 6fbdd6..be3d5d8 100644',
'GIT binary patch',
'delta 109',
'zcmeyihjs5>)(Opwi4&WXB~yyi6N|G`(i5|?i<2_a@)OH5N{Um`D-<SM@g!_^W9;SR',
'zO9b*W5{pxTM0slZ=F42indK9U^MTyVQlJ2s%1BMmEKMv1Q^gtS&9nHn&*Ede;|~CU',
'CMJxLN',
'',
'delta 34',
'scmV+-0Nww+y#@BX1(1W0gkzIp3}CZh0gVZ>`wGVcgW(Rh;SK@ZPa9GXlK=n!',
'',
'diff --git binary_m.png binary_m.png',
'index 6fbdd6d..be3d5d8 100644',
'Binary files binary_m.png and binary_m.png differ',
'diff --git boo/blat.cc boo/blat.cc',
'new file mode 100644',
'index 0000000..37d18ad',
'--- boo/blat.cc',
'+++ boo/blat.cc',
'@@ -0,0 +1,5 @@',
'+This is some text',
'+which lacks a copyright warning',
'+but it is nonetheless interesting',
'+and worthy of your attention.',
'+Its freshness factor is through the roof.',
'diff --git floo/delburt.cc floo/delburt.cc',
'deleted file mode 100644',
'index e06377a..0000000',
'--- floo/delburt.cc',
'+++ /dev/null',
'@@ -1,14 +0,0 @@',
'-This text used to be here',
'-but someone, probably you,',
'-having consumed the text',
'- (absorbed its meaning)',
'-decided that it should be made to not exist',
'-that others would not read it.',
'- (What happened here?',
'-was the author incompetent?',
'-or is the world today so different from the world',
'- the author foresaw',
'-and past imaginination',
'- amounts to rubble, insignificant,',
'-something to be tripped over',
'-and frustrated by)',
'diff --git foo/TestExpectations foo/TestExpectations',
'index c6e12ab..d1c5f23 100644',
'--- foo/TestExpectations',
'+++ foo/TestExpectations',
'@@ -1,12 +1,24 @@',
'-Stranger, behold:',
'+Strange to behold:',
' This is a text',
' Its contents existed before.',
'',
'-It is written:',
'+Weasel words suggest:',
' its contents shall exist after',
' and its contents',
' with the progress of time',
' will evolve,',
'- snaillike,',
'+ erratically,',
' into still different texts',
'-from this.',
'\ No newline at end of file',
'+from this.',
'+',
'+For the most part,',
'+I really think unified diffs',
'+are elegant: the way you can type',
'+diff --git inside/a/text inside/a/text',
'+or something silly like',
'+@@ -278,6 +278,10 @@',
'+and have this not be interpreted',
'+as the start of a new file',
'+or anything messed up like that,',
'+because you parsed the header',
'+correctly.',
'\ No newline at end of file',
'']
files = [('A ', 'binary_a.png'),
('D ', 'binary_d.png'),
('M ', 'binary_m.png'),
('M ', 'binary_md.png'), # Binary w/ diff
('A ', 'boo/blat.cc'),
('D ', 'floo/delburt.cc'),
('M ', 'foo/TestExpectations')]
for op, path in files:
full_path = presubmit.os.path.join(self.fake_root_dir, *path.split('/'))
if op.startswith('D'):
os.path.exists(full_path).AndReturn(False)
else:
os.path.exists(full_path).AndReturn(False)
os.path.isfile(full_path).AndReturn(True)
presubmit.scm.GIT.GenerateDiff(self.fake_root_dir, files=[], full_move=True
).AndReturn('\n'.join(unified_diff))
self.mox.ReplayAll()
change = presubmit.GitChange(
'mychange',
'\n'.join(description_lines),
self.fake_root_dir,
files,
0,
0,
None)
self.failUnless(change.Name() == 'mychange')
self.failUnless(change.DescriptionText() ==
'Hello there\nthis is a change\nand some more regular text')
self.failUnless(change.FullDescriptionText() ==
'\n'.join(description_lines))
self.failUnless(change.BUG == '123')
self.failUnless(change.STORY == 'http://foo/')
self.failUnless(change.BLEH == None)
self.failUnless(len(change.AffectedFiles()) == 7)
self.failUnless(len(change.AffectedFiles(include_dirs=True)) == 7)
self.failUnless(len(change.AffectedFiles(include_deletes=False)) == 5)
self.failUnless(len(change.AffectedFiles(include_dirs=True,
include_deletes=False)) == 5)
# Note that on git, there's no distinction between binary files and text
# files; everything that's not a delete is a text file.
affected_text_files = change.AffectedTextFiles()
self.failUnless(len(affected_text_files) == 5)
local_paths = change.LocalPaths()
expected_paths = [os.path.normpath(f) for op, f in files]
self.assertEqual(local_paths, expected_paths)
try:
_ = change.ServerPaths()
self.fail("ServerPaths implemented.")
except NotImplementedError:
pass
actual_rhs_lines = []
for f, linenum, line in change.RightHandSideLines():
actual_rhs_lines.append((f.LocalPath(), linenum, line))
f_blat = os.path.normpath('boo/blat.cc')
f_test_expectations = os.path.normpath('foo/TestExpectations')
expected_rhs_lines = [
(f_blat, 1, 'This is some text'),
(f_blat, 2, 'which lacks a copyright warning'),
(f_blat, 3, 'but it is nonetheless interesting'),
(f_blat, 4, 'and worthy of your attention.'),
(f_blat, 5, 'Its freshness factor is through the roof.'),
(f_test_expectations, 1, 'Strange to behold:'),
(f_test_expectations, 5, 'Weasel words suggest:'),
(f_test_expectations, 10, ' erratically,'),
(f_test_expectations, 13, 'from this.'),
(f_test_expectations, 14, ''),
(f_test_expectations, 15, 'For the most part,'),
(f_test_expectations, 16, 'I really think unified diffs'),
(f_test_expectations, 17, 'are elegant: the way you can type'),
(f_test_expectations, 18, 'diff --git inside/a/text inside/a/text'),
(f_test_expectations, 19, 'or something silly like'),
(f_test_expectations, 20, '@@ -278,6 +278,10 @@'),
(f_test_expectations, 21, 'and have this not be interpreted'),
(f_test_expectations, 22, 'as the start of a new file'),
(f_test_expectations, 23, 'or anything messed up like that,'),
(f_test_expectations, 24, 'because you parsed the header'),
(f_test_expectations, 25, 'correctly.')]
self.assertEquals(expected_rhs_lines, actual_rhs_lines)
def testInvalidChange(self): def testInvalidChange(self):
try: try:
presubmit.SvnChange( presubmit.SvnChange(
@ -1251,7 +1439,8 @@ class InputApiUnittest(PresubmitTestsBase):
input_api.ReadFile(path, 'x') input_api.ReadFile(path, 'x')
def testReadFileAffectedFileDenied(self): def testReadFileAffectedFileDenied(self):
fileobj = presubmit.AffectedFile('boo', 'M', 'Unrelated') fileobj = presubmit.AffectedFile('boo', 'M', 'Unrelated',
diff_cache=mox.IsA(presubmit._DiffCache))
self.mox.ReplayAll() self.mox.ReplayAll()
change = presubmit.Change( change = presubmit.Change(
@ -1262,7 +1451,8 @@ class InputApiUnittest(PresubmitTestsBase):
self.assertRaises(IOError, input_api.ReadFile, fileobj, 'x') self.assertRaises(IOError, input_api.ReadFile, fileobj, 'x')
def testReadFileAffectedFileAccepted(self): def testReadFileAffectedFileAccepted(self):
fileobj = presubmit.AffectedFile('AA/boo', 'M', self.fake_root_dir) fileobj = presubmit.AffectedFile('AA/boo', 'M', self.fake_root_dir,
diff_cache=mox.IsA(presubmit._DiffCache))
presubmit.gclient_utils.FileRead(fileobj.AbsoluteLocalPath(), 'x' presubmit.gclient_utils.FileRead(fileobj.AbsoluteLocalPath(), 'x'
).AndReturn(None) ).AndReturn(None)
self.mox.ReplayAll() self.mox.ReplayAll()
@ -1358,19 +1548,22 @@ class OutputApiUnittest(PresubmitTestsBase):
self.failIf(output.should_continue()) self.failIf(output.should_continue())
self.failUnless(output.getvalue().count('???')) self.failUnless(output.getvalue().count('???'))
class AffectedFileUnittest(PresubmitTestsBase): class AffectedFileUnittest(PresubmitTestsBase):
def testMembersChanged(self): def testMembersChanged(self):
self.mox.ReplayAll() self.mox.ReplayAll()
members = [ members = [
'AbsoluteLocalPath', 'Action', 'ChangedContents', 'GenerateScmDiff', 'AbsoluteLocalPath', 'Action', 'ChangedContents', 'DIFF_CACHE',
'IsDirectory', 'IsTextFile', 'LocalPath', 'NewContents', 'OldContents', 'GenerateScmDiff', 'IsDirectory', 'IsTextFile', 'LocalPath',
'OldFileTempPath', 'Property', 'ServerPath', 'NewContents', 'Property', 'ServerPath',
] ]
# If this test fails, you should add the relevant test. # If this test fails, you should add the relevant test.
self.compareMembers( self.compareMembers(
presubmit.AffectedFile('a', 'b', self.fake_root_dir), members) presubmit.AffectedFile('a', 'b', self.fake_root_dir), members)
self.compareMembers( self.compareMembers(
presubmit.SvnAffectedFile('a', 'b', self.fake_root_dir), members) presubmit.SvnAffectedFile('a', 'b', self.fake_root_dir), members)
self.compareMembers(
presubmit.GitAffectedFile('a', 'b', self.fake_root_dir), members)
def testAffectedFile(self): def testAffectedFile(self):
path = presubmit.os.path.join('foo', 'blat.cc') path = presubmit.os.path.join('foo', 'blat.cc')

Loading…
Cancel
Save