presubmit_support: Call git once per change, to get a diff for all files. Parse this result to generate per-file diffs, which go into a cache that's shared between the AffectedFile instances.

Greatly improves presubmit performance on Blink where there may be rule violations (my test case went from 50s to 5s).

BUG=236206
TEST=new test in presubmit_unittest.py; manual performance test on Mac and Windows after touching hundreds of files in webkit; testing included add/move/edit/delete on both binary and text files.

Review URL: https://chromiumcodereview.appspot.com/15898005

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@205275 0039d316-1c4b-4281-b951-d872f2087c98
experimental/szager/collated-output
nick@chromium.org 12 years ago
parent 0bdc2650d4
commit ff526198c4

@ -318,7 +318,7 @@ class FilePatchDiff(FilePatchBase):
Expects the following format:
<garbagge>
<garbage>
diff --git (|a/)<filename> (|b/)<filename>
<similarity>
<filemode changes>

@ -328,7 +328,7 @@ def CheckLongLines(input_api, output_api, maxlen, source_file_filter=None):
SPECIAL_JAVA_STARTS = ('package ', 'import ')
def no_long_lines(file_extension, line):
# Allow special java statements to be as long as neccessary.
# Allow special java statements to be as long as necessary.
if file_extension == 'java' and line.startswith(SPECIAL_JAVA_STARTS):
return True
@ -487,7 +487,7 @@ def GetUnitTestsInDirectory(
input_api, output_api, directory, whitelist=None, blacklist=None):
"""Lists all files in a directory and runs them. Doesn't recurse.
It's mainly a wrapper for RunUnitTests. USe whitelist and blacklist to filter
It's mainly a wrapper for RunUnitTests. Use whitelist and blacklist to filter
tests accordingly.
"""
unit_tests = []
@ -547,6 +547,7 @@ def GetUnitTests(input_api, output_api, unit_tests):
message=message_type))
return results
def GetPythonUnitTests(input_api, output_api, unit_tests):
"""Run the unit tests out of process, capture the output and use the result
code to determine success.

@ -489,11 +489,76 @@ class InputApi(object):
return [m for m in msgs if m]
class _DiffCache(object):
"""Caches diffs retrieved from a particular SCM."""
def GetDiff(self, path, local_root):
"""Get the diff for a particular path."""
raise NotImplementedError()
class _SvnDiffCache(_DiffCache):
"""DiffCache implementation for subversion."""
def __init__(self):
super(_SvnDiffCache, self).__init__()
self._diffs_by_file = {}
def GetDiff(self, path, local_root):
if path not in self._diffs_by_file:
self._diffs_by_file[path] = scm.SVN.GenerateDiff([path], local_root,
False, None)
return self._diffs_by_file[path]
class _GitDiffCache(_DiffCache):
"""DiffCache implementation for git; gets all file diffs at once."""
def __init__(self):
super(_GitDiffCache, self).__init__()
self._diffs_by_file = None
def GetDiff(self, path, local_root):
if not self._diffs_by_file:
# Compute a single diff for all files and parse the output; should
# with git this is much faster than computing one diff for each file.
diffs = {}
# Don't specify any filenames below, because there are command line length
# limits on some platforms and GenerateDiff would fail.
unified_diff = scm.GIT.GenerateDiff(local_root, files=[], full_move=True)
# This regex matches the path twice, separated by a space. Note that
# filename itself may contain spaces.
file_marker = re.compile('^diff --git (?P<filename>.*) (?P=filename)$')
current_diff = []
keep_line_endings = True
for x in unified_diff.splitlines(keep_line_endings):
match = file_marker.match(x)
if match:
# Marks the start of a new per-file section.
diffs[match.group('filename')] = current_diff = [x]
elif x.startswith('diff --git'):
raise PresubmitFailure('Unexpected diff line: %s' % x)
else:
current_diff.append(x)
self._diffs_by_file = dict(
(normpath(path), ''.join(diff)) for path, diff in diffs.items())
if path not in self._diffs_by_file:
raise PresubmitFailure(
'Unified diff did not contain entry for file %s' % path)
return self._diffs_by_file[path]
class AffectedFile(object):
"""Representation of a file in a change."""
DIFF_CACHE = _DiffCache
# Method could be a function
# pylint: disable=R0201
def __init__(self, path, action, repository_root):
def __init__(self, path, action, repository_root, diff_cache=None):
self._path = path
self._action = action
self._local_root = repository_root
@ -501,6 +566,10 @@ class AffectedFile(object):
self._properties = {}
self._cached_changed_contents = None
self._cached_new_contents = None
if diff_cache:
self._diff_cache = diff_cache
else:
self._diff_cache = self.DIFF_CACHE()
logging.debug('%s(%s)' % (self.__class__.__name__, self._path))
def ServerPath(self):
@ -508,7 +577,7 @@ class AffectedFile(object):
Returns the empty string if the file does not exist in SCM.
"""
return ""
return ''
def LocalPath(self):
"""Returns the path of this file on the local disk relative to client root.
@ -565,22 +634,6 @@ class AffectedFile(object):
pass # File not found? That's fine; maybe it was deleted.
return self._cached_new_contents[:]
def OldContents(self):
"""Returns an iterator over the lines in the old version of file.
The old version is the file in depot, i.e. the "left hand side".
"""
raise NotImplementedError() # Implement when needed
def OldFileTempPath(self):
"""Returns the path on local disk where the old contents resides.
The old version is the file in depot, i.e. the "left hand side".
This is a read-only cached copy of the old contents. *DO NOT* try to
modify this file.
"""
raise NotImplementedError() # Implement if/when needed.
def ChangedContents(self):
"""Returns a list of tuples (line number, line text) of all new lines.
@ -612,7 +665,7 @@ class AffectedFile(object):
return self.LocalPath()
def GenerateScmDiff(self):
raise NotImplementedError() # Implemented in derived classes.
return self._diff_cache.GetDiff(self.LocalPath(), self._local_root)
class SvnAffectedFile(AffectedFile):
@ -620,11 +673,12 @@ class SvnAffectedFile(AffectedFile):
# Method 'NNN' is abstract in class 'NNN' but is not overridden
# pylint: disable=W0223
DIFF_CACHE = _SvnDiffCache
def __init__(self, *args, **kwargs):
AffectedFile.__init__(self, *args, **kwargs)
self._server_path = None
self._is_text_file = None
self._diff = None
def ServerPath(self):
if self._server_path is None:
@ -664,23 +718,18 @@ class SvnAffectedFile(AffectedFile):
self._is_text_file = (not mime_type or mime_type.startswith('text/'))
return self._is_text_file
def GenerateScmDiff(self):
if self._diff is None:
self._diff = scm.SVN.GenerateDiff(
[self.LocalPath()], self._local_root, False, None)
return self._diff
class GitAffectedFile(AffectedFile):
"""Representation of a file in a change out of a git checkout."""
# Method 'NNN' is abstract in class 'NNN' but is not overridden
# pylint: disable=W0223
DIFF_CACHE = _GitDiffCache
def __init__(self, *args, **kwargs):
AffectedFile.__init__(self, *args, **kwargs)
self._server_path = None
self._is_text_file = None
self._diff = None
def ServerPath(self):
if self._server_path is None:
@ -714,12 +763,6 @@ class GitAffectedFile(AffectedFile):
self._is_text_file = os.path.isfile(self.AbsoluteLocalPath())
return self._is_text_file
def GenerateScmDiff(self):
if self._diff is None:
self._diff = scm.GIT.GenerateDiff(
self._local_root, files=[self.LocalPath(),])
return self._diff
class Change(object):
"""Describe a change.
@ -728,7 +771,7 @@ class Change(object):
tested.
Instance members:
tags: Dictionnary of KEY=VALUE pairs found in the change description.
tags: Dictionary of KEY=VALUE pairs found in the change description.
self.KEY: equivalent to tags['KEY']
"""
@ -769,9 +812,10 @@ class Change(object):
assert all(
(isinstance(f, (list, tuple)) and len(f) == 2) for f in files), files
diff_cache = self._AFFECTED_FILES.DIFF_CACHE()
self._affected_files = [
self._AFFECTED_FILES(info[1], info[0].strip(), self._local_root)
for info in files
self._AFFECTED_FILES(path, action.strip(), self._local_root, diff_cache)
for action, path in files
]
def Name(self):

@ -147,6 +147,7 @@ def GetPreferredTrySlaves(project):
self.mox.StubOutWithMock(presubmit.gclient_utils, 'FileRead')
self.mox.StubOutWithMock(presubmit.gclient_utils, 'FileWrite')
self.mox.StubOutWithMock(presubmit.scm.SVN, 'GenerateDiff')
self.mox.StubOutWithMock(presubmit.scm.GIT, 'GenerateDiff')
class PresubmitUnittest(PresubmitTestsBase):
@ -385,6 +386,193 @@ class PresubmitUnittest(PresubmitTestsBase):
self.assertEquals(rhs_lines[13][1], 49)
self.assertEquals(rhs_lines[13][2], 'this is line number 48.1')
def testGitChange(self):
description_lines = ('Hello there',
'this is a change',
'BUG=123',
' STORY =http://foo/ \t',
'and some more regular text \t')
unified_diff = [
'diff --git binary_a.png binary_a.png',
'new file mode 100644',
'index 0000000..6fbdd6d',
'Binary files /dev/null and binary_a.png differ',
'diff --git binary_d.png binary_d.png',
'deleted file mode 100644',
'index 6fbdd6d..0000000',
'Binary files binary_d.png and /dev/null differ',
'diff --git binary_md.png binary_md.png',
'index 6fbdd6..be3d5d8 100644',
'GIT binary patch',
'delta 109',
'zcmeyihjs5>)(Opwi4&WXB~yyi6N|G`(i5|?i<2_a@)OH5N{Um`D-<SM@g!_^W9;SR',
'zO9b*W5{pxTM0slZ=F42indK9U^MTyVQlJ2s%1BMmEKMv1Q^gtS&9nHn&*Ede;|~CU',
'CMJxLN',
'',
'delta 34',
'scmV+-0Nww+y#@BX1(1W0gkzIp3}CZh0gVZ>`wGVcgW(Rh;SK@ZPa9GXlK=n!',
'',
'diff --git binary_m.png binary_m.png',
'index 6fbdd6d..be3d5d8 100644',
'Binary files binary_m.png and binary_m.png differ',
'diff --git boo/blat.cc boo/blat.cc',
'new file mode 100644',
'index 0000000..37d18ad',
'--- boo/blat.cc',
'+++ boo/blat.cc',
'@@ -0,0 +1,5 @@',
'+This is some text',
'+which lacks a copyright warning',
'+but it is nonetheless interesting',
'+and worthy of your attention.',
'+Its freshness factor is through the roof.',
'diff --git floo/delburt.cc floo/delburt.cc',
'deleted file mode 100644',
'index e06377a..0000000',
'--- floo/delburt.cc',
'+++ /dev/null',
'@@ -1,14 +0,0 @@',
'-This text used to be here',
'-but someone, probably you,',
'-having consumed the text',
'- (absorbed its meaning)',
'-decided that it should be made to not exist',
'-that others would not read it.',
'- (What happened here?',
'-was the author incompetent?',
'-or is the world today so different from the world',
'- the author foresaw',
'-and past imaginination',
'- amounts to rubble, insignificant,',
'-something to be tripped over',
'-and frustrated by)',
'diff --git foo/TestExpectations foo/TestExpectations',
'index c6e12ab..d1c5f23 100644',
'--- foo/TestExpectations',
'+++ foo/TestExpectations',
'@@ -1,12 +1,24 @@',
'-Stranger, behold:',
'+Strange to behold:',
' This is a text',
' Its contents existed before.',
'',
'-It is written:',
'+Weasel words suggest:',
' its contents shall exist after',
' and its contents',
' with the progress of time',
' will evolve,',
'- snaillike,',
'+ erratically,',
' into still different texts',
'-from this.',
'\ No newline at end of file',
'+from this.',
'+',
'+For the most part,',
'+I really think unified diffs',
'+are elegant: the way you can type',
'+diff --git inside/a/text inside/a/text',
'+or something silly like',
'+@@ -278,6 +278,10 @@',
'+and have this not be interpreted',
'+as the start of a new file',
'+or anything messed up like that,',
'+because you parsed the header',
'+correctly.',
'\ No newline at end of file',
'']
files = [('A ', 'binary_a.png'),
('D ', 'binary_d.png'),
('M ', 'binary_m.png'),
('M ', 'binary_md.png'), # Binary w/ diff
('A ', 'boo/blat.cc'),
('D ', 'floo/delburt.cc'),
('M ', 'foo/TestExpectations')]
for op, path in files:
full_path = presubmit.os.path.join(self.fake_root_dir, *path.split('/'))
if op.startswith('D'):
os.path.exists(full_path).AndReturn(False)
else:
os.path.exists(full_path).AndReturn(False)
os.path.isfile(full_path).AndReturn(True)
presubmit.scm.GIT.GenerateDiff(self.fake_root_dir, files=[], full_move=True
).AndReturn('\n'.join(unified_diff))
self.mox.ReplayAll()
change = presubmit.GitChange(
'mychange',
'\n'.join(description_lines),
self.fake_root_dir,
files,
0,
0,
None)
self.failUnless(change.Name() == 'mychange')
self.failUnless(change.DescriptionText() ==
'Hello there\nthis is a change\nand some more regular text')
self.failUnless(change.FullDescriptionText() ==
'\n'.join(description_lines))
self.failUnless(change.BUG == '123')
self.failUnless(change.STORY == 'http://foo/')
self.failUnless(change.BLEH == None)
self.failUnless(len(change.AffectedFiles()) == 7)
self.failUnless(len(change.AffectedFiles(include_dirs=True)) == 7)
self.failUnless(len(change.AffectedFiles(include_deletes=False)) == 5)
self.failUnless(len(change.AffectedFiles(include_dirs=True,
include_deletes=False)) == 5)
# Note that on git, there's no distinction between binary files and text
# files; everything that's not a delete is a text file.
affected_text_files = change.AffectedTextFiles()
self.failUnless(len(affected_text_files) == 5)
local_paths = change.LocalPaths()
expected_paths = [os.path.normpath(f) for op, f in files]
self.assertEqual(local_paths, expected_paths)
try:
_ = change.ServerPaths()
self.fail("ServerPaths implemented.")
except NotImplementedError:
pass
actual_rhs_lines = []
for f, linenum, line in change.RightHandSideLines():
actual_rhs_lines.append((f.LocalPath(), linenum, line))
f_blat = os.path.normpath('boo/blat.cc')
f_test_expectations = os.path.normpath('foo/TestExpectations')
expected_rhs_lines = [
(f_blat, 1, 'This is some text'),
(f_blat, 2, 'which lacks a copyright warning'),
(f_blat, 3, 'but it is nonetheless interesting'),
(f_blat, 4, 'and worthy of your attention.'),
(f_blat, 5, 'Its freshness factor is through the roof.'),
(f_test_expectations, 1, 'Strange to behold:'),
(f_test_expectations, 5, 'Weasel words suggest:'),
(f_test_expectations, 10, ' erratically,'),
(f_test_expectations, 13, 'from this.'),
(f_test_expectations, 14, ''),
(f_test_expectations, 15, 'For the most part,'),
(f_test_expectations, 16, 'I really think unified diffs'),
(f_test_expectations, 17, 'are elegant: the way you can type'),
(f_test_expectations, 18, 'diff --git inside/a/text inside/a/text'),
(f_test_expectations, 19, 'or something silly like'),
(f_test_expectations, 20, '@@ -278,6 +278,10 @@'),
(f_test_expectations, 21, 'and have this not be interpreted'),
(f_test_expectations, 22, 'as the start of a new file'),
(f_test_expectations, 23, 'or anything messed up like that,'),
(f_test_expectations, 24, 'because you parsed the header'),
(f_test_expectations, 25, 'correctly.')]
self.assertEquals(expected_rhs_lines, actual_rhs_lines)
def testInvalidChange(self):
try:
presubmit.SvnChange(
@ -1251,7 +1439,8 @@ class InputApiUnittest(PresubmitTestsBase):
input_api.ReadFile(path, 'x')
def testReadFileAffectedFileDenied(self):
fileobj = presubmit.AffectedFile('boo', 'M', 'Unrelated')
fileobj = presubmit.AffectedFile('boo', 'M', 'Unrelated',
diff_cache=mox.IsA(presubmit._DiffCache))
self.mox.ReplayAll()
change = presubmit.Change(
@ -1262,7 +1451,8 @@ class InputApiUnittest(PresubmitTestsBase):
self.assertRaises(IOError, input_api.ReadFile, fileobj, 'x')
def testReadFileAffectedFileAccepted(self):
fileobj = presubmit.AffectedFile('AA/boo', 'M', self.fake_root_dir)
fileobj = presubmit.AffectedFile('AA/boo', 'M', self.fake_root_dir,
diff_cache=mox.IsA(presubmit._DiffCache))
presubmit.gclient_utils.FileRead(fileobj.AbsoluteLocalPath(), 'x'
).AndReturn(None)
self.mox.ReplayAll()
@ -1358,19 +1548,22 @@ class OutputApiUnittest(PresubmitTestsBase):
self.failIf(output.should_continue())
self.failUnless(output.getvalue().count('???'))
class AffectedFileUnittest(PresubmitTestsBase):
def testMembersChanged(self):
self.mox.ReplayAll()
members = [
'AbsoluteLocalPath', 'Action', 'ChangedContents', 'GenerateScmDiff',
'IsDirectory', 'IsTextFile', 'LocalPath', 'NewContents', 'OldContents',
'OldFileTempPath', 'Property', 'ServerPath',
'AbsoluteLocalPath', 'Action', 'ChangedContents', 'DIFF_CACHE',
'GenerateScmDiff', 'IsDirectory', 'IsTextFile', 'LocalPath',
'NewContents', 'Property', 'ServerPath',
]
# If this test fails, you should add the relevant test.
self.compareMembers(
presubmit.AffectedFile('a', 'b', self.fake_root_dir), members)
self.compareMembers(
presubmit.SvnAffectedFile('a', 'b', self.fake_root_dir), members)
self.compareMembers(
presubmit.GitAffectedFile('a', 'b', self.fake_root_dir), members)
def testAffectedFile(self):
path = presubmit.os.path.join('foo', 'blat.cc')

Loading…
Cancel
Save