diff --git a/presubmit_support.py b/presubmit_support.py index a6d1adfff..a1568c7c1 100755 --- a/presubmit_support.py +++ b/presubmit_support.py @@ -21,8 +21,10 @@ import inspect import itertools import json # Exposed through the API. import logging +import mimetypes import multiprocessing import os # Somewhat exposed through the API. +import pathlib import random import re # Exposed through the API. import signal @@ -935,36 +937,15 @@ class _GitDiffCache(_DiffCache): # Compare against None to distinguish between None and an initialized # but empty dictionary. if self._diffs_by_file == None: - # Compute a single diff for all files and parse the output; should - # with git this is much faster than computing one diff for each - # file. - diffs = {} - # Don't specify any filenames below, because there are command line # length limits on some platforms and GenerateDiff would fail. unified_diff = scm.GIT.GenerateDiff(local_root, files=[], full_move=True, branch=self._upstream) - - # This regex matches the path twice, separated by a space. Note that - # filename itself may contain spaces. - file_marker = re.compile( - '^diff --git (?P.*) (?P=filename)$') - current_diff = [] - keep_line_endings = True - for x in unified_diff.splitlines(keep_line_endings): - match = file_marker.match(x) - if match: - # Marks the start of a new per-file section. - diffs[match.group('filename')] = current_diff = [x] - elif x.startswith('diff --git'): - raise PresubmitFailure('Unexpected diff line: %s' % x) - else: - current_diff.append(x) - - self._diffs_by_file = dict( - (normpath(path), ''.join(diff)) for path, diff in diffs.items()) + # Compute a single diff for all files and parse the output; with git + # this is much faster than computing one diff for each file. + self._diffs_by_file = _parse_unified_diff(unified_diff) if path not in self._diffs_by_file: # SCM didn't have any diff on this file. It could be that the file @@ -979,6 +960,27 @@ class _GitDiffCache(_DiffCache): return scm.GIT.GetOldContents(local_root, path, branch=self._upstream) +class _ProvidedDiffCache(_DiffCache): + """Caches diffs from the provided diff file.""" + + def __init__(self, diff): + """Stores all diffs and diffs per file.""" + super(_ProvidedDiffCache, self).__init__() + self._diffs_by_file = None + self._diff = diff + + def GetDiff(self, path, local_root): + """Get the diff for a particular path.""" + if self._diffs_by_file == None: + self._diffs_by_file = _parse_unified_diff(self._diff) + return self._diffs_by_file.get(path, '') + + def GetOldContents(self, path, local_root): + """Get the old version for a particular path.""" + # TODO(gavinmak): Implement with self._diff. + return '' + + class AffectedFile(object): """Representation of a file in a change.""" @@ -994,6 +996,7 @@ class AffectedFile(object): self._cached_changed_contents = None self._cached_new_contents = None self._diff_cache = diff_cache + self._is_testable_file = None logging.debug('%s(%s)', self.__class__.__name__, self._path) def LocalPath(self): @@ -1017,7 +1020,14 @@ class AffectedFile(object): """Returns True if the file is a text file and not a binary file. Deleted files are not text file.""" - raise NotImplementedError() # Implement when needed + if self._is_testable_file is None: + if self.Action() == 'D': + # A deleted file is not testable. + self._is_testable_file = False + else: + t, _ = mimetypes.guess_type(self.AbsoluteLocalPath()) + self._is_testable_file = bool(t and t.startswith('text/')) + return self._is_testable_file def IsTextFile(self): """An alias to IsTestableFile for backwards compatibility.""" @@ -1106,8 +1116,6 @@ class GitAffectedFile(AffectedFile): def __init__(self, *args, **kwargs): AffectedFile.__init__(self, *args, **kwargs) - self._server_path = None - self._is_testable_file = None def IsTestableFile(self): if self._is_testable_file is None: @@ -1120,6 +1128,11 @@ class GitAffectedFile(AffectedFile): return self._is_testable_file +class ProvidedDiffAffectedFile(AffectedFile): + """Representation of a file in a change described by a diff.""" + DIFF_CACHE = _ProvidedDiffCache + + class Change(object): """Describe a change. @@ -1437,6 +1450,25 @@ class GitChange(Change): ] +class ProvidedDiffChange(Change): + _AFFECTED_FILES = ProvidedDiffAffectedFile + + def __init__(self, *args, diff, **kwargs): + self._diff = diff + super(ProvidedDiffChange, self).__init__(*args) + + def _diff_cache(self): + return self._AFFECTED_FILES.DIFF_CACHE(self._diff) + + def AllFiles(self, root=None): + """List all files under source control in the repo. + + There is no SCM, so return all files under the repo root. + """ + root = root or self.RepositoryRoot() + return [str(p) for p in pathlib.Path(root).rglob("*")] + + def ListRelevantPresubmitFiles(files, root): """Finds all presubmit files that apply to a given set of source files. @@ -1989,15 +2021,23 @@ def _parse_change(parser, options): parser: The parser used to parse the arguments from command line. options: The arguments parsed from command line. Returns: - A GitChange if the change root is a git repository, or a Change otherwise. + A GitChange if the change root is a git repository, a ProvidedDiffChange + if a diff file is specified, or a Change otherwise. """ - if options.files and options.all_files: - parser.error(' cannot be specified when --all-files is set.') - + if options.all_files: + if options.files: + parser.error(' cannot be specified when --all-files is set.') + if options.diff_file: + parser.error( + ' cannot be specified when --all-files is set.') + + # TODO(b/323243527): Consider adding a SCM for provided diff. change_scm = scm.determine_scm(options.root) - if change_scm != 'git' and not options.files: - parser.error(' is not optional for unversioned directories.') + if change_scm != 'git' and not options.files and not options.diff_file: + parser.error( + 'unversioned directories must specify or .') + diff = None if options.files: if options.source_controlled_only: # Get the filtered set of files from SCM. @@ -2012,6 +2052,8 @@ def _parse_change(parser, options): change_files = _parse_files(options.files, options.recursive) elif options.all_files: change_files = [('M', f) for f in scm.GIT.GetAllFiles(options.root)] + elif options.diff_file: + diff, change_files = _process_diff_file(options.diff_file) else: change_files = scm.GIT.CaptureStatus(options.root, options.upstream or None, @@ -2024,6 +2066,8 @@ def _parse_change(parser, options): ] if change_scm == 'git': return GitChange(*change_args, upstream=options.upstream) + if diff: + return ProvidedDiffChange(*change_args, diff=diff) return Change(*change_args) @@ -2062,6 +2106,64 @@ def _parse_gerrit_options(parser, options): return gerrit_obj +def _parse_unified_diff(diff): + """Parses a unified git diff and returns a list of (path, diff) tuples.""" + diffs = {} + + # This regex matches the path twice, separated by a space. Note that + # filename itself may contain spaces. + file_marker = re.compile( + '^diff --git (?:a/)?(?P.*) (?:b/)?(?P=filename)$') + current_diff = [] + keep_line_endings = True + for x in diff.splitlines(keep_line_endings): + match = file_marker.match(x) + if match: + # Marks the start of a new per-file section. + diffs[match.group('filename')] = current_diff = [x] + elif x.startswith('diff --git'): + raise PresubmitFailure('Unexpected diff line: %s' % x) + else: + current_diff.append(x) + + return dict((normpath(path), ''.join(diff)) for path, diff in diffs.items()) + + +def _process_diff_file(diff_file): + """Validates a git diff file and processes it into a list of change files. + + Each change file is a tuple of (action, path) where action is one of: + * A: newly added file + * M: modified file + * D: deleted file + + Args: + diff_file: Path to file containing unified git diff. + + Returns: + Contents of diff_file and a list of change file tuples from the diff. + + Raises: + PresubmitFailure: If the provided diff is empty or otherwise invalid. + """ + diff = gclient_utils.FileRead(diff_file) + if not diff: + raise PresubmitFailure('diff file is empty') + + change_files = [] + for file, file_diff in _parse_unified_diff(diff).items(): + header_line = file_diff.splitlines()[1] + if not header_line: + raise PresubmitFailure('diff header is empty') + if header_line.startswith('new'): + action = 'A' + elif header_line.startswith('deleted'): + action = 'D' + else: + action = 'M' + change_files.append((action, file)) + return diff, change_files + @contextlib.contextmanager def setup_environ(kv: Mapping[str, str]): """Update environment while in context, and reset back to original on exit. @@ -2133,6 +2235,7 @@ def main(argv=None): help='The change description.') desc.add_argument('--description_file', help='File to read change description from.') + parser.add_argument('--diff_file', help='File to read change diff from.') parser.add_argument('--issue', type=int, default=0) parser.add_argument('--patchset', type=int, default=0) parser.add_argument('--root', diff --git a/tests/presubmit_support_test.py b/tests/presubmit_support_test.py index 59d0c18be..91147da8e 100755 --- a/tests/presubmit_support_test.py +++ b/tests/presubmit_support_test.py @@ -4,13 +4,14 @@ # found in the LICENSE file. import os.path -import subprocess import sys +import tempfile import unittest ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, ROOT_DIR) +import gclient_utils import presubmit_support @@ -24,5 +25,159 @@ class PresubmitSupportTest(unittest.TestCase): self.assertIsNone(os.environ.get('PRESUBMIT_FOO_ENV', None)) +class TestParseDiff(unittest.TestCase): + """A suite of tests related to diff parsing and processing.""" + + def _test_diff_to_change_files(self, diff, expected): + with gclient_utils.temporary_file() as tmp: + gclient_utils.FileWrite(tmp, diff, mode='w+') + content, change_files = presubmit_support._process_diff_file(tmp) + self.assertCountEqual(content, diff) + self.assertCountEqual(change_files, expected) + + def test_diff_to_change_files_raises_on_empty_file(self): + with self.assertRaises(presubmit_support.PresubmitFailure): + self._test_diff_to_change_files(diff='', expected=[]) + + def test_diff_to_change_files_raises_on_empty_diff_header(self): + diff = """ +diff --git a/foo b/foo + +""" + with self.assertRaises(presubmit_support.PresubmitFailure): + self._test_diff_to_change_files(diff=diff, expected=[]) + + def test_diff_to_change_files_simple_add(self): + diff = """ +diff --git a/foo b/foo +new file mode 100644 +index 0000000..9daeafb +--- /dev/null ++++ b/foo +@@ -0,0 +1 @@ ++add +""" + self._test_diff_to_change_files(diff=diff, expected=[('A', 'foo')]) + + def test_diff_to_change_files_simple_delete(self): + diff = """ +diff --git a/foo b/foo +deleted file mode 100644 +index f675c2a..0000000 +--- a/foo ++++ /dev/null +@@ -1,1 +0,0 @@ +-delete +""" + self._test_diff_to_change_files(diff=diff, expected=[('D', 'foo')]) + + def test_diff_to_change_files_simple_modification(self): + diff = """ +diff --git a/foo b/foo +index d7ba659f..b7957f3 100644 +--- a/foo ++++ b/foo +@@ -29,7 +29,7 @@ +other +random +text +- foo1 ++ foo2 +other +random +text +""" + self._test_diff_to_change_files(diff=diff, expected=[('M', 'foo')]) + + def test_diff_to_change_files_multiple_changes(self): + diff = """ +diff --git a/foo b/foo +index d7ba659f..b7957f3 100644 +--- a/foo ++++ b/foo +@@ -29,7 +29,7 @@ +other +random +text +- foo1 ++ foo2 +other +random +text +diff --git a/bar b/bar +new file mode 100644 +index 0000000..9daeafb +--- /dev/null ++++ b/bar +@@ -0,0 +1 @@ ++add +diff --git a/baz b/baz +deleted file mode 100644 +index f675c2a..0000000 +--- a/baz ++++ /dev/null +@@ -1,1 +0,0 @@ +-delete +""" + self._test_diff_to_change_files(diff=diff, + expected=[('M', 'foo'), ('A', 'bar'), + ('D', 'baz')]) + + def test_parse_unified_diff_with_valid_diff(self): + diff = """ +diff --git a/foo b/foo +new file mode 100644 +index 0000000..9daeafb +--- /dev/null ++++ b/foo +@@ -0,0 +1 @@ ++add +""" + res = presubmit_support._parse_unified_diff(diff) + self.assertCountEqual( + res, { + 'foo': + """ +new file mode 100644 +index 0000000..9daeafb +--- /dev/null ++++ b/foo +@@ -0,0 +1 @@ ++add +""" + }) + + def test_parse_unified_diff_with_valid_diff_noprefix(self): + diff = """ +diff --git foo foo +new file mode 100644 +index 0000000..9daeafb +--- /dev/null ++++ foo +@@ -0,0 +1 @@ ++add +""" + res = presubmit_support._parse_unified_diff(diff) + self.assertCountEqual( + res, { + 'foo': + """ +new file mode 100644 +index 0000000..9daeafb +--- /dev/null ++++ foo +@@ -0,0 +1 @@ ++add +""" + }) + + def test_parse_unified_diff_with_invalid_diff(self): + diff = """ +diff --git a/ffoo b/foo +""" + with self.assertRaises(presubmit_support.PresubmitFailure): + presubmit_support._parse_unified_diff(diff) + + if __name__ == "__main__": unittest.main() diff --git a/tests/presubmit_unittest.py b/tests/presubmit_unittest.py index de9026b71..1af40ddab 100755 --- a/tests/presubmit_unittest.py +++ b/tests/presubmit_unittest.py @@ -959,8 +959,8 @@ def CheckChangeOnCommit(input_api, output_api): self.assertEqual( sys.stderr.getvalue(), 'usage: presubmit_unittest.py [options] \n' - 'presubmit_unittest.py: error: is not optional for unversioned ' - 'directories.\n') + 'presubmit_unittest.py: error: unversioned directories must ' + 'specify or .\n') @mock.patch('presubmit_support.Change', mock.Mock()) def testParseChange_Files(self): @@ -982,12 +982,12 @@ def CheckChangeOnCommit(input_api, output_api): scm.determine_scm.return_value = None parser = mock.Mock() parser.error.side_effect = [SystemExit] - options = mock.Mock(files=[], all_files=False) + options = mock.Mock(files=[], diff_file='', all_files=False) with self.assertRaises(SystemExit): presubmit._parse_change(parser, options) parser.error.assert_called_once_with( - ' is not optional for unversioned directories.') + 'unversioned directories must specify or .') def testParseChange_FilesAndAllFiles(self): parser = mock.Mock() @@ -999,6 +999,16 @@ def CheckChangeOnCommit(input_api, output_api): parser.error.assert_called_once_with( ' cannot be specified when --all-files is set.') + def testParseChange_DiffAndAllFiles(self): + parser = mock.Mock() + parser.error.side_effect = [SystemExit] + options = mock.Mock(files=[], all_files=True, diff_file='foo.diff') + + with self.assertRaises(SystemExit): + presubmit._parse_change(parser, options) + parser.error.assert_called_once_with( + ' cannot be specified when --all-files is set.') + @mock.patch('presubmit_support.GitChange', mock.Mock()) def testParseChange_FilesAndGit(self): scm.determine_scm.return_value = 'git' @@ -1023,7 +1033,7 @@ def CheckChangeOnCommit(input_api, output_api): def testParseChange_NoFilesAndGit(self): scm.determine_scm.return_value = 'git' scm.GIT.CaptureStatus.return_value = [('A', 'added.txt')] - options = mock.Mock(all_files=False, files=[]) + options = mock.Mock(all_files=False, files=[], diff_file='') change = presubmit._parse_change(None, options) self.assertEqual(presubmit.GitChange.return_value, change) @@ -1044,7 +1054,7 @@ def CheckChangeOnCommit(input_api, output_api): def testParseChange_AllFilesAndGit(self): scm.determine_scm.return_value = 'git' scm.GIT.GetAllFiles.return_value = ['foo.txt', 'bar.txt'] - options = mock.Mock(all_files=True, files=[]) + options = mock.Mock(all_files=True, files=[], diff_file='') change = presubmit._parse_change(None, options) self.assertEqual(presubmit.GitChange.return_value, change) @@ -1059,6 +1069,49 @@ def CheckChangeOnCommit(input_api, output_api): upstream=options.upstream) scm.GIT.GetAllFiles.assert_called_once_with(options.root) + def testParseChange_EmptyDiffFile(self): + gclient_utils.FileRead.return_value = '' + options = mock.Mock(all_files=False, files=[], diff_file='foo.diff') + with self.assertRaises(presubmit.PresubmitFailure): + presubmit._parse_change(None, options) + + @mock.patch('presubmit_support.ProvidedDiffChange', mock.Mock()) + def testParseChange_ProvidedDiffFile(self): + diff = """ +diff --git a/foo b/foo +new file mode 100644 +index 0000000..9daeafb +--- /dev/null ++++ b/foo +@@ -0,0 +1 @@ ++test +diff --git a/bar b/bar +deleted file mode 100644 +index f675c2a..0000000 +--- a/bar ++++ /dev/null +@@ -1,1 +0,0 @@ +-bar +diff --git a/baz b/baz +index d7ba659f..b7957f3 100644 +--- a/baz ++++ b/baz +@@ -1,1 +1,1 @@ +-baz ++bat""" + gclient_utils.FileRead.return_value = diff + options = mock.Mock(all_files=False, files=[], diff_file='foo.diff') + change = presubmit._parse_change(None, options) + self.assertEqual(presubmit.ProvidedDiffChange.return_value, change) + presubmit.ProvidedDiffChange.assert_called_once_with( + options.name, + options.description, + options.root, [('A', 'foo'), ('D', 'bar'), ('M', 'baz')], + options.issue, + options.patchset, + options.author, + diff=diff) + def testParseGerritOptions_NoGerritUrl(self): options = mock.Mock(gerrit_url=None, gerrit_fetch=False,