Implement support for change info thru diff file

This will be used in cases where git workspace is unavailable.

Bug: b/323243527
Change-Id: I51056cfbac50f76a0de1b98cfec488d8f133e353
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5263021
Commit-Queue: Gavin Mak <gavinmak@google.com>
Reviewed-by: Joanna Wang <jojwang@chromium.org>
changes/21/5263021/22
Gavin Mak 1 year ago committed by LUCI CQ
parent 280bb93210
commit 62dc9f462e

@ -21,8 +21,10 @@ import inspect
import itertools
import json # Exposed through the API.
import logging
import mimetypes
import multiprocessing
import os # Somewhat exposed through the API.
import pathlib
import random
import re # Exposed through the API.
import signal
@ -935,36 +937,15 @@ class _GitDiffCache(_DiffCache):
# Compare against None to distinguish between None and an initialized
# but empty dictionary.
if self._diffs_by_file == None:
# Compute a single diff for all files and parse the output; should
# with git this is much faster than computing one diff for each
# file.
diffs = {}
# Don't specify any filenames below, because there are command line
# length limits on some platforms and GenerateDiff would fail.
unified_diff = scm.GIT.GenerateDiff(local_root,
files=[],
full_move=True,
branch=self._upstream)
# This regex matches the path twice, separated by a space. Note that
# filename itself may contain spaces.
file_marker = re.compile(
'^diff --git (?P<filename>.*) (?P=filename)$')
current_diff = []
keep_line_endings = True
for x in unified_diff.splitlines(keep_line_endings):
match = file_marker.match(x)
if match:
# Marks the start of a new per-file section.
diffs[match.group('filename')] = current_diff = [x]
elif x.startswith('diff --git'):
raise PresubmitFailure('Unexpected diff line: %s' % x)
else:
current_diff.append(x)
self._diffs_by_file = dict(
(normpath(path), ''.join(diff)) for path, diff in diffs.items())
# Compute a single diff for all files and parse the output; with git
# this is much faster than computing one diff for each file.
self._diffs_by_file = _parse_unified_diff(unified_diff)
if path not in self._diffs_by_file:
# SCM didn't have any diff on this file. It could be that the file
@ -979,6 +960,27 @@ class _GitDiffCache(_DiffCache):
return scm.GIT.GetOldContents(local_root, path, branch=self._upstream)
class _ProvidedDiffCache(_DiffCache):
"""Caches diffs from the provided diff file."""
def __init__(self, diff):
"""Stores all diffs and diffs per file."""
super(_ProvidedDiffCache, self).__init__()
self._diffs_by_file = None
self._diff = diff
def GetDiff(self, path, local_root):
"""Get the diff for a particular path."""
if self._diffs_by_file == None:
self._diffs_by_file = _parse_unified_diff(self._diff)
return self._diffs_by_file.get(path, '')
def GetOldContents(self, path, local_root):
"""Get the old version for a particular path."""
# TODO(gavinmak): Implement with self._diff.
return ''
class AffectedFile(object):
"""Representation of a file in a change."""
@ -994,6 +996,7 @@ class AffectedFile(object):
self._cached_changed_contents = None
self._cached_new_contents = None
self._diff_cache = diff_cache
self._is_testable_file = None
logging.debug('%s(%s)', self.__class__.__name__, self._path)
def LocalPath(self):
@ -1017,7 +1020,14 @@ class AffectedFile(object):
"""Returns True if the file is a text file and not a binary file.
Deleted files are not text file."""
raise NotImplementedError() # Implement when needed
if self._is_testable_file is None:
if self.Action() == 'D':
# A deleted file is not testable.
self._is_testable_file = False
else:
t, _ = mimetypes.guess_type(self.AbsoluteLocalPath())
self._is_testable_file = bool(t and t.startswith('text/'))
return self._is_testable_file
def IsTextFile(self):
"""An alias to IsTestableFile for backwards compatibility."""
@ -1106,8 +1116,6 @@ class GitAffectedFile(AffectedFile):
def __init__(self, *args, **kwargs):
AffectedFile.__init__(self, *args, **kwargs)
self._server_path = None
self._is_testable_file = None
def IsTestableFile(self):
if self._is_testable_file is None:
@ -1120,6 +1128,11 @@ class GitAffectedFile(AffectedFile):
return self._is_testable_file
class ProvidedDiffAffectedFile(AffectedFile):
"""Representation of a file in a change described by a diff."""
DIFF_CACHE = _ProvidedDiffCache
class Change(object):
"""Describe a change.
@ -1437,6 +1450,25 @@ class GitChange(Change):
]
class ProvidedDiffChange(Change):
_AFFECTED_FILES = ProvidedDiffAffectedFile
def __init__(self, *args, diff, **kwargs):
self._diff = diff
super(ProvidedDiffChange, self).__init__(*args)
def _diff_cache(self):
return self._AFFECTED_FILES.DIFF_CACHE(self._diff)
def AllFiles(self, root=None):
"""List all files under source control in the repo.
There is no SCM, so return all files under the repo root.
"""
root = root or self.RepositoryRoot()
return [str(p) for p in pathlib.Path(root).rglob("*")]
def ListRelevantPresubmitFiles(files, root):
"""Finds all presubmit files that apply to a given set of source files.
@ -1989,15 +2021,23 @@ def _parse_change(parser, options):
parser: The parser used to parse the arguments from command line.
options: The arguments parsed from command line.
Returns:
A GitChange if the change root is a git repository, or a Change otherwise.
A GitChange if the change root is a git repository, a ProvidedDiffChange
if a diff file is specified, or a Change otherwise.
"""
if options.files and options.all_files:
if options.all_files:
if options.files:
parser.error('<files> cannot be specified when --all-files is set.')
if options.diff_file:
parser.error(
'<diff_file> cannot be specified when --all-files is set.')
# TODO(b/323243527): Consider adding a SCM for provided diff.
change_scm = scm.determine_scm(options.root)
if change_scm != 'git' and not options.files:
parser.error('<files> is not optional for unversioned directories.')
if change_scm != 'git' and not options.files and not options.diff_file:
parser.error(
'unversioned directories must specify <files> or <diff_file>.')
diff = None
if options.files:
if options.source_controlled_only:
# Get the filtered set of files from SCM.
@ -2012,6 +2052,8 @@ def _parse_change(parser, options):
change_files = _parse_files(options.files, options.recursive)
elif options.all_files:
change_files = [('M', f) for f in scm.GIT.GetAllFiles(options.root)]
elif options.diff_file:
diff, change_files = _process_diff_file(options.diff_file)
else:
change_files = scm.GIT.CaptureStatus(options.root,
options.upstream or None,
@ -2024,6 +2066,8 @@ def _parse_change(parser, options):
]
if change_scm == 'git':
return GitChange(*change_args, upstream=options.upstream)
if diff:
return ProvidedDiffChange(*change_args, diff=diff)
return Change(*change_args)
@ -2062,6 +2106,64 @@ def _parse_gerrit_options(parser, options):
return gerrit_obj
def _parse_unified_diff(diff):
"""Parses a unified git diff and returns a list of (path, diff) tuples."""
diffs = {}
# This regex matches the path twice, separated by a space. Note that
# filename itself may contain spaces.
file_marker = re.compile(
'^diff --git (?:a/)?(?P<filename>.*) (?:b/)?(?P=filename)$')
current_diff = []
keep_line_endings = True
for x in diff.splitlines(keep_line_endings):
match = file_marker.match(x)
if match:
# Marks the start of a new per-file section.
diffs[match.group('filename')] = current_diff = [x]
elif x.startswith('diff --git'):
raise PresubmitFailure('Unexpected diff line: %s' % x)
else:
current_diff.append(x)
return dict((normpath(path), ''.join(diff)) for path, diff in diffs.items())
def _process_diff_file(diff_file):
"""Validates a git diff file and processes it into a list of change files.
Each change file is a tuple of (action, path) where action is one of:
* A: newly added file
* M: modified file
* D: deleted file
Args:
diff_file: Path to file containing unified git diff.
Returns:
Contents of diff_file and a list of change file tuples from the diff.
Raises:
PresubmitFailure: If the provided diff is empty or otherwise invalid.
"""
diff = gclient_utils.FileRead(diff_file)
if not diff:
raise PresubmitFailure('diff file is empty')
change_files = []
for file, file_diff in _parse_unified_diff(diff).items():
header_line = file_diff.splitlines()[1]
if not header_line:
raise PresubmitFailure('diff header is empty')
if header_line.startswith('new'):
action = 'A'
elif header_line.startswith('deleted'):
action = 'D'
else:
action = 'M'
change_files.append((action, file))
return diff, change_files
@contextlib.contextmanager
def setup_environ(kv: Mapping[str, str]):
"""Update environment while in context, and reset back to original on exit.
@ -2133,6 +2235,7 @@ def main(argv=None):
help='The change description.')
desc.add_argument('--description_file',
help='File to read change description from.')
parser.add_argument('--diff_file', help='File to read change diff from.')
parser.add_argument('--issue', type=int, default=0)
parser.add_argument('--patchset', type=int, default=0)
parser.add_argument('--root',

@ -4,13 +4,14 @@
# found in the LICENSE file.
import os.path
import subprocess
import sys
import tempfile
import unittest
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, ROOT_DIR)
import gclient_utils
import presubmit_support
@ -24,5 +25,159 @@ class PresubmitSupportTest(unittest.TestCase):
self.assertIsNone(os.environ.get('PRESUBMIT_FOO_ENV', None))
class TestParseDiff(unittest.TestCase):
"""A suite of tests related to diff parsing and processing."""
def _test_diff_to_change_files(self, diff, expected):
with gclient_utils.temporary_file() as tmp:
gclient_utils.FileWrite(tmp, diff, mode='w+')
content, change_files = presubmit_support._process_diff_file(tmp)
self.assertCountEqual(content, diff)
self.assertCountEqual(change_files, expected)
def test_diff_to_change_files_raises_on_empty_file(self):
with self.assertRaises(presubmit_support.PresubmitFailure):
self._test_diff_to_change_files(diff='', expected=[])
def test_diff_to_change_files_raises_on_empty_diff_header(self):
diff = """
diff --git a/foo b/foo
"""
with self.assertRaises(presubmit_support.PresubmitFailure):
self._test_diff_to_change_files(diff=diff, expected=[])
def test_diff_to_change_files_simple_add(self):
diff = """
diff --git a/foo b/foo
new file mode 100644
index 0000000..9daeafb
--- /dev/null
+++ b/foo
@@ -0,0 +1 @@
+add
"""
self._test_diff_to_change_files(diff=diff, expected=[('A', 'foo')])
def test_diff_to_change_files_simple_delete(self):
diff = """
diff --git a/foo b/foo
deleted file mode 100644
index f675c2a..0000000
--- a/foo
+++ /dev/null
@@ -1,1 +0,0 @@
-delete
"""
self._test_diff_to_change_files(diff=diff, expected=[('D', 'foo')])
def test_diff_to_change_files_simple_modification(self):
diff = """
diff --git a/foo b/foo
index d7ba659f..b7957f3 100644
--- a/foo
+++ b/foo
@@ -29,7 +29,7 @@
other
random
text
- foo1
+ foo2
other
random
text
"""
self._test_diff_to_change_files(diff=diff, expected=[('M', 'foo')])
def test_diff_to_change_files_multiple_changes(self):
diff = """
diff --git a/foo b/foo
index d7ba659f..b7957f3 100644
--- a/foo
+++ b/foo
@@ -29,7 +29,7 @@
other
random
text
- foo1
+ foo2
other
random
text
diff --git a/bar b/bar
new file mode 100644
index 0000000..9daeafb
--- /dev/null
+++ b/bar
@@ -0,0 +1 @@
+add
diff --git a/baz b/baz
deleted file mode 100644
index f675c2a..0000000
--- a/baz
+++ /dev/null
@@ -1,1 +0,0 @@
-delete
"""
self._test_diff_to_change_files(diff=diff,
expected=[('M', 'foo'), ('A', 'bar'),
('D', 'baz')])
def test_parse_unified_diff_with_valid_diff(self):
diff = """
diff --git a/foo b/foo
new file mode 100644
index 0000000..9daeafb
--- /dev/null
+++ b/foo
@@ -0,0 +1 @@
+add
"""
res = presubmit_support._parse_unified_diff(diff)
self.assertCountEqual(
res, {
'foo':
"""
new file mode 100644
index 0000000..9daeafb
--- /dev/null
+++ b/foo
@@ -0,0 +1 @@
+add
"""
})
def test_parse_unified_diff_with_valid_diff_noprefix(self):
diff = """
diff --git foo foo
new file mode 100644
index 0000000..9daeafb
--- /dev/null
+++ foo
@@ -0,0 +1 @@
+add
"""
res = presubmit_support._parse_unified_diff(diff)
self.assertCountEqual(
res, {
'foo':
"""
new file mode 100644
index 0000000..9daeafb
--- /dev/null
+++ foo
@@ -0,0 +1 @@
+add
"""
})
def test_parse_unified_diff_with_invalid_diff(self):
diff = """
diff --git a/ffoo b/foo
"""
with self.assertRaises(presubmit_support.PresubmitFailure):
presubmit_support._parse_unified_diff(diff)
if __name__ == "__main__":
unittest.main()

@ -959,8 +959,8 @@ def CheckChangeOnCommit(input_api, output_api):
self.assertEqual(
sys.stderr.getvalue(),
'usage: presubmit_unittest.py [options] <files...>\n'
'presubmit_unittest.py: error: <files> is not optional for unversioned '
'directories.\n')
'presubmit_unittest.py: error: unversioned directories must '
'specify <files> or <diff_file>.\n')
@mock.patch('presubmit_support.Change', mock.Mock())
def testParseChange_Files(self):
@ -982,12 +982,12 @@ def CheckChangeOnCommit(input_api, output_api):
scm.determine_scm.return_value = None
parser = mock.Mock()
parser.error.side_effect = [SystemExit]
options = mock.Mock(files=[], all_files=False)
options = mock.Mock(files=[], diff_file='', all_files=False)
with self.assertRaises(SystemExit):
presubmit._parse_change(parser, options)
parser.error.assert_called_once_with(
'<files> is not optional for unversioned directories.')
'unversioned directories must specify <files> or <diff_file>.')
def testParseChange_FilesAndAllFiles(self):
parser = mock.Mock()
@ -999,6 +999,16 @@ def CheckChangeOnCommit(input_api, output_api):
parser.error.assert_called_once_with(
'<files> cannot be specified when --all-files is set.')
def testParseChange_DiffAndAllFiles(self):
parser = mock.Mock()
parser.error.side_effect = [SystemExit]
options = mock.Mock(files=[], all_files=True, diff_file='foo.diff')
with self.assertRaises(SystemExit):
presubmit._parse_change(parser, options)
parser.error.assert_called_once_with(
'<diff_file> cannot be specified when --all-files is set.')
@mock.patch('presubmit_support.GitChange', mock.Mock())
def testParseChange_FilesAndGit(self):
scm.determine_scm.return_value = 'git'
@ -1023,7 +1033,7 @@ def CheckChangeOnCommit(input_api, output_api):
def testParseChange_NoFilesAndGit(self):
scm.determine_scm.return_value = 'git'
scm.GIT.CaptureStatus.return_value = [('A', 'added.txt')]
options = mock.Mock(all_files=False, files=[])
options = mock.Mock(all_files=False, files=[], diff_file='')
change = presubmit._parse_change(None, options)
self.assertEqual(presubmit.GitChange.return_value, change)
@ -1044,7 +1054,7 @@ def CheckChangeOnCommit(input_api, output_api):
def testParseChange_AllFilesAndGit(self):
scm.determine_scm.return_value = 'git'
scm.GIT.GetAllFiles.return_value = ['foo.txt', 'bar.txt']
options = mock.Mock(all_files=True, files=[])
options = mock.Mock(all_files=True, files=[], diff_file='')
change = presubmit._parse_change(None, options)
self.assertEqual(presubmit.GitChange.return_value, change)
@ -1059,6 +1069,49 @@ def CheckChangeOnCommit(input_api, output_api):
upstream=options.upstream)
scm.GIT.GetAllFiles.assert_called_once_with(options.root)
def testParseChange_EmptyDiffFile(self):
gclient_utils.FileRead.return_value = ''
options = mock.Mock(all_files=False, files=[], diff_file='foo.diff')
with self.assertRaises(presubmit.PresubmitFailure):
presubmit._parse_change(None, options)
@mock.patch('presubmit_support.ProvidedDiffChange', mock.Mock())
def testParseChange_ProvidedDiffFile(self):
diff = """
diff --git a/foo b/foo
new file mode 100644
index 0000000..9daeafb
--- /dev/null
+++ b/foo
@@ -0,0 +1 @@
+test
diff --git a/bar b/bar
deleted file mode 100644
index f675c2a..0000000
--- a/bar
+++ /dev/null
@@ -1,1 +0,0 @@
-bar
diff --git a/baz b/baz
index d7ba659f..b7957f3 100644
--- a/baz
+++ b/baz
@@ -1,1 +1,1 @@
-baz
+bat"""
gclient_utils.FileRead.return_value = diff
options = mock.Mock(all_files=False, files=[], diff_file='foo.diff')
change = presubmit._parse_change(None, options)
self.assertEqual(presubmit.ProvidedDiffChange.return_value, change)
presubmit.ProvidedDiffChange.assert_called_once_with(
options.name,
options.description,
options.root, [('A', 'foo'), ('D', 'bar'), ('M', 'baz')],
options.issue,
options.patchset,
options.author,
diff=diff)
def testParseGerritOptions_NoGerritUrl(self):
options = mock.Mock(gerrit_url=None,
gerrit_fetch=False,

Loading…
Cancel
Save