Create scm.DIFF.GetAllFiles

For a ProvidedDiffChange, the AllFiles method naively returns all files
with rglob("*"). The returned list includes files in nested submodules.
This does not match the behavior of GitChange's AllFiles which uses
git ls-files to find all files.

Implement a new SCM that stops iterating recursively when it
sees a submodule from .gitmodules in the repo root.

Bug: b/323243527
Change-Id: I170d0f1bc4a838acea04779dee3df7fca0bce359
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5648616
Reviewed-by: Josip Sokcevic <sokcevic@chromium.org>
Commit-Queue: Gavin Mak <gavinmak@google.com>
changes/16/5648616/6
Gavin Mak 10 months ago committed by LUCI CQ
parent 6359e82d13
commit 8ac3425344

@ -23,7 +23,6 @@ import logging
import mimetypes
import multiprocessing
import os # Somewhat exposed through the API.
import pathlib
import random
import re # Exposed through the API.
import shutil
@ -1486,12 +1485,9 @@ class ProvidedDiffChange(Change):
return self._AFFECTED_FILES.DIFF_CACHE(self._diff)
def AllFiles(self, root=None):
"""List all files under source control in the repo.
There is no SCM, so return all files under the repo root.
"""
"""List all files under source control in the repo."""
root = root or self.RepositoryRoot()
return [str(p) for p in pathlib.Path(root).rglob("*")]
return scm.DIFF.GetAllFiles(root)
def ListRelevantPresubmitFiles(files, root):
@ -2060,11 +2056,11 @@ def _parse_change(parser, options):
parser.error(
'<diff_file> cannot be specified when <generate_diff> is set.')
# TODO(b/323243527): Consider adding a SCM for provided diff.
change_scm = scm.determine_scm(options.root)
if change_scm != 'git' and not options.files and not options.diff_file:
parser.error(
'unversioned directories must specify <files> or <diff_file>.')
if change_scm == 'diff' and not (options.files or options.all_files
or options.diff_file):
parser.error('unversioned directories must specify '
'<files>, <all_files>, or <diff_file>.')
diff = None
if options.files:
@ -2091,7 +2087,11 @@ def _parse_change(parser, options):
# Get the filtered set of files from a directory scan.
change_files = _parse_files(options.files, options.recursive)
elif options.all_files:
change_files = [('M', f) for f in scm.GIT.GetAllFiles(options.root)]
if change_scm == 'git':
all_files = scm.GIT.GetAllFiles(options.root)
else:
all_files = scm.DIFF.GetAllFiles(options.root)
change_files = [('M', f) for f in all_files]
elif options.diff_file:
diff, change_files = _process_diff_file(options.diff_file)
else:

@ -5,6 +5,7 @@
from collections import defaultdict
import os
import pathlib
import platform
import re
from typing import Mapping, List
@ -25,7 +26,7 @@ VERSIONED_SUBMODULE = 2
def determine_scm(root):
"""Similar to upload.py's version but much simpler.
Returns 'git' or None.
Returns 'git' or 'diff'.
"""
if os.path.isdir(os.path.join(root, '.git')):
return 'git'
@ -37,7 +38,7 @@ def determine_scm(root):
cwd=root)
return 'git'
except (OSError, subprocess2.CalledProcessError):
return None
return 'diff'
class GIT(object):
@ -529,3 +530,34 @@ class GIT(object):
if sha_only:
return sha == rev.lower()
return True
class DIFF(object):
@staticmethod
def GetAllFiles(cwd):
"""Return all files under the repo at cwd.
If .gitmodules exists in cwd, use it to determine which folders are
submodules and don't recurse into them. Submodule paths are returned.
"""
# `git config --file` works outside of a git workspace.
submodules = GIT.ListSubmodules(cwd)
if not submodules:
return [
str(p.relative_to(cwd)) for p in pathlib.Path(cwd).rglob("*")
if p.is_file()
]
full_path_submodules = {os.path.join(cwd, s) for s in submodules}
def should_recurse(dirpath, dirname):
full_path = os.path.join(dirpath, dirname)
return full_path not in full_path_submodules
paths = list(full_path_submodules)
for dirpath, dirnames, filenames in os.walk(cwd):
paths.extend([os.path.join(dirpath, f) for f in filenames])
dirnames[:] = [d for d in dirnames if should_recurse(dirpath, d)]
return [os.path.relpath(p, cwd) for p in paths]

@ -950,7 +950,7 @@ def CheckChangeOnCommit(input_api, output_api):
'random_file.txt']))
def testMainUnversionedFail(self):
scm.determine_scm.return_value = None
scm.determine_scm.return_value = 'diff'
with self.assertRaises(SystemExit) as e:
presubmit.main(['--root', self.fake_root_dir])
@ -960,7 +960,7 @@ def CheckChangeOnCommit(input_api, output_api):
sys.stderr.getvalue(),
'usage: presubmit_unittest.py [options] <files...>\n'
'presubmit_unittest.py: error: unversioned directories must '
'specify <files> or <diff_file>.\n')
'specify <files>, <all_files>, or <diff_file>.\n')
@mock.patch('presubmit_support.Change', mock.Mock())
def testParseChange_Files(self):
@ -979,9 +979,9 @@ def CheckChangeOnCommit(input_api, output_api):
presubmit._parse_files.assert_called_once_with(options.files,
options.recursive)
def testParseChange_NoFilesAndNoScm(self):
def testParseChange_NoFilesAndDiff(self):
presubmit._parse_files.return_value = []
scm.determine_scm.return_value = None
scm.determine_scm.return_value = 'diff'
parser = mock.Mock()
parser.error.side_effect = [SystemExit]
options = mock.Mock(files=[], diff_file='', all_files=False)
@ -989,7 +989,8 @@ def CheckChangeOnCommit(input_api, output_api):
with self.assertRaises(SystemExit):
presubmit._parse_change(parser, options)
parser.error.assert_called_once_with(
'unversioned directories must specify <files> or <diff_file>.')
'unversioned directories must specify '
'<files>, <all_files>, or <diff_file>.')
def testParseChange_FilesAndAllFiles(self):
parser = mock.Mock()

@ -7,6 +7,7 @@
import logging
import os
import sys
import tempfile
import unittest
from unittest import mock
@ -352,6 +353,48 @@ class RealGitTest(fake_repos.FakeReposTestBase):
scm.GIT.Capture(['checkout', 'main'], cwd=self.cwd)
class DiffTestCase(unittest.TestCase):
def setUp(self):
self.root = tempfile.mkdtemp()
os.makedirs(os.path.join(self.root, "foo", "dir"))
with open(os.path.join(self.root, "foo", "file.txt"), "w") as f:
f.write("foo\n")
with open(os.path.join(self.root, "foo", "dir", "file.txt"), "w") as f:
f.write("foo dir\n")
os.makedirs(os.path.join(self.root, "baz_repo"))
with open(os.path.join(self.root, "baz_repo", "file.txt"), "w") as f:
f.write("baz\n")
@mock.patch('scm.GIT.ListSubmodules')
def testGetAllFiles_ReturnsAllFilesIfNoSubmodules(self, mockListSubmodules):
mockListSubmodules.return_value = []
files = scm.DIFF.GetAllFiles(self.root)
if sys.platform.startswith('win'):
self.assertCountEqual(
files,
["foo\\file.txt", "foo\\dir\\file.txt", "baz_repo\\file.txt"])
else:
self.assertCountEqual(
files,
["foo/file.txt", "foo/dir/file.txt", "baz_repo/file.txt"])
@mock.patch('scm.GIT.ListSubmodules')
def testGetAllFiles_IgnoresFilesInSubmodules(self, mockListSubmodules):
mockListSubmodules.return_value = ['baz_repo']
files = scm.DIFF.GetAllFiles(self.root)
if sys.platform.startswith('win'):
self.assertCountEqual(
files, ["foo\\file.txt", "foo\\dir\\file.txt", "baz_repo"])
else:
self.assertCountEqual(
files, ["foo/file.txt", "foo/dir/file.txt", "baz_repo"])
if __name__ == '__main__':
if '-v' in sys.argv:
logging.basicConfig(level=logging.DEBUG)

Loading…
Cancel
Save