From 8ac3425344e1e0713daafde92fdf15b7ad5ee390 Mon Sep 17 00:00:00 2001 From: Gavin Mak Date: Mon, 24 Jun 2024 22:41:35 +0000 Subject: [PATCH] Create scm.DIFF.GetAllFiles For a ProvidedDiffChange, the AllFiles method naively returns all files with rglob("*"). The returned list includes files in nested submodules. This does not match the behavior of GitChange's AllFiles which uses git ls-files to find all files. Implement a new SCM that stops iterating recursively when it sees a submodule from .gitmodules in the repo root. Bug: b/323243527 Change-Id: I170d0f1bc4a838acea04779dee3df7fca0bce359 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5648616 Reviewed-by: Josip Sokcevic Commit-Queue: Gavin Mak --- presubmit_support.py | 22 +++++++++---------- scm.py | 36 +++++++++++++++++++++++++++++-- tests/presubmit_unittest.py | 11 +++++----- tests/scm_unittest.py | 43 +++++++++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+), 18 deletions(-) diff --git a/presubmit_support.py b/presubmit_support.py index 6a78176c4..dabcc5d30 100755 --- a/presubmit_support.py +++ b/presubmit_support.py @@ -23,7 +23,6 @@ import logging import mimetypes import multiprocessing import os # Somewhat exposed through the API. -import pathlib import random import re # Exposed through the API. import shutil @@ -1486,12 +1485,9 @@ class ProvidedDiffChange(Change): return self._AFFECTED_FILES.DIFF_CACHE(self._diff) def AllFiles(self, root=None): - """List all files under source control in the repo. - - There is no SCM, so return all files under the repo root. - """ + """List all files under source control in the repo.""" root = root or self.RepositoryRoot() - return [str(p) for p in pathlib.Path(root).rglob("*")] + return scm.DIFF.GetAllFiles(root) def ListRelevantPresubmitFiles(files, root): @@ -2060,11 +2056,11 @@ def _parse_change(parser, options): parser.error( ' cannot be specified when is set.') - # TODO(b/323243527): Consider adding a SCM for provided diff. change_scm = scm.determine_scm(options.root) - if change_scm != 'git' and not options.files and not options.diff_file: - parser.error( - 'unversioned directories must specify or .') + if change_scm == 'diff' and not (options.files or options.all_files + or options.diff_file): + parser.error('unversioned directories must specify ' + ', , or .') diff = None if options.files: @@ -2091,7 +2087,11 @@ def _parse_change(parser, options): # Get the filtered set of files from a directory scan. change_files = _parse_files(options.files, options.recursive) elif options.all_files: - change_files = [('M', f) for f in scm.GIT.GetAllFiles(options.root)] + if change_scm == 'git': + all_files = scm.GIT.GetAllFiles(options.root) + else: + all_files = scm.DIFF.GetAllFiles(options.root) + change_files = [('M', f) for f in all_files] elif options.diff_file: diff, change_files = _process_diff_file(options.diff_file) else: diff --git a/scm.py b/scm.py index bb9a68050..c9105a4ae 100644 --- a/scm.py +++ b/scm.py @@ -5,6 +5,7 @@ from collections import defaultdict import os +import pathlib import platform import re from typing import Mapping, List @@ -25,7 +26,7 @@ VERSIONED_SUBMODULE = 2 def determine_scm(root): """Similar to upload.py's version but much simpler. - Returns 'git' or None. + Returns 'git' or 'diff'. """ if os.path.isdir(os.path.join(root, '.git')): return 'git' @@ -37,7 +38,7 @@ def determine_scm(root): cwd=root) return 'git' except (OSError, subprocess2.CalledProcessError): - return None + return 'diff' class GIT(object): @@ -529,3 +530,34 @@ class GIT(object): if sha_only: return sha == rev.lower() return True + + +class DIFF(object): + + @staticmethod + def GetAllFiles(cwd): + """Return all files under the repo at cwd. + + If .gitmodules exists in cwd, use it to determine which folders are + submodules and don't recurse into them. Submodule paths are returned. + """ + # `git config --file` works outside of a git workspace. + submodules = GIT.ListSubmodules(cwd) + if not submodules: + return [ + str(p.relative_to(cwd)) for p in pathlib.Path(cwd).rglob("*") + if p.is_file() + ] + + full_path_submodules = {os.path.join(cwd, s) for s in submodules} + + def should_recurse(dirpath, dirname): + full_path = os.path.join(dirpath, dirname) + return full_path not in full_path_submodules + + paths = list(full_path_submodules) + for dirpath, dirnames, filenames in os.walk(cwd): + paths.extend([os.path.join(dirpath, f) for f in filenames]) + dirnames[:] = [d for d in dirnames if should_recurse(dirpath, d)] + + return [os.path.relpath(p, cwd) for p in paths] diff --git a/tests/presubmit_unittest.py b/tests/presubmit_unittest.py index 8b10c7812..7becaeb1c 100755 --- a/tests/presubmit_unittest.py +++ b/tests/presubmit_unittest.py @@ -950,7 +950,7 @@ def CheckChangeOnCommit(input_api, output_api): 'random_file.txt'])) def testMainUnversionedFail(self): - scm.determine_scm.return_value = None + scm.determine_scm.return_value = 'diff' with self.assertRaises(SystemExit) as e: presubmit.main(['--root', self.fake_root_dir]) @@ -960,7 +960,7 @@ def CheckChangeOnCommit(input_api, output_api): sys.stderr.getvalue(), 'usage: presubmit_unittest.py [options] \n' 'presubmit_unittest.py: error: unversioned directories must ' - 'specify or .\n') + 'specify , , or .\n') @mock.patch('presubmit_support.Change', mock.Mock()) def testParseChange_Files(self): @@ -979,9 +979,9 @@ def CheckChangeOnCommit(input_api, output_api): presubmit._parse_files.assert_called_once_with(options.files, options.recursive) - def testParseChange_NoFilesAndNoScm(self): + def testParseChange_NoFilesAndDiff(self): presubmit._parse_files.return_value = [] - scm.determine_scm.return_value = None + scm.determine_scm.return_value = 'diff' parser = mock.Mock() parser.error.side_effect = [SystemExit] options = mock.Mock(files=[], diff_file='', all_files=False) @@ -989,7 +989,8 @@ def CheckChangeOnCommit(input_api, output_api): with self.assertRaises(SystemExit): presubmit._parse_change(parser, options) parser.error.assert_called_once_with( - 'unversioned directories must specify or .') + 'unversioned directories must specify ' + ', , or .') def testParseChange_FilesAndAllFiles(self): parser = mock.Mock() diff --git a/tests/scm_unittest.py b/tests/scm_unittest.py index e298c5cdf..9dd0b5a3f 100755 --- a/tests/scm_unittest.py +++ b/tests/scm_unittest.py @@ -7,6 +7,7 @@ import logging import os import sys +import tempfile import unittest from unittest import mock @@ -352,6 +353,48 @@ class RealGitTest(fake_repos.FakeReposTestBase): scm.GIT.Capture(['checkout', 'main'], cwd=self.cwd) +class DiffTestCase(unittest.TestCase): + + def setUp(self): + self.root = tempfile.mkdtemp() + + os.makedirs(os.path.join(self.root, "foo", "dir")) + with open(os.path.join(self.root, "foo", "file.txt"), "w") as f: + f.write("foo\n") + with open(os.path.join(self.root, "foo", "dir", "file.txt"), "w") as f: + f.write("foo dir\n") + + os.makedirs(os.path.join(self.root, "baz_repo")) + with open(os.path.join(self.root, "baz_repo", "file.txt"), "w") as f: + f.write("baz\n") + + @mock.patch('scm.GIT.ListSubmodules') + def testGetAllFiles_ReturnsAllFilesIfNoSubmodules(self, mockListSubmodules): + mockListSubmodules.return_value = [] + files = scm.DIFF.GetAllFiles(self.root) + + if sys.platform.startswith('win'): + self.assertCountEqual( + files, + ["foo\\file.txt", "foo\\dir\\file.txt", "baz_repo\\file.txt"]) + else: + self.assertCountEqual( + files, + ["foo/file.txt", "foo/dir/file.txt", "baz_repo/file.txt"]) + + @mock.patch('scm.GIT.ListSubmodules') + def testGetAllFiles_IgnoresFilesInSubmodules(self, mockListSubmodules): + mockListSubmodules.return_value = ['baz_repo'] + files = scm.DIFF.GetAllFiles(self.root) + + if sys.platform.startswith('win'): + self.assertCountEqual( + files, ["foo\\file.txt", "foo\\dir\\file.txt", "baz_repo"]) + else: + self.assertCountEqual( + files, ["foo/file.txt", "foo/dir/file.txt", "baz_repo"]) + + if __name__ == '__main__': if '-v' in sys.argv: logging.basicConfig(level=logging.DEBUG)