From b9d7c8558207b5cbe5bc7fa41a0f46832cc35d6a Mon Sep 17 00:00:00 2001 From: Anne Redulla Date: Tue, 29 Aug 2023 23:54:27 +0000 Subject: [PATCH] [ssci] Added CheckChromiumDependencyMetadata in presubmit_canned_checks This CL adds a new function `CheckChromiumDependencyMetadata` in `presubmit_canned_checks.py`. It can be used to check that files satisfy the format defined by `README.chromium.template` (https://chromium.googlesource.com/chromium/src/+/main/third_party/README.chromium.template). The code for metadata validation can be found in `//metadata`. Note that all metadata validation issues will be returned as warnings only for now, while the quality of metadata is being uplifted. Bug: b:277147404 Change-Id: Iacf1b3a11219ab752549f6dc6e882c93c0fbe780 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4812578 Commit-Queue: Anne Redulla Reviewed-by: Rachael Newitt Reviewed-by: Gavin Mak Reviewed-by: Bruce Dawson --- metadata/discover.py | 15 ++++ metadata/parse.py | 14 ++-- metadata/tests/parse_test.py | 7 +- metadata/tests/validate_test.py | 105 +++++++++++++++++++----- metadata/validate.py | 111 +++++++++++++++++++------- presubmit_canned_checks.py | 41 +++++++++- tests/presubmit_canned_checks_test.py | 63 +++++++++++++++ 7 files changed, 300 insertions(+), 56 deletions(-) create mode 100644 metadata/discover.py diff --git a/metadata/discover.py b/metadata/discover.py new file mode 100644 index 000000000..f9915da86 --- /dev/null +++ b/metadata/discover.py @@ -0,0 +1,15 @@ +# Copyright 2023 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import os + +# The base names that are known to be Chromium metadata files. +_METADATA_FILES = { + "README.chromium", +} + + +def is_metadata_file(path: str) -> bool: + """Filter for metadata files.""" + return os.path.basename(path) in _METADATA_FILES diff --git a/metadata/parse.py b/metadata/parse.py index 2d673501f..7399b55e9 100644 --- a/metadata/parse.py +++ b/metadata/parse.py @@ -31,23 +31,21 @@ _PATTERN_FIELD_DECLARATION = re.compile( ) -def parse_file(filepath: str) -> List[dm.DependencyMetadata]: - """Reads and parses the metadata in the given file. +def parse_content(content: str) -> List[dm.DependencyMetadata]: + """Reads and parses the metadata from the given string. Args: - filepath: path to metadata file. + content: the string to parse metadata from. Returns: - each dependency's metadata described in the file. + all the metadata, which may be for zero or more dependencies, from the + given string. """ - with open(filepath, "r", encoding="utf-8") as f: - lines = f.readlines() - dependencies = [] current_metadata = dm.DependencyMetadata() current_field_name = None current_field_value = "" - for line in lines: + for line in content.splitlines(keepends=True): # Check if a new dependency is being described. if DEPENDENCY_DIVIDER.match(line): if current_field_name: diff --git a/metadata/tests/parse_test.py b/metadata/tests/parse_test.py index 23819e154..792321532 100644 --- a/metadata/tests/parse_test.py +++ b/metadata/tests/parse_test.py @@ -14,6 +14,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..")) # Add the repo's root directory for clearer imports. 
sys.path.insert(0, _ROOT_DIR) +import gclient_utils import metadata.parse @@ -22,7 +23,8 @@ class ParseTest(unittest.TestCase): """Check parsing works for a single dependency's metadata.""" filepath = os.path.join(_THIS_DIR, "data", "README.chromium.test.single-valid") - all_metadata = metadata.parse.parse_file(filepath) + content = gclient_utils.FileRead(filepath) + all_metadata = metadata.parse.parse_content(content) self.assertEqual(len(all_metadata), 1) self.assertListEqual( @@ -49,7 +51,8 @@ class ParseTest(unittest.TestCase): """Check parsing works for multiple dependencies' metadata.""" filepath = os.path.join(_THIS_DIR, "data", "README.chromium.test.multi-invalid") - all_metadata = metadata.parse.parse_file(filepath) + content = gclient_utils.FileRead(filepath) + all_metadata = metadata.parse.parse_content(content) # Dependency metadata with no entries at all are ignored. self.assertEqual(len(all_metadata), 3) diff --git a/metadata/tests/validate_test.py b/metadata/tests/validate_test.py index 33a6978a5..656e9ca69 100644 --- a/metadata/tests/validate_test.py +++ b/metadata/tests/validate_test.py @@ -14,25 +14,81 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..")) # Add the repo's root directory for clearer imports. sys.path.insert(0, _ROOT_DIR) +import gclient_utils import metadata.validate +# Common paths for tests. +_SOURCE_FILE_DIR = os.path.join(_THIS_DIR, "data") +_VALID_METADATA_FILEPATH = os.path.join(_THIS_DIR, "data", + "README.chromium.test.multi-valid") +_INVALID_METADATA_FILEPATH = os.path.join(_THIS_DIR, "data", + "README.chromium.test.multi-invalid") -class ValidateTest(unittest.TestCase): - def test_validate_file(self): + +class ValidateContentTest(unittest.TestCase): + """Tests for the validate_content function.""" + def test_empty(self): + # Validate empty content (should result in a validation error). + results = metadata.validate.validate_content( + content="", + source_file_dir=_SOURCE_FILE_DIR, + repo_root_dir=_THIS_DIR, + ) + self.assertEqual(len(results), 1) + self.assertTrue(results[0].is_fatal()) + + def test_valid(self): + # Validate valid file content (no errors or warnings). + results = metadata.validate.validate_content( + content=gclient_utils.FileRead(_VALID_METADATA_FILEPATH), + source_file_dir=_SOURCE_FILE_DIR, + repo_root_dir=_THIS_DIR, + ) + self.assertEqual(len(results), 0) + + def test_invalid(self): + # Validate invalid file content (both errors and warnings). + results = metadata.validate.validate_content( + content=gclient_utils.FileRead(_INVALID_METADATA_FILEPATH), + source_file_dir=_SOURCE_FILE_DIR, + repo_root_dir=_THIS_DIR, + ) + self.assertEqual(len(results), 11) + error_count = 0 + warning_count = 0 + for result in results: + if result.is_fatal(): + error_count += 1 + else: + warning_count += 1 + self.assertEqual(error_count, 9) + self.assertEqual(warning_count, 2) + + +class ValidateFileTest(unittest.TestCase): + """Tests for the validate_file function.""" + def test_missing(self): + # Validate a file that does not exist. + results = metadata.validate.validate_file( + filepath=os.path.join(_THIS_DIR, "data", "MISSING.chromium"), + repo_root_dir=_THIS_DIR, + ) + # There should be exactly 1 error returned. + self.assertEqual(len(results), 1) + self.assertTrue(results[0].is_fatal()) + + def test_valid(self): # Validate a valid file (no errors or warnings). 
- test_filepath = os.path.join(_THIS_DIR, "data", - "README.chromium.test.multi-valid") results = metadata.validate.validate_file( - filepath=test_filepath, + filepath=_VALID_METADATA_FILEPATH, repo_root_dir=_THIS_DIR, ) self.assertEqual(len(results), 0) + def test_invalid(self): # Validate an invalid file (both errors and warnings). - test_filepath = os.path.join(_THIS_DIR, "data", - "README.chromium.test.multi-invalid") results = metadata.validate.validate_file( - filepath=test_filepath, + filepath=_INVALID_METADATA_FILEPATH, repo_root_dir=_THIS_DIR, ) self.assertEqual(len(results), 11) @@ -46,26 +102,39 @@ class ValidateTest(unittest.TestCase): self.assertEqual(error_count, 9) self.assertEqual(warning_count, 2) - def test_check_file(self): - # Check a valid file (no errors or warnings). - test_filepath = os.path.join(_THIS_DIR, "data", - "README.chromium.test.multi-valid") + +class CheckFileTest(unittest.TestCase): + """Tests for the check_file function.""" + def test_missing(self): + # Check a file that does not exist. + errors, warnings = metadata.validate.check_file( + filepath=os.path.join(_THIS_DIR, "data", "MISSING.chromium"), + repo_root_dir=_THIS_DIR, + ) + # TODO(aredulla): update this test once validation errors can be returned + # as errors. Bug: b/285453019. + # self.assertEqual(len(errors), 1) + # self.assertEqual(len(warnings), 0) + self.assertEqual(len(errors), 0) + self.assertEqual(len(warnings), 1) + + def test_valid(self): + # Check file with valid content (no errors or warnings). errors, warnings = metadata.validate.check_file( - filepath=test_filepath, + filepath=_VALID_METADATA_FILEPATH, repo_root_dir=_THIS_DIR, ) self.assertEqual(len(errors), 0) self.assertEqual(len(warnings), 0) - # Check an invalid file (both errors and warnings). - test_filepath = os.path.join(_THIS_DIR, "data", - "README.chromium.test.multi-invalid") + def test_invalid(self): + # Check file with invalid content (both errors and warnings). errors, warnings = metadata.validate.check_file( - filepath=test_filepath, + filepath=_INVALID_METADATA_FILEPATH, repo_root_dir=_THIS_DIR, ) # TODO(aredulla): update this test once validation errors can be returned - # as errors. + # as errors. Bug: b/285453019. # self.assertEqual(len(errors), 9) # self.assertEqual(len(warnings), 2) self.assertEqual(len(errors), 0) diff --git a/metadata/validate.py b/metadata/validate.py index 460660fb5..960c5e489 100644 --- a/metadata/validate.py +++ b/metadata/validate.py @@ -5,7 +5,7 @@ import os import sys -from typing import List, Tuple +from typing import Callable, List, Tuple, Union _THIS_DIR = os.path.abspath(os.path.dirname(__file__)) # The repo's root directory. @@ -14,36 +14,92 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..")) # Add the repo's root directory for clearer imports. sys.path.insert(0, _ROOT_DIR) +import gclient_utils import metadata.parse import metadata.validation_result as vr -def validate_file(filepath: str, - repo_root_dir: str) -> List[vr.ValidationResult]: - """Validate the metadata file.""" - if not os.path.exists(filepath): - result = vr.ValidationError(f"'{filepath}' not found.") - result.set_tag(tag="reason", value="file not found") - return [result] +def validate_content(content: str, source_file_dir: str, + repo_root_dir: str) -> List[vr.ValidationResult]: + """Validate the content as a metadata file. - # Get the directory the metadata file is in. - parent_dir = os.path.dirname(filepath) + Args: + content: the entire content of a file to be validated as a metadata file. 
+ source_file_dir: the directory of the metadata file that the license file + value is from; this is needed to construct file paths to + license files. + repo_root_dir: the repository's root directory; this is needed to construct + file paths to license files. + Returns: the validation results for the given content. + """ results = [] - dependencies = metadata.parse.parse_file(filepath) - for dependency in dependencies: - results.extend( - dependency.validate( - source_file_dir=parent_dir, - repo_root_dir=repo_root_dir, - )) + dependencies = metadata.parse.parse_content(content) + if not dependencies: + result = vr.ValidationError("No dependency metadata found.") + result.set_tag("reason", "no metadata") + return [result] + for dependency in dependencies: + dependency_results = dependency.validate(source_file_dir=source_file_dir, + repo_root_dir=repo_root_dir) + results.extend(dependency_results) return results -def check_file(filepath: str, - repo_root_dir: str) -> Tuple[List[str], List[str]]: - """Run metadata validation on the given file, and return all validation +def _construct_file_read_error(filepath: str, cause: str) -> vr.ValidationError: + """Helper function to create a validation error for a file reading issue.""" + result = vr.ValidationError(f"Cannot read '{filepath}' - {cause}.") + result.set_tag(tag="reason", value="read error") + return result + + +def validate_file( + filepath: str, + repo_root_dir: str, + reader: Callable[[str], Union[str, bytes]] = None, +) -> List[vr.ValidationResult]: + """Validate the item located at the given filepath is a valid dependency + metadata file. + + Args: + filepath: the path to validate, + e.g. "/chromium/src/third_party/libname/README.chromium" + repo_root_dir: the repository's root directory; this is needed to construct + file paths to license files. + reader (optional): callable function/method to read the content of the file. + + Returns: the validation results for the given filepath and its contents, if + it exists. + """ + if reader is None: + reader = gclient_utils.FileRead + + try: + content = reader(filepath) + except FileNotFoundError: + return [_construct_file_read_error(filepath, "file not found")] + except PermissionError: + return [_construct_file_read_error(filepath, "access denied")] + except Exception as e: + return [_construct_file_read_error(filepath, f"unexpected error: '{e}'")] + else: + if not isinstance(content, str): + return [_construct_file_read_error(filepath, "decoding failed")] + + # Get the directory the metadata file is in. + source_file_dir = os.path.dirname(filepath) + return validate_content(content=content, + source_file_dir=source_file_dir, + repo_root_dir=repo_root_dir) + + +def check_file( + filepath: str, + repo_root_dir: str, + reader: Callable[[str], Union[str, bytes]] = None, +) -> Tuple[List[str], List[str]]: + """Run metadata validation on the given filepath, and return all validation errors and validation warnings. Args: @@ -51,6 +107,7 @@ def check_file(filepath: str, e.g. "/chromium/src/third_party/libname/README.chromium" repo_root_dir: the repository's root directory; this is needed to construct file paths to license files. + reader (optional): callable function/method to read the content of the file. Returns: error_messages: the fatal validation issues present in the file; @@ -58,20 +115,20 @@ def check_file(filepath: str, warning_messages: the non-fatal validation issues present in the file; i.e. presubmit should still pass. 
""" + results = validate_file(filepath=filepath, + repo_root_dir=repo_root_dir, + reader=reader) + error_messages = [] warning_messages = [] - for result in validate_file(filepath, repo_root_dir): - # Construct the message. - if result.get_tag("reason") == "file not found": - message = result.get_message(postscript="", width=60) - else: - message = result.get_message(width=60) + for result in results: + message = result.get_message(width=60) # TODO(aredulla): Actually distinguish between validation errors and # warnings. The quality of metadata is currently being uplifted, but is not # yet guaranteed to pass validation. So for now, all validation results will # be returned as warnings so CLs are not blocked by invalid metadata in - # presubmits yet. + # presubmits yet. Bug: b/285453019. # if result.is_fatal(): # error_messages.append(message) # else: diff --git a/presubmit_canned_checks.py b/presubmit_canned_checks.py index 2ffcfb2ee..e0ea28304 100644 --- a/presubmit_canned_checks.py +++ b/presubmit_canned_checks.py @@ -10,6 +10,9 @@ import io as _io import os as _os import zlib +import metadata.discover +import metadata.validate + _HERE = _os.path.dirname(_os.path.abspath(__file__)) # These filters will be disabled if callers do not explicitly supply a @@ -375,6 +378,7 @@ def CheckChangeHasNoCrAndHasOnlyOneEol(input_api, output_api, items=eof_files)) return outputs + def CheckGenderNeutral(input_api, output_api, source_file_filter=None): """Checks that there are no gendered pronouns in any of the text files to be submitted. @@ -398,7 +402,6 @@ def CheckGenderNeutral(input_api, output_api, source_file_filter=None): return [] - def _ReportErrorFileAndLine(filename, line_num, dummy_line): """Default error formatter for _FindNewViolationsOfRule.""" return '%s:%s' % (filename, line_num) @@ -789,6 +792,41 @@ def CheckLicense(input_api, output_api, license_re_param=None, return results +def CheckChromiumDependencyMetadata(input_api, output_api, file_filter=None): + """Check files for Chromium third party dependency metadata have sufficient + information, and are correctly formatted. + + See the README.chromium.template at + https://chromium.googlesource.com/chromium/src/+/main/third_party/README.chromium.template + """ + # If the file filter is unspecified, filter to known Chromium metadata files. + if file_filter is None: + file_filter = lambda f: metadata.discover.is_metadata_file(f.LocalPath()) + + # The repo's root directory is required to check license files. + repo_root_dir = input_api.change.RepositoryRoot() + + outputs = [] + for f in input_api.AffectedFiles(file_filter=file_filter): + if f.Action() == 'D': + # No need to validate a deleted file. 
+ continue + + errors, warnings = metadata.validate.check_file( + filepath=f.AbsoluteLocalPath(), + repo_root_dir=repo_root_dir, + reader=input_api.ReadFile, + ) + + for warning in warnings: + outputs.append(output_api.PresubmitPromptWarning(warning, [f])) + + for error in errors: + outputs.append(output_api.PresubmitError(error, [f])) + + return outputs + + ### Other checks def CheckDoNotSubmit(input_api, output_api): @@ -843,6 +881,7 @@ def CheckTreeIsOpen(input_api, output_api, long_text=str(e))] return [] + def GetUnitTestsInDirectory(input_api, output_api, directory, diff --git a/tests/presubmit_canned_checks_test.py b/tests/presubmit_canned_checks_test.py index cee18da78..592504973 100644 --- a/tests/presubmit_canned_checks_test.py +++ b/tests/presubmit_canned_checks_test.py @@ -328,6 +328,69 @@ class DescriptionChecksTest(unittest.TestCase): self.assertEqual(0, len(errors)) +class ChromiumDependencyMetadataCheckTest(unittest.TestCase): + def testDefaultFileFilter(self): + """Checks the default file filter limits the scope to Chromium dependency + metadata files. + """ + input_api = MockInputApi() + input_api.change.RepositoryRoot = lambda: '' + input_api.files = [ + MockFile(os.path.normpath('foo/README.md'), ['Shipped: no?']), + MockFile(os.path.normpath('foo/main.py'), ['Shipped: yes?']), + ] + results = presubmit_canned_checks.CheckChromiumDependencyMetadata( + input_api, MockOutputApi()) + self.assertEqual(len(results), 0) + + def testSkipDeletedFiles(self): + """Checks validation is skipped for deleted files.""" + input_api = MockInputApi() + input_api.change.RepositoryRoot = lambda: '' + input_api.files = [ + MockFile(os.path.normpath('foo/README.chromium'), ['No fields'], + action='D'), + ] + results = presubmit_canned_checks.CheckChromiumDependencyMetadata( + input_api, MockOutputApi()) + self.assertEqual(len(results), 0) + + def testFeedbackForNoMetadata(self): + """Checks presubmit results are returned for files without any metadata.""" + input_api = MockInputApi() + input_api.change.RepositoryRoot = lambda: '' + input_api.files = [ + MockFile(os.path.normpath('foo/README.chromium'), ['No fields']), + ] + results = presubmit_canned_checks.CheckChromiumDependencyMetadata( + input_api, MockOutputApi()) + self.assertEqual(len(results), 1) + self.assertTrue("No dependency metadata" in results[0].message) + + def testFeedbackForInvalidMetadata(self): + """Checks presubmit results are returned for files with invalid metadata.""" + input_api = MockInputApi() + input_api.change.RepositoryRoot = lambda: '' + test_file = MockFile(os.path.normpath('foo/README.chromium'), + ['Shipped: yes?']) + input_api.files = [test_file] + results = presubmit_canned_checks.CheckChromiumDependencyMetadata( + input_api, MockOutputApi()) + + # There should be 10 results due to + # - missing 5 mandatory fields: Name, URL, Version, License, and + # Security Critical + # - missing 4 required fields: Date, Revision, License File, and + # License Android Compatible + # - Shipped should be only 'yes' or 'no'. + self.assertEqual(len(results), 10) + + # Check each presubmit result is associated with the test file. + for result in results: + self.assertEqual(len(result.items), 1) + self.assertEqual(result.items[0], test_file) + + class CheckUpdateOwnersFileReferences(unittest.TestCase): def testShowsWarningIfDeleting(self): input_api = MockInputApi()
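
Usage sketch: downstream repositories pick up canned checks through input_api.canned_checks in their PRESUBMIT.py, so the new check could be wired in roughly as below. The hook name and running it on upload are illustrative assumptions, not something this CL prescribes.

def CheckChangeOnUpload(input_api, output_api):
  # Validate any touched Chromium dependency metadata files. With no
  # file_filter argument, the check defaults to known metadata file names
  # (currently just README.chromium, per metadata/discover.py).
  return input_api.canned_checks.CheckChromiumDependencyMetadata(
      input_api, output_api)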
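
The reader parameter threaded through validate_file() and check_file() controls how file content is read; the presubmit check passes input_api.ReadFile, and the default falls back to gclient_utils.FileRead. A standalone sketch of calling the API directly, with hypothetical paths and assuming the depot_tools root is on sys.path:

import metadata.validate

# Hypothetical paths; substitute a real dependency directory and repo root.
errors, warnings = metadata.validate.check_file(
    filepath="/path/to/src/third_party/libfoo/README.chromium",
    repo_root_dir="/path/to/src",
)

# While metadata quality is being uplifted (b/285453019), all validation
# issues are returned as warnings, so `errors` is currently always empty.
for message in warnings:
  print(message)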
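
validate_content() is the lower-level entry point that works on an in-memory string, which is also how the new unit tests exercise validation. A sketch with hypothetical metadata content, loosely following the README.chromium.template linked above and again assuming depot_tools is importable; the authoritative field requirements are whatever the validators under //metadata enforce.

import metadata.validate

# Hypothetical dependency metadata in README.chromium form.
content = """Name: libfoo
URL: https://example.com/libfoo
Version: 1.2.3
License: MIT
License File: LICENSE
Security Critical: yes
Shipped: yes
Description:
A hypothetical third-party library used to illustrate the format.
"""

results = metadata.validate.validate_content(
    content=content,
    source_file_dir="/path/to/src/third_party/libfoo",  # hypothetical
    repo_root_dir="/path/to/src",                       # hypothetical
)

# Print any validation issues found, using the same message width as
# check_file() does for presubmit output.
for result in results:
  print(result.get_message(width=60))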