From 3aeb682373154b38d95c17da521d8a0235be8be1 Mon Sep 17 00:00:00 2001 From: Anne Redulla Date: Mon, 21 Aug 2023 03:33:17 +0000 Subject: [PATCH] [ssci] Added validate method for single dependencies Bug: b:277147404 Change-Id: I54c9c82d093cb11813e1c224da125b8d555f1b29 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4797050 Reviewed-by: Rachael Newitt Commit-Queue: Anne Redulla --- metadata/PRESUBMIT.py | 13 ++ metadata/dependency_metadata.py | 139 ++++++++++++++++++++- metadata/fields/custom/license_file.py | 19 ++- metadata/fields/custom/version.py | 2 +- metadata/fields/known.py | 4 +- metadata/fields/types.py | 9 ++ metadata/fields/util.py | 20 +++ metadata/parse.py | 2 +- metadata/tests/dependency_metadata_test.py | 132 +++++++++++++++++++ metadata/tests/fields_test.py | 16 +-- metadata/validation_result.py | 4 +- 11 files changed, 339 insertions(+), 21 deletions(-) create mode 100644 metadata/PRESUBMIT.py create mode 100644 metadata/tests/dependency_metadata_test.py diff --git a/metadata/PRESUBMIT.py b/metadata/PRESUBMIT.py new file mode 100644 index 000000000..7d15cd776 --- /dev/null +++ b/metadata/PRESUBMIT.py @@ -0,0 +1,13 @@ +#!/usr/bin/env vpython3 +# Copyright (c) 2023 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +PRESUBMIT_VERSION = '2.0.0' + + +def CheckPythonUnitTests(input_api, output_api): + tests = input_api.canned_checks.GetUnitTestsInDirectory( + input_api, output_api, "tests", files_to_check=[r'.+_test\.py$']) + + return input_api.RunTests(tests) diff --git a/metadata/dependency_metadata.py b/metadata/dependency_metadata.py index 64904ab4f..0f2a251d1 100644 --- a/metadata/dependency_metadata.py +++ b/metadata/dependency_metadata.py @@ -3,19 +3,152 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. -from typing import List, Tuple +from collections import defaultdict +import os +import sys +from typing import Dict, List, Set, Tuple + +_THIS_DIR = os.path.abspath(os.path.dirname(__file__)) +# The repo's root directory. +_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..")) + +# Add the repo's root directory for clearer imports. +sys.path.insert(0, _ROOT_DIR) + +import metadata.fields.types as field_types +import metadata.fields.custom.license +import metadata.fields.custom.version +import metadata.fields.known as known_fields +import metadata.fields.util as util +import metadata.validation_result as vr class DependencyMetadata: """The metadata for a single dependency.""" + + # Fields that are always required. + _MANDATORY_FIELDS = { + known_fields.NAME, + known_fields.URL, + known_fields.VERSION, + known_fields.LICENSE, + known_fields.SECURITY_CRITICAL, + known_fields.SHIPPED, + } + def __init__(self): - self._entries = [] + # The record of all entries added, including repeated fields. + self._entries: List[Tuple[str, str]] = [] + + # The current value of each field. + self._metadata: Dict[field_types.MetadataField, str] = {} + + # The record of how many times a field entry was added. + self._occurrences: Dict[field_types.MetadataField, int] = defaultdict(int) def add_entry(self, field_name: str, field_value: str): - self._entries.append((field_name, field_value.strip())) + value = field_value.strip() + self._entries.append((field_name, value)) + + field = known_fields.get_field(field_name) + if field: + self._metadata[field] = value + self._occurrences[field] += 1 def has_entries(self) -> bool: return len(self._entries) > 0 def get_entries(self) -> List[Tuple[str, str]]: return list(self._entries) + + def _assess_required_fields(self) -> Set[field_types.MetadataField]: + """Returns the set of required fields, based on the current metadata.""" + required = set(self._MANDATORY_FIELDS) + + # The date and revision are required if the version has not been specified. + version_value = self._metadata.get(known_fields.VERSION) + if (version_value is None + or metadata.fields.custom.version.is_unknown(version_value)): + required.add(known_fields.DATE) + required.add(known_fields.REVISION) + + # Assume the dependency is shipped if not specified. + shipped_value = self._metadata.get(known_fields.SHIPPED) + is_shipped = (shipped_value is None + or util.infer_as_boolean(shipped_value, default=True)) + + if is_shipped: + # A license file is required if the dependency is shipped. + required.add(known_fields.LICENSE_FILE) + + # License compatibility with Android must be set if the package is shipped + # and the license is not in the allowlist. + has_allowlisted = False + license_value = self._metadata.get(known_fields.LICENSE) + if license_value: + licenses = metadata.fields.custom.license.process_license_value( + license_value, + atomic_delimiter=known_fields.LICENSE.VALUE_DELIMITER) + for _, allowed in licenses: + if allowed: + has_allowlisted = True + break + + if not has_allowlisted: + required.add(known_fields.LICENSE_ANDROID_COMPATIBLE) + + return required + + def validate(self, source_file_dir: str, + repo_root_dir: str) -> List[vr.ValidationResult]: + """Validates all the metadata. + + Args: + source_file_dir: the directory of the file that the metadata is from. + repo_root_dir: the repository's root directory. + + Returns: the metadata's validation results. + """ + results = [] + + # Check for duplicate fields. + repeated_field_info = [ + f"{field.get_name()} ({count})" + for field, count in self._occurrences.items() if count > 1 + ] + if repeated_field_info: + repeated = ", ".join(repeated_field_info) + error = vr.ValidationError( + f"Multiple entries for the same field: {repeated}") + error.set_tag(tag="reason", value="repeated field") + results.append(error) + + # Check required fields are present. + required_fields = self._assess_required_fields() + for field in required_fields: + if field not in self._metadata: + field_name = field.get_name() + error = vr.ValidationError(f"Required field '{field_name}' is missing") + error.set_tag(tag="reason", value="missing required field") + results.append(error) + + # Validate values for all present fields. + for field, value in self._metadata.items(): + field_result = field.validate(value) + if field_result: + field_result.set_tag(tag="field", value=field.get_name()) + results.append(field_result) + + # Check existence of the license file(s) on disk. + license_file_value = self._metadata.get(known_fields.LICENSE_FILE) + if license_file_value is not None: + result = known_fields.LICENSE_FILE.validate_on_disk( + value=license_file_value, + source_file_dir=source_file_dir, + repo_root_dir=repo_root_dir, + ) + if result: + result.set_tag(tag="field", value=known_fields.LICENSE_FILE.get_name()) + results.append(result) + + return results diff --git a/metadata/fields/custom/license_file.py b/metadata/fields/custom/license_file.py index fd5f2023e..aac843b5a 100644 --- a/metadata/fields/custom/license_file.py +++ b/metadata/fields/custom/license_file.py @@ -64,12 +64,23 @@ class LicenseFileField(field_types.MetadataField): def validate_on_disk( self, value: str, - readme_dir: str, - chromium_src_dir: str, + source_file_dir: str, + repo_root_dir: str, ) -> Union[vr.ValidationResult, None]: """Checks the given value consists of file paths which exist on disk. Note: this field supports multiple values. + + Args: + value: the value to validate. + source_file_dir: the directory of the metadata file that the license file + value is from; this is needed to construct file paths to + license files. + repo_root_dir: the repository's root directory; this is needed to + construct file paths to license files. + + Returns: a validation result based on the license file value, and whether + the license file(s) exist on disk, otherwise None. """ if value == _NOT_SHIPPED: return vr.ValidationWarning( @@ -80,9 +91,9 @@ class LicenseFileField(field_types.MetadataField): license_filename = license_filename.strip() if license_filename.startswith("/"): license_filepath = os.path.join( - chromium_src_dir, os.path.normpath(license_filename.lstrip("/"))) + repo_root_dir, os.path.normpath(license_filename.lstrip("/"))) else: - license_filepath = os.path.join(readme_dir, + license_filepath = os.path.join(source_file_dir, os.path.normpath(license_filename)) if not os.path.exists(license_filepath): diff --git a/metadata/fields/custom/version.py b/metadata/fields/custom/version.py index 2b51901a7..af73e6364 100644 --- a/metadata/fields/custom/version.py +++ b/metadata/fields/custom/version.py @@ -22,7 +22,7 @@ import metadata.validation_result as vr _PATTERN_NOT_APPLICABLE = re.compile(r"^N ?\/ ?A$", re.IGNORECASE) -def is_version_unknown(value: str) -> bool: +def is_unknown(value: str) -> bool: """Returns whether the value denotes the version being unknown.""" return (value == "0" or util.matches(_PATTERN_NOT_APPLICABLE, value) or util.is_unknown(value)) diff --git a/metadata/fields/known.py b/metadata/fields/known.py index 81f0e3d71..b4750b895 100644 --- a/metadata/fields/known.py +++ b/metadata/fields/known.py @@ -61,8 +61,8 @@ ALL_FIELDS = ( LOCAL_MODIFICATIONS, ) ALL_FIELD_NAMES = {field.get_name() for field in ALL_FIELDS} -FIELD_MAPPING = {field.get_name().lower(): field for field in ALL_FIELDS} +_FIELD_MAPPING = {field.get_name().lower(): field for field in ALL_FIELDS} def get_field(label: str) -> Union[field_types.MetadataField, None]: - return FIELD_MAPPING.get(label.lower()) + return _FIELD_MAPPING.get(label.lower()) diff --git a/metadata/fields/types.py b/metadata/fields/types.py index 9a301163b..21f8ebe1e 100644 --- a/metadata/fields/types.py +++ b/metadata/fields/types.py @@ -37,6 +37,15 @@ class MetadataField: self._name = name self._one_liner = one_liner + def __eq__(self, other): + if not isinstance(other, MetadataField): + return False + + return self._name.lower() == other._name.lower() + + def __hash__(self): + return hash(self._name.lower()) + def get_name(self): return self._name diff --git a/metadata/fields/util.py b/metadata/fields/util.py index 1a05661d9..2a4b67a0d 100644 --- a/metadata/fields/util.py +++ b/metadata/fields/util.py @@ -17,6 +17,12 @@ _PATTERN_UNKNOWN = re.compile(r"^unknown$", re.IGNORECASE) # empty string, or all characters are only whitespace. _PATTERN_ONLY_WHITESPACE = re.compile(r"^\s*$") +# Pattern used to check if the string starts with "yes", case-insensitive. +_PATTERN_STARTS_WITH_YES = re.compile(r"^yes", re.IGNORECASE) + +# Pattern used to check if the string starts with "no", case-insensitive. +_PATTERN_STARTS_WITH_NO = re.compile(r"^no", re.IGNORECASE) + def matches(pattern: re.Pattern, value: str) -> bool: """Returns whether the value matches the pattern.""" @@ -36,3 +42,17 @@ def is_unknown(value: str) -> bool: def quoted(values: List[str]) -> str: """Returns a string of the given values, each being individually quoted.""" return ", ".join([f"'{entry}'" for entry in values]) + + +def infer_as_boolean(value: str, default: bool = True) -> bool: + """Attempts to infer the value as a boolean, where: + - "yes"-ish values return True; + - "no"-ish values return False; and + - default is returned otherwise. + """ + if matches(_PATTERN_STARTS_WITH_YES, value): + return True + elif matches(_PATTERN_STARTS_WITH_NO, value): + return False + else: + return default diff --git a/metadata/parse.py b/metadata/parse.py index b507036f4..2d673501f 100644 --- a/metadata/parse.py +++ b/metadata/parse.py @@ -40,7 +40,7 @@ def parse_file(filepath: str) -> List[dm.DependencyMetadata]: Returns: each dependency's metadata described in the file. """ - with open(filepath, "r") as f: + with open(filepath, "r", encoding="utf-8") as f: lines = f.readlines() dependencies = [] diff --git a/metadata/tests/dependency_metadata_test.py b/metadata/tests/dependency_metadata_test.py new file mode 100644 index 000000000..7ef77bae8 --- /dev/null +++ b/metadata/tests/dependency_metadata_test.py @@ -0,0 +1,132 @@ +#!/usr/bin/env vpython3 +# Copyright (c) 2023 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import os +import sys +import unittest + +_THIS_DIR = os.path.abspath(os.path.dirname(__file__)) +# The repo's root directory. +_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..")) + +# Add the repo's root directory for clearer imports. +sys.path.insert(0, _ROOT_DIR) + +import metadata.dependency_metadata as dm +import metadata.fields.known as known_fields +import metadata.fields.types as field_types +import metadata.validation_result as vr + + +class DependencyValidationTest(unittest.TestCase): + def test_repeated_field(self): + """Check that a validation error is returned for a repeated field.""" + dependency = dm.DependencyMetadata() + dependency.add_entry(known_fields.NAME.get_name(), "Test repeated field") + dependency.add_entry(known_fields.URL.get_name(), "https://www.example.com") + dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0") + dependency.add_entry(known_fields.LICENSE.get_name(), "Public Domain") + dependency.add_entry(known_fields.LICENSE_FILE.get_name(), "LICENSE") + dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "no") + dependency.add_entry(known_fields.SHIPPED.get_name(), "no") + dependency.add_entry(known_fields.NAME.get_name(), "again") + + results = dependency.validate( + source_file_dir=os.path.join(_THIS_DIR, "data"), + repo_root_dir=_THIS_DIR, + ) + self.assertEqual(len(results), 1) + self.assertTrue(isinstance(results[0], vr.ValidationError)) + self.assertDictEqual(results[0].get_all_tags(), + {"reason": "repeated field"}) + + def test_required_field(self): + """Check that a validation error is returned for a missing field.""" + dependency = dm.DependencyMetadata() + dependency.add_entry(known_fields.SHIPPED.get_name(), "no") + dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "no") + dependency.add_entry(known_fields.LICENSE_FILE.get_name(), "LICENSE") + dependency.add_entry(known_fields.LICENSE.get_name(), "Public Domain") + dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0") + dependency.add_entry(known_fields.NAME.get_name(), "Test missing field") + # Leave URL field unspecified. + + results = dependency.validate( + source_file_dir=os.path.join(_THIS_DIR, "data"), + repo_root_dir=_THIS_DIR, + ) + self.assertEqual(len(results), 1) + self.assertTrue(isinstance(results[0], vr.ValidationError)) + self.assertDictEqual(results[0].get_all_tags(), + {"reason": "missing required field"}) + + def test_invalid_field(self): + """Check field validation issues are returned.""" + dependency = dm.DependencyMetadata() + dependency.add_entry(known_fields.URL.get_name(), "https://www.example.com") + dependency.add_entry(known_fields.NAME.get_name(), "Test invalid field") + dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0") + dependency.add_entry(known_fields.LICENSE_FILE.get_name(), "LICENSE") + dependency.add_entry(known_fields.LICENSE.get_name(), "Public domain") + dependency.add_entry(known_fields.SHIPPED.get_name(), "no") + dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "test") + + results = dependency.validate( + source_file_dir=os.path.join(_THIS_DIR, "data"), + repo_root_dir=_THIS_DIR, + ) + self.assertEqual(len(results), 1) + self.assertTrue(isinstance(results[0], vr.ValidationError)) + self.assertDictEqual(results[0].get_all_tags(), + {"field": known_fields.SECURITY_CRITICAL.get_name()}) + + def test_invalid_license_file_path(self): + """Check license file path validation issues are returned.""" + dependency = dm.DependencyMetadata() + dependency.add_entry(known_fields.NAME.get_name(), "Test license file path") + dependency.add_entry(known_fields.URL.get_name(), "https://www.example.com") + dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0") + dependency.add_entry(known_fields.LICENSE.get_name(), "Public domain") + dependency.add_entry(known_fields.LICENSE_FILE.get_name(), + "MISSING-LICENSE") + dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "no") + dependency.add_entry(known_fields.SHIPPED.get_name(), "no") + + results = dependency.validate( + source_file_dir=os.path.join(_THIS_DIR, "data"), + repo_root_dir=_THIS_DIR, + ) + self.assertEqual(len(results), 1) + self.assertTrue(isinstance(results[0], vr.ValidationError)) + self.assertDictEqual(results[0].get_all_tags(), + {"field": known_fields.LICENSE_FILE.get_name()}) + + def test_multiple_validation_issues(self): + """Check all validation issues are returned.""" + dependency = dm.DependencyMetadata() + dependency.add_entry(known_fields.NAME.get_name(), "Test multiple errors") + # Leave URL field unspecified. + dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0") + dependency.add_entry(known_fields.LICENSE.get_name(), "Public domain") + dependency.add_entry(known_fields.LICENSE_FILE.get_name(), + "MISSING-LICENSE") + dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "test") + dependency.add_entry(known_fields.SHIPPED.get_name(), "no") + dependency.add_entry(known_fields.NAME.get_name(), "again") + + # Check 4 validation results are returned, for: + # - missing field; + # - invalid license file path; + # - invalid yes/no field value; and + # - repeated field entry. + results = dependency.validate( + source_file_dir=os.path.join(_THIS_DIR, "data"), + repo_root_dir=_THIS_DIR, + ) + self.assertEqual(len(results), 4) + + +if __name__ == "__main__": + unittest.main() diff --git a/metadata/tests/fields_test.py b/metadata/tests/fields_test.py index a34dcf824..9980b36a1 100644 --- a/metadata/tests/fields_test.py +++ b/metadata/tests/fields_test.py @@ -113,32 +113,32 @@ class FieldValidationTest(unittest.TestCase): # Check relative path from README directory, and multiple license files. result = known_fields.LICENSE_FILE.validate_on_disk( value="LICENSE, src/LICENSE.txt", - readme_dir=os.path.join(_THIS_DIR, "data"), - chromium_src_dir=_THIS_DIR, + source_file_dir=os.path.join(_THIS_DIR, "data"), + repo_root_dir=_THIS_DIR, ) self.assertIsNone(result) # Check relative path from Chromium src directory. result = known_fields.LICENSE_FILE.validate_on_disk( value="//data/LICENSE", - readme_dir=os.path.join(_THIS_DIR, "data"), - chromium_src_dir=_THIS_DIR, + source_file_dir=os.path.join(_THIS_DIR, "data"), + repo_root_dir=_THIS_DIR, ) self.assertIsNone(result) # Check missing file. result = known_fields.LICENSE_FILE.validate_on_disk( value="MISSING_LICENSE", - readme_dir=os.path.join(_THIS_DIR, "data"), - chromium_src_dir=_THIS_DIR, + source_file_dir=os.path.join(_THIS_DIR, "data"), + repo_root_dir=_THIS_DIR, ) self.assertIsInstance(result, vr.ValidationError) # Check deprecated NOT_SHIPPED. result = known_fields.LICENSE_FILE.validate_on_disk( value="NOT_SHIPPED", - readme_dir=os.path.join(_THIS_DIR, "data"), - chromium_src_dir=_THIS_DIR, + source_file_dir=os.path.join(_THIS_DIR, "data"), + repo_root_dir=_THIS_DIR, ) self.assertIsInstance(result, vr.ValidationWarning) diff --git a/metadata/validation_result.py b/metadata/validation_result.py index 7d7566025..d86d8b3cc 100644 --- a/metadata/validation_result.py +++ b/metadata/validation_result.py @@ -9,10 +9,10 @@ from typing import Dict, Union class ValidationResult: """Base class for validation issues.""" - def __init__(self, message: str, fatal: bool, **tags: Dict[str, str]): + def __init__(self, message: str, fatal: bool): self._fatal = fatal self._message = message - self._tags = tags + self._tags = {} def __str__(self) -> str: prefix = "ERROR" if self._fatal else "[non-fatal]"