[ssci] Added validate method for single dependencies

Bug: b:277147404
Change-Id: I54c9c82d093cb11813e1c224da125b8d555f1b29
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4797050
Reviewed-by: Rachael Newitt <renewitt@google.com>
Commit-Queue: Anne Redulla <aredulla@google.com>
changes/50/4797050/2
Anne Redulla 2 years ago committed by LUCI CQ
parent 3edcbf07bf
commit 3aeb682373

@ -0,0 +1,13 @@
#!/usr/bin/env vpython3
# Copyright (c) 2023 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
PRESUBMIT_VERSION = '2.0.0'
def CheckPythonUnitTests(input_api, output_api):
tests = input_api.canned_checks.GetUnitTestsInDirectory(
input_api, output_api, "tests", files_to_check=[r'.+_test\.py$'])
return input_api.RunTests(tests)

@ -3,19 +3,152 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from typing import List, Tuple
from collections import defaultdict
import os
import sys
from typing import Dict, List, Set, Tuple
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory.
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
# Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR)
import metadata.fields.types as field_types
import metadata.fields.custom.license
import metadata.fields.custom.version
import metadata.fields.known as known_fields
import metadata.fields.util as util
import metadata.validation_result as vr
class DependencyMetadata:
"""The metadata for a single dependency."""
# Fields that are always required.
_MANDATORY_FIELDS = {
known_fields.NAME,
known_fields.URL,
known_fields.VERSION,
known_fields.LICENSE,
known_fields.SECURITY_CRITICAL,
known_fields.SHIPPED,
}
def __init__(self):
self._entries = []
# The record of all entries added, including repeated fields.
self._entries: List[Tuple[str, str]] = []
# The current value of each field.
self._metadata: Dict[field_types.MetadataField, str] = {}
# The record of how many times a field entry was added.
self._occurrences: Dict[field_types.MetadataField, int] = defaultdict(int)
def add_entry(self, field_name: str, field_value: str):
self._entries.append((field_name, field_value.strip()))
value = field_value.strip()
self._entries.append((field_name, value))
field = known_fields.get_field(field_name)
if field:
self._metadata[field] = value
self._occurrences[field] += 1
def has_entries(self) -> bool:
return len(self._entries) > 0
def get_entries(self) -> List[Tuple[str, str]]:
return list(self._entries)
def _assess_required_fields(self) -> Set[field_types.MetadataField]:
"""Returns the set of required fields, based on the current metadata."""
required = set(self._MANDATORY_FIELDS)
# The date and revision are required if the version has not been specified.
version_value = self._metadata.get(known_fields.VERSION)
if (version_value is None
or metadata.fields.custom.version.is_unknown(version_value)):
required.add(known_fields.DATE)
required.add(known_fields.REVISION)
# Assume the dependency is shipped if not specified.
shipped_value = self._metadata.get(known_fields.SHIPPED)
is_shipped = (shipped_value is None
or util.infer_as_boolean(shipped_value, default=True))
if is_shipped:
# A license file is required if the dependency is shipped.
required.add(known_fields.LICENSE_FILE)
# License compatibility with Android must be set if the package is shipped
# and the license is not in the allowlist.
has_allowlisted = False
license_value = self._metadata.get(known_fields.LICENSE)
if license_value:
licenses = metadata.fields.custom.license.process_license_value(
license_value,
atomic_delimiter=known_fields.LICENSE.VALUE_DELIMITER)
for _, allowed in licenses:
if allowed:
has_allowlisted = True
break
if not has_allowlisted:
required.add(known_fields.LICENSE_ANDROID_COMPATIBLE)
return required
def validate(self, source_file_dir: str,
repo_root_dir: str) -> List[vr.ValidationResult]:
"""Validates all the metadata.
Args:
source_file_dir: the directory of the file that the metadata is from.
repo_root_dir: the repository's root directory.
Returns: the metadata's validation results.
"""
results = []
# Check for duplicate fields.
repeated_field_info = [
f"{field.get_name()} ({count})"
for field, count in self._occurrences.items() if count > 1
]
if repeated_field_info:
repeated = ", ".join(repeated_field_info)
error = vr.ValidationError(
f"Multiple entries for the same field: {repeated}")
error.set_tag(tag="reason", value="repeated field")
results.append(error)
# Check required fields are present.
required_fields = self._assess_required_fields()
for field in required_fields:
if field not in self._metadata:
field_name = field.get_name()
error = vr.ValidationError(f"Required field '{field_name}' is missing")
error.set_tag(tag="reason", value="missing required field")
results.append(error)
# Validate values for all present fields.
for field, value in self._metadata.items():
field_result = field.validate(value)
if field_result:
field_result.set_tag(tag="field", value=field.get_name())
results.append(field_result)
# Check existence of the license file(s) on disk.
license_file_value = self._metadata.get(known_fields.LICENSE_FILE)
if license_file_value is not None:
result = known_fields.LICENSE_FILE.validate_on_disk(
value=license_file_value,
source_file_dir=source_file_dir,
repo_root_dir=repo_root_dir,
)
if result:
result.set_tag(tag="field", value=known_fields.LICENSE_FILE.get_name())
results.append(result)
return results

@ -64,12 +64,23 @@ class LicenseFileField(field_types.MetadataField):
def validate_on_disk(
self,
value: str,
readme_dir: str,
chromium_src_dir: str,
source_file_dir: str,
repo_root_dir: str,
) -> Union[vr.ValidationResult, None]:
"""Checks the given value consists of file paths which exist on disk.
Note: this field supports multiple values.
Args:
value: the value to validate.
source_file_dir: the directory of the metadata file that the license file
value is from; this is needed to construct file paths to
license files.
repo_root_dir: the repository's root directory; this is needed to
construct file paths to license files.
Returns: a validation result based on the license file value, and whether
the license file(s) exist on disk, otherwise None.
"""
if value == _NOT_SHIPPED:
return vr.ValidationWarning(
@ -80,9 +91,9 @@ class LicenseFileField(field_types.MetadataField):
license_filename = license_filename.strip()
if license_filename.startswith("/"):
license_filepath = os.path.join(
chromium_src_dir, os.path.normpath(license_filename.lstrip("/")))
repo_root_dir, os.path.normpath(license_filename.lstrip("/")))
else:
license_filepath = os.path.join(readme_dir,
license_filepath = os.path.join(source_file_dir,
os.path.normpath(license_filename))
if not os.path.exists(license_filepath):

@ -22,7 +22,7 @@ import metadata.validation_result as vr
_PATTERN_NOT_APPLICABLE = re.compile(r"^N ?\/ ?A$", re.IGNORECASE)
def is_version_unknown(value: str) -> bool:
def is_unknown(value: str) -> bool:
"""Returns whether the value denotes the version being unknown."""
return (value == "0" or util.matches(_PATTERN_NOT_APPLICABLE, value)
or util.is_unknown(value))

@ -61,8 +61,8 @@ ALL_FIELDS = (
LOCAL_MODIFICATIONS,
)
ALL_FIELD_NAMES = {field.get_name() for field in ALL_FIELDS}
FIELD_MAPPING = {field.get_name().lower(): field for field in ALL_FIELDS}
_FIELD_MAPPING = {field.get_name().lower(): field for field in ALL_FIELDS}
def get_field(label: str) -> Union[field_types.MetadataField, None]:
return FIELD_MAPPING.get(label.lower())
return _FIELD_MAPPING.get(label.lower())

@ -37,6 +37,15 @@ class MetadataField:
self._name = name
self._one_liner = one_liner
def __eq__(self, other):
if not isinstance(other, MetadataField):
return False
return self._name.lower() == other._name.lower()
def __hash__(self):
return hash(self._name.lower())
def get_name(self):
return self._name

@ -17,6 +17,12 @@ _PATTERN_UNKNOWN = re.compile(r"^unknown$", re.IGNORECASE)
# empty string, or all characters are only whitespace.
_PATTERN_ONLY_WHITESPACE = re.compile(r"^\s*$")
# Pattern used to check if the string starts with "yes", case-insensitive.
_PATTERN_STARTS_WITH_YES = re.compile(r"^yes", re.IGNORECASE)
# Pattern used to check if the string starts with "no", case-insensitive.
_PATTERN_STARTS_WITH_NO = re.compile(r"^no", re.IGNORECASE)
def matches(pattern: re.Pattern, value: str) -> bool:
"""Returns whether the value matches the pattern."""
@ -36,3 +42,17 @@ def is_unknown(value: str) -> bool:
def quoted(values: List[str]) -> str:
"""Returns a string of the given values, each being individually quoted."""
return ", ".join([f"'{entry}'" for entry in values])
def infer_as_boolean(value: str, default: bool = True) -> bool:
"""Attempts to infer the value as a boolean, where:
- "yes"-ish values return True;
- "no"-ish values return False; and
- default is returned otherwise.
"""
if matches(_PATTERN_STARTS_WITH_YES, value):
return True
elif matches(_PATTERN_STARTS_WITH_NO, value):
return False
else:
return default

@ -40,7 +40,7 @@ def parse_file(filepath: str) -> List[dm.DependencyMetadata]:
Returns:
each dependency's metadata described in the file.
"""
with open(filepath, "r") as f:
with open(filepath, "r", encoding="utf-8") as f:
lines = f.readlines()
dependencies = []

@ -0,0 +1,132 @@
#!/usr/bin/env vpython3
# Copyright (c) 2023 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import sys
import unittest
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory.
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
# Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR)
import metadata.dependency_metadata as dm
import metadata.fields.known as known_fields
import metadata.fields.types as field_types
import metadata.validation_result as vr
class DependencyValidationTest(unittest.TestCase):
def test_repeated_field(self):
"""Check that a validation error is returned for a repeated field."""
dependency = dm.DependencyMetadata()
dependency.add_entry(known_fields.NAME.get_name(), "Test repeated field")
dependency.add_entry(known_fields.URL.get_name(), "https://www.example.com")
dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0")
dependency.add_entry(known_fields.LICENSE.get_name(), "Public Domain")
dependency.add_entry(known_fields.LICENSE_FILE.get_name(), "LICENSE")
dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "no")
dependency.add_entry(known_fields.SHIPPED.get_name(), "no")
dependency.add_entry(known_fields.NAME.get_name(), "again")
results = dependency.validate(
source_file_dir=os.path.join(_THIS_DIR, "data"),
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(results), 1)
self.assertTrue(isinstance(results[0], vr.ValidationError))
self.assertDictEqual(results[0].get_all_tags(),
{"reason": "repeated field"})
def test_required_field(self):
"""Check that a validation error is returned for a missing field."""
dependency = dm.DependencyMetadata()
dependency.add_entry(known_fields.SHIPPED.get_name(), "no")
dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "no")
dependency.add_entry(known_fields.LICENSE_FILE.get_name(), "LICENSE")
dependency.add_entry(known_fields.LICENSE.get_name(), "Public Domain")
dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0")
dependency.add_entry(known_fields.NAME.get_name(), "Test missing field")
# Leave URL field unspecified.
results = dependency.validate(
source_file_dir=os.path.join(_THIS_DIR, "data"),
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(results), 1)
self.assertTrue(isinstance(results[0], vr.ValidationError))
self.assertDictEqual(results[0].get_all_tags(),
{"reason": "missing required field"})
def test_invalid_field(self):
"""Check field validation issues are returned."""
dependency = dm.DependencyMetadata()
dependency.add_entry(known_fields.URL.get_name(), "https://www.example.com")
dependency.add_entry(known_fields.NAME.get_name(), "Test invalid field")
dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0")
dependency.add_entry(known_fields.LICENSE_FILE.get_name(), "LICENSE")
dependency.add_entry(known_fields.LICENSE.get_name(), "Public domain")
dependency.add_entry(known_fields.SHIPPED.get_name(), "no")
dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "test")
results = dependency.validate(
source_file_dir=os.path.join(_THIS_DIR, "data"),
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(results), 1)
self.assertTrue(isinstance(results[0], vr.ValidationError))
self.assertDictEqual(results[0].get_all_tags(),
{"field": known_fields.SECURITY_CRITICAL.get_name()})
def test_invalid_license_file_path(self):
"""Check license file path validation issues are returned."""
dependency = dm.DependencyMetadata()
dependency.add_entry(known_fields.NAME.get_name(), "Test license file path")
dependency.add_entry(known_fields.URL.get_name(), "https://www.example.com")
dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0")
dependency.add_entry(known_fields.LICENSE.get_name(), "Public domain")
dependency.add_entry(known_fields.LICENSE_FILE.get_name(),
"MISSING-LICENSE")
dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "no")
dependency.add_entry(known_fields.SHIPPED.get_name(), "no")
results = dependency.validate(
source_file_dir=os.path.join(_THIS_DIR, "data"),
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(results), 1)
self.assertTrue(isinstance(results[0], vr.ValidationError))
self.assertDictEqual(results[0].get_all_tags(),
{"field": known_fields.LICENSE_FILE.get_name()})
def test_multiple_validation_issues(self):
"""Check all validation issues are returned."""
dependency = dm.DependencyMetadata()
dependency.add_entry(known_fields.NAME.get_name(), "Test multiple errors")
# Leave URL field unspecified.
dependency.add_entry(known_fields.VERSION.get_name(), "1.0.0")
dependency.add_entry(known_fields.LICENSE.get_name(), "Public domain")
dependency.add_entry(known_fields.LICENSE_FILE.get_name(),
"MISSING-LICENSE")
dependency.add_entry(known_fields.SECURITY_CRITICAL.get_name(), "test")
dependency.add_entry(known_fields.SHIPPED.get_name(), "no")
dependency.add_entry(known_fields.NAME.get_name(), "again")
# Check 4 validation results are returned, for:
# - missing field;
# - invalid license file path;
# - invalid yes/no field value; and
# - repeated field entry.
results = dependency.validate(
source_file_dir=os.path.join(_THIS_DIR, "data"),
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(results), 4)
if __name__ == "__main__":
unittest.main()

@ -113,32 +113,32 @@ class FieldValidationTest(unittest.TestCase):
# Check relative path from README directory, and multiple license files.
result = known_fields.LICENSE_FILE.validate_on_disk(
value="LICENSE, src/LICENSE.txt",
readme_dir=os.path.join(_THIS_DIR, "data"),
chromium_src_dir=_THIS_DIR,
source_file_dir=os.path.join(_THIS_DIR, "data"),
repo_root_dir=_THIS_DIR,
)
self.assertIsNone(result)
# Check relative path from Chromium src directory.
result = known_fields.LICENSE_FILE.validate_on_disk(
value="//data/LICENSE",
readme_dir=os.path.join(_THIS_DIR, "data"),
chromium_src_dir=_THIS_DIR,
source_file_dir=os.path.join(_THIS_DIR, "data"),
repo_root_dir=_THIS_DIR,
)
self.assertIsNone(result)
# Check missing file.
result = known_fields.LICENSE_FILE.validate_on_disk(
value="MISSING_LICENSE",
readme_dir=os.path.join(_THIS_DIR, "data"),
chromium_src_dir=_THIS_DIR,
source_file_dir=os.path.join(_THIS_DIR, "data"),
repo_root_dir=_THIS_DIR,
)
self.assertIsInstance(result, vr.ValidationError)
# Check deprecated NOT_SHIPPED.
result = known_fields.LICENSE_FILE.validate_on_disk(
value="NOT_SHIPPED",
readme_dir=os.path.join(_THIS_DIR, "data"),
chromium_src_dir=_THIS_DIR,
source_file_dir=os.path.join(_THIS_DIR, "data"),
repo_root_dir=_THIS_DIR,
)
self.assertIsInstance(result, vr.ValidationWarning)

@ -9,10 +9,10 @@ from typing import Dict, Union
class ValidationResult:
"""Base class for validation issues."""
def __init__(self, message: str, fatal: bool, **tags: Dict[str, str]):
def __init__(self, message: str, fatal: bool):
self._fatal = fatal
self._message = message
self._tags = tags
self._tags = {}
def __str__(self) -> str:
prefix = "ERROR" if self._fatal else "[non-fatal]"

Loading…
Cancel
Save