[ssci] Script to run validation on all metadata files

Adds script metadata/scan.py which can be used to search for and
validate Chromium dependency metadata files, given a repository
root directory.

Bug: b:277147404
Change-Id: Ibde0eeb7babe0b1e3f9c7f887bece629d390974a
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4823596
Commit-Queue: Anne Redulla <aredulla@google.com>
Reviewed-by: Rachael Newitt <renewitt@google.com>
parent e36c6bba14
commit 427f0f43ad

@ -1,4 +1,18 @@
# Validation for Chromium's Third Party Metadata Files
This directory contains the code to validate Chromium's third party metadata
This directory contains the code to validate Chromium third party metadata
files, i.e. `README.chromium` files.
## Prerequisites
1. Have the Chromium source code
[checked out](https://chromium.googlesource.com/chromium/src/+/main/docs/#checking-out-and-building) on disk
1. Ensure you've run `gclient runhooks` on your source checkout
## Run
`metadata/scan.py` can be used to search for and validate all Chromium third
party metadata files within a repository. For example, if your `chromium/src`
checkout is at `~/my/path/to/chromium/src`, run the following command from the
root directory of `depot_tools`:
```
vpython3 --vpython-spec=.vpython3 metadata/scan.py ~/my/path/to/chromium/src
```

@ -118,9 +118,10 @@ class DependencyMetadata:
]
if repeated_field_info:
repeated = ", ".join(repeated_field_info)
error = vr.ValidationError(
f"Multiple entries for the same field: {repeated}.")
error.set_tag(tag="reason", value="repeated field")
error = vr.ValidationError(reason="There is a repeated field.",
additional=[
f"Repeated fields: {repeated}",
])
results.append(error)
# Check required fields are present.
@ -128,8 +129,8 @@ class DependencyMetadata:
for field in required_fields:
if field not in self._metadata:
field_name = field.get_name()
error = vr.ValidationError(f"Required field '{field_name}' is missing.")
error.set_tag(tag="reason", value="missing required field")
error = vr.ValidationError(
reason=f"Required field '{field_name}' is missing.")
results.append(error)
# Validate values for all present fields.

@ -3,6 +3,7 @@
# found in the LICENSE file.
import os
from typing import List
# The base names that are known to be Chromium metadata files.
_METADATA_FILES = {
@ -13,3 +14,25 @@ _METADATA_FILES = {
def is_metadata_file(path: str) -> bool:
"""Filter for metadata files."""
return os.path.basename(path) in _METADATA_FILES
def find_metadata_files(root: str) -> List[str]:
"""Finds all metadata files within the given root directory, including
subdirectories.
Args:
root: the absolute path to the root directory within which to search.
Returns:
the absolute full paths for all the metadata files within the root
directory.
"""
metadata_files = []
for item in os.listdir(root):
full_path = os.path.join(root, item)
if is_metadata_file(item):
metadata_files.append(full_path)
elif os.path.isdir(full_path):
metadata_files.extend(find_metadata_files(full_path))
return metadata_files
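For context, here is a minimal usage sketch (not part of the change) of how this helper might be called. The checkout path is hypothetical, and the `metadata.discover` import assumes the same repo-root-on-`sys.path` layout that `scan.py` uses below.

```
import os

import metadata.discover

# Hypothetical checkout location, for illustration only.
src_dir = os.path.expanduser("~/chromium/src")

# find_metadata_files() returns absolute paths to every README.chromium-style
# file under the given root, so the results can be fed straight to validation.
for path in metadata.discover.find_metadata_files(src_dir):
    print(os.path.relpath(path, start=src_dir))
```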

@ -19,7 +19,7 @@ import metadata.fields.field_types as field_types
import metadata.fields.util as util
import metadata.validation_result as vr
_PATTERN_CPE_PREFIX = re.compile(r"^cpe:/.+:.+:.+(:.+)*$")
_PATTERN_CPE_PREFIX = re.compile(r"^cpe:(2.3:|/).+:.+:.+(:.+)*$", re.IGNORECASE)
class CPEPrefixField(field_types.MetadataField):
@ -28,12 +28,16 @@ class CPEPrefixField(field_types.MetadataField):
super().__init__(name="CPEPrefix", one_liner=True)
def validate(self, value: str) -> Union[vr.ValidationResult, None]:
"""Checks the given value is either 'unknown', or a valid
CPE in the URI form `cpe:/<part>:<vendor>:<product>[:<optional fields>]`.
"""Checks the given value is either 'unknown', or conforms to either the
CPE 2.3 or 2.2 format.
"""
if util.is_unknown(value) or util.matches(_PATTERN_CPE_PREFIX, value):
return None
return vr.ValidationError(
f"{self._name} is '{value}' - must be either 'unknown', or in the form "
"'cpe:/<part>:<vendor>:<product>[:<optional fields>]'.")
reason=f"{self._name} is invalid.",
additional=[
"This field should be a CPE (version 2.3 or 2.2), or 'unknown'.",
"Search for a CPE tag for the package at "
"https://nvd.nist.gov/products/cpe/search.",
])
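As a quick sanity check (not part of the change), the widened pattern accepts both the CPE 2.3 formatted-string prefix and the 2.2 URI form, case-insensitively; the sample values below mirror the test cases added further down.

```
import re

_PATTERN_CPE_PREFIX = re.compile(r"^cpe:(2.3:|/).+:.+:.+(:.+)*$", re.IGNORECASE)

# CPE 2.3 formatted string and CPE 2.2 URI form both match.
assert _PATTERN_CPE_PREFIX.match("cpe:2.3:a:sqlite:sqlite:3.0.0")
assert _PATTERN_CPE_PREFIX.match("CPE:/a:sqlite:sqlite")
# A bare package name does not.
assert not _PATTERN_CPE_PREFIX.match("sqlite")
```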

@ -32,5 +32,8 @@ class DateField(field_types.MetadataField):
if util.matches(_PATTERN_DATE, value):
return None
return vr.ValidationError(
f"{self._name} is '{value}' - must use format YYYY-MM-DD.")
return vr.ValidationError(reason=f"{self._name} is invalid.",
additional=[
"The correct format is YYYY-MM-DD.",
f"Current value is '{value}'.",
])

@ -108,24 +108,22 @@ class LicenseField(field_types.MetadataField):
atomic_delimiter=self.VALUE_DELIMITER)
for license, allowed in licenses:
if util.is_empty(license):
return vr.ValidationError(f"{self._name} has an empty value.")
return vr.ValidationError(reason=f"{self._name} has an empty value.")
if not allowed:
not_allowlisted.append(license)
if not_allowlisted:
template = ("{field_name} has licenses not in the allowlist. If "
"there are multiple license types, separate them with a "
"'{delim}'. Invalid values: {values}.")
message = template.format(field_name=self._name,
delim=self.VALUE_DELIMITER,
values=util.quoted(not_allowlisted))
return vr.ValidationWarning(message)
return vr.ValidationWarning(
reason=f"{self._name} has a license not in the allowlist.",
additional=[
f"Separate licenses using a '{self.VALUE_DELIMITER}'.",
f"Licenses not allowlisted: {util.quoted(not_allowlisted)}.",
])
# Suggest using the standard value delimiter when possible.
if (re.search(_PATTERN_VERBOSE_DELIMITER, value)
and self.VALUE_DELIMITER not in value):
return vr.ValidationWarning(
f"{self._name} should use '{self.VALUE_DELIMITER}' to delimit "
"values.")
reason=f"Separate licenses using a '{self.VALUE_DELIMITER}'.")
return None

@ -42,7 +42,10 @@ class LicenseFileField(field_types.MetadataField):
"""
if value == _NOT_SHIPPED:
return vr.ValidationWarning(
f"{self._name} uses deprecated value '{_NOT_SHIPPED}'.")
reason=f"{self._name} uses deprecated value '{_NOT_SHIPPED}'.",
additional=[
f"Remove this field and use 'Shipped: {util.NO}' instead.",
])
invalid_values = []
for path in value.split(self.VALUE_DELIMITER):
@ -51,13 +54,13 @@ class LicenseFileField(field_types.MetadataField):
invalid_values.append(path)
if invalid_values:
template = ("{field_name} has invalid values. Paths cannot be empty, "
"or include '../'. If there are multiple license files, "
"separate them with a '{delim}'. Invalid values: {values}.")
message = template.format(field_name=self._name,
delim=self.VALUE_DELIMITER,
values=util.quoted(invalid_values))
return vr.ValidationError(message)
return vr.ValidationError(
reason=f"{self._name} is invalid.",
additional=[
"File paths cannot be empty, or include '../'.",
f"Separate license files using a '{self.VALUE_DELIMITER}'.",
f"Invalid values: {util.quoted(invalid_values)}.",
])
return None
@ -84,7 +87,10 @@ class LicenseFileField(field_types.MetadataField):
"""
if value == _NOT_SHIPPED:
return vr.ValidationWarning(
f"{self._name} uses deprecated value '{_NOT_SHIPPED}'.")
reason=f"{self._name} uses deprecated value '{_NOT_SHIPPED}'.",
additional=[
f"Remove this field and use 'Shipped: {util.NO}' instead.",
])
invalid_values = []
for license_filename in value.split(self.VALUE_DELIMITER):
@ -100,10 +106,12 @@ class LicenseFileField(field_types.MetadataField):
invalid_values.append(license_filepath)
if invalid_values:
template = ("{field_name} has invalid values. Failed to find file(s) on"
"local disk. Invalid values: {values}.")
message = template.format(field_name=self._name,
values=util.quoted(invalid_values))
return vr.ValidationError(message)
missing = ", ".join(invalid_values)
return vr.ValidationError(
reason=f"{self._name} is invalid.",
additional=[
"Failed to find all license files on local disk.",
f"Missing files:{missing}.",
])
return None

@ -44,13 +44,12 @@ class URLField(field_types.MetadataField):
invalid_values.append(url)
if invalid_values:
template = ("{field_name} has invalid values. URLs must use a protocol "
"scheme in [http, https, ftp, git]. If there are multiple "
"URLs, separate them with a '{delim}'. Invalid values: "
"{values}.")
message = template.format(field_name=self._name,
delim=self.VALUE_DELIMITER,
values=util.quoted(invalid_values))
return vr.ValidationError(message)
return vr.ValidationError(
reason=f"{self._name} is invalid.",
additional=[
"URLs must use a protocol scheme in [http, https, ftp, git].",
f"Separate URLs using a '{self.VALUE_DELIMITER}'.",
f"Invalid values: {util.quoted(invalid_values)}.",
])
return None

@ -40,12 +40,18 @@ class VersionField(field_types.MetadataField):
"""
if value == "0" or util.is_unknown(value):
return vr.ValidationWarning(
f"{self._name} is '{value}' - use 'N/A' if this package does not "
"version or is versioned by date or revision.")
reason=f"{self._name} is '{value}'.",
additional=[
"Set this field to 'N/A' if this package does not version or is "
"versioned by date or revision.",
])
if util.is_empty(value):
return vr.ValidationError(
f"{self._name} is empty - use 'N/A' if this package is versioned by "
"date or revision.")
reason=f"{self._name} is empty.",
additional=[
"Set this field to 'N/A' if this package does not version or is "
"versioned by date or revision.",
])
return None

@ -66,7 +66,7 @@ class FreeformTextField(MetadataField):
def validate(self, value: str) -> Union[vr.ValidationResult, None]:
"""Checks the given value has at least one non-whitespace character."""
if util.is_empty(value):
return vr.ValidationError(f"{self._name} is empty.")
return vr.ValidationError(reason=f"{self._name} is empty.")
return None
@ -83,8 +83,15 @@ class YesNoField(MetadataField):
if util.matches(_PATTERN_STARTS_WITH_YES_OR_NO, value):
return vr.ValidationWarning(
f"{self._name} is '{value}' - should be only {util.YES} or {util.NO}."
)
reason=f"{self._name} is invalid.",
additional=[
f"This field should be only {util.YES} or {util.NO}.",
f"Current value is '{value}'.",
])
return vr.ValidationError(
f"{self._name} is '{value}' - must be {util.YES} or {util.NO}.")
reason=f"{self._name} is invalid.",
additional=[
f"This field must be {util.YES} or {util.NO}.",
f"Current value is '{value}'.",
])

@ -0,0 +1,93 @@
#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from __future__ import print_function
import argparse
from collections import defaultdict
import os
import sys
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory.
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
# Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR)
import metadata.discover
import metadata.validate
def parse_args() -> argparse.Namespace:
"""Helper to parse args to this script."""
parser = argparse.ArgumentParser()
repo_root_dir = parser.add_argument(
"repo_root_dir",
help=("The path to the repository's root directory, which will be "
"scanned for Chromium metadata files, e.g. '~/chromium/src'."),
)
args = parser.parse_args()
# Check the repo root directory exists.
src_dir = os.path.abspath(args.repo_root_dir)
if not os.path.exists(src_dir) or not os.path.isdir(src_dir):
raise argparse.ArgumentError(
repo_root_dir,
f"Invalid repository root directory '{src_dir}' - not found",
)
return args
def main() -> None:
"""Runs validation on all metadata files within the directory specified by the
repo_root_dir arg.
"""
config = parse_args()
src_dir = os.path.abspath(config.repo_root_dir)
metadata_files = metadata.discover.find_metadata_files(src_dir)
file_count = len(metadata_files)
print(f"Found {file_count} metadata files.")
invalid_file_count = 0
# Key is constructed from the result severity and reason;
# Value is a list of files affected by that reason at that severity.
all_reasons = defaultdict(list)
for filepath in metadata_files:
file_results = metadata.validate.validate_file(filepath,
repo_root_dir=src_dir)
invalid = False
if file_results:
relpath = os.path.relpath(filepath, start=src_dir)
print(f"\n{len(file_results)} problem(s) in {relpath}:")
for result in file_results:
print(f" {result}")
summary_key = "{severity} - {reason}".format(
severity=result.get_severity_prefix(), reason=result.get_reason())
all_reasons[summary_key].append(relpath)
if result.is_fatal():
invalid = True
if invalid:
invalid_file_count += 1
print("\n\nDone.\nSummary:")
for summary_key, affected_files in all_reasons.items():
count = len(affected_files)
plural = "s" if count > 1 else ""
print(f"\n {count} file{plural}: {summary_key}")
for affected_file in affected_files:
print(f" {affected_file}")
print(f"\n\n{invalid_file_count} / {file_count} metadata files are invalid, "
"i.e. the file has at least one fatal validation issue.")
if __name__ == "__main__":
main()

@ -38,8 +38,7 @@ class DependencyValidationTest(unittest.TestCase):
)
self.assertEqual(len(results), 1)
self.assertTrue(isinstance(results[0], vr.ValidationError))
self.assertDictEqual(results[0].get_all_tags(),
{"reason": "repeated field"})
self.assertEqual(results[0].get_reason(), "There is a repeated field.")
def test_required_field(self):
"""Check that a validation error is returned for a missing field."""
@ -58,8 +57,8 @@ class DependencyValidationTest(unittest.TestCase):
)
self.assertEqual(len(results), 1)
self.assertTrue(isinstance(results[0], vr.ValidationError))
self.assertDictEqual(results[0].get_all_tags(),
{"reason": "missing required field"})
self.assertEqual(results[0].get_reason(),
"Required field 'URL' is missing.")
def test_invalid_field(self):
"""Check field validation issues are returned."""
@ -78,8 +77,7 @@ class DependencyValidationTest(unittest.TestCase):
)
self.assertEqual(len(results), 1)
self.assertTrue(isinstance(results[0], vr.ValidationError))
self.assertDictEqual(results[0].get_all_tags(),
{"field": known_fields.SECURITY_CRITICAL.get_name()})
self.assertEqual(results[0].get_reason(), "Security Critical is invalid.")
def test_invalid_license_file_path(self):
"""Check license file path validation issues are returned."""
@ -99,8 +97,7 @@ class DependencyValidationTest(unittest.TestCase):
)
self.assertEqual(len(results), 1)
self.assertTrue(isinstance(results[0], vr.ValidationError))
self.assertDictEqual(results[0].get_all_tags(),
{"field": known_fields.LICENSE_FILE.get_name()})
self.assertEqual(results[0].get_reason(), "License File is invalid.")
def test_multiple_validation_issues(self):
"""Check all validation issues are returned."""

@ -70,7 +70,11 @@ class FieldValidationTest(unittest.TestCase):
self._run_field_validation(
field=known_fields.CPE_PREFIX,
valid_values=[
"unknown", "cpe:/a:sqlite:sqlite:3.0.0", "cpe:/a:sqlite:sqlite"
"unknown",
"Cpe:2.3:a:sqlite:sqlite:3.0.0",
"cpe:2.3:a:sqlite:sqlite",
"CPE:/a:sqlite:sqlite:3.0.0",
"cpe:/a:sqlite:sqlite",
],
error_values=["", "\n"],
)

@ -36,8 +36,7 @@ def validate_content(content: str, source_file_dir: str,
results = []
dependencies = metadata.parse.parse_content(content)
if not dependencies:
result = vr.ValidationError("No dependency metadata found.")
result.set_tag("reason", "no metadata")
result = vr.ValidationError(reason="No dependency metadata found.")
return [result]
for dependency in dependencies:
@ -49,8 +48,9 @@ def validate_content(content: str, source_file_dir: str,
def _construct_file_read_error(filepath: str, cause: str) -> vr.ValidationError:
"""Helper function to create a validation error for a file reading issue."""
result = vr.ValidationError(f"Cannot read '{filepath}' - {cause}.")
result.set_tag(tag="reason", value="read error")
result = vr.ValidationError(
reason="Cannot read metadata file.",
additional=[f"Attempted to read '{filepath}' but {cause}."])
return result

@ -4,7 +4,7 @@
# found in the LICENSE file.
import textwrap
from typing import Dict, Union
from typing import Dict, List, Union
_CHROMIUM_METADATA_PRESCRIPT = "Third party metadata issue:"
@ -14,13 +14,23 @@ _CHROMIUM_METADATA_POSTSCRIPT = ("Check //third_party/README.chromium.template "
class ValidationResult:
"""Base class for validation issues."""
def __init__(self, message: str, fatal: bool):
def __init__(self, reason: str, fatal: bool, additional: List[str] = []):
"""Constructor for a validation issue.
Args:
reason: the root cause of the issue.
fatal: whether the issue is fatal.
additional: details that should be included in the validation message,
e.g. advice on how to address the issue, or specific
problematic values.
"""
self._reason = reason
self._fatal = fatal
self._message = message
self._message = " ".join([reason] + additional)
self._tags = {}
def __str__(self) -> str:
prefix = "ERROR" if self._fatal else "[non-fatal]"
prefix = self.get_severity_prefix()
return f"{prefix} - {self._message}"
def __repr__(self) -> str:
@ -29,6 +39,14 @@ class ValidationResult:
def is_fatal(self) -> bool:
return self._fatal
def get_severity_prefix(self):
if self._fatal:
return "ERROR"
return "[non-fatal]"
def get_reason(self) -> str:
return self._reason
def set_tag(self, tag: str, value: str) -> bool:
self._tags[tag] = value
@ -54,11 +72,11 @@ class ValidationResult:
class ValidationError(ValidationResult):
"""Fatal validation issue. Presubmit should fail."""
def __init__(self, message: str):
super().__init__(message=message, fatal=True)
def __init__(self, reason: str, additional: List[str] = []):
super().__init__(reason=reason, fatal=True, additional=additional)
class ValidationWarning(ValidationResult):
"""Non-fatal validation issue. Presubmit should pass."""
def __init__(self, message: str):
super().__init__(message=message, fatal=False)
def __init__(self, reason: str, additional: List[str] = []):
super().__init__(reason=reason, fatal=False, additional=additional)
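To illustrate how the reworked constructor and accessors fit together, here is a hedged sketch (the reason and detail strings are made up) of building and printing a result:

```
import metadata.validation_result as vr

error = vr.ValidationError(
    reason="License File is invalid.",
    additional=["File paths cannot be empty, or include '../'."])

# The reason stays queryable on its own, while __str__ joins the reason and
# the additional details behind the severity prefix.
assert error.is_fatal()
assert error.get_reason() == "License File is invalid."
print(error)
# ERROR - License File is invalid. File paths cannot be empty, or include '../'.
```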
