diff --git a/metadata/fields/custom/license.py b/metadata/fields/custom/license.py
new file mode 100644
index 000000000..d21785bde
--- /dev/null
+++ b/metadata/fields/custom/license.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+# Copyright 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import re
+import sys
+from typing import List, Union, Tuple
+
+_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+# The repo's root directory.
+_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", ".."))
+
+# Add the repo's root directory for clearer imports.
+sys.path.insert(0, _ROOT_DIR)
+
+import metadata.fields.types as field_types
+import metadata.fields.util as util
+import metadata.validation_result as vr
+
+# Copied from ANDROID_ALLOWED_LICENSES in
+# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/third_party/PRESUBMIT.py
+# Each entry is a regular expression fragment: keep them as raw strings so
+# "\." is not an invalid string escape, and escape dots that are literal.
+_ANDROID_ALLOWED_LICENSES = [
+    r"A(pple )?PSL 2(\.0)?",
+    r"Android Software Development Kit License",
+    r"Apache( License)?,?( Version)? 2(\.0)?",
+    r"(New )?([23]-Clause )?BSD( [23]-Clause)?( with advertising clause)?",
+    r"GNU Lesser Public License",
+    r"L?GPL ?v?2(\.[01])?( or later)?( with the classpath exception)?",
+    r"(The )?MIT(/X11)?(-like)?( License)?",
+    r"MPL 1\.1 ?/ ?GPL 2(\.0)? ?/ ?LGPL 2\.1",
+    r"MPL 2(\.0)?",
+    r"Microsoft Limited Public License",
+    r"Microsoft Permissive License",
+    r"Public Domain",
+    r"Python",
+    r"SIL Open Font License, Version 1\.1",
+    r"SGI Free Software License B",
+    r"Unicode, Inc\. License",
+    r"University of Illinois\/NCSA Open Source",
+    r"X11",
+    r"Zlib",
+]
+_PATTERN_LICENSE_ALLOWED = re.compile(
+    r"^({})$".format("|".join(_ANDROID_ALLOWED_LICENSES)),
+    re.IGNORECASE,
+)
+
+_PATTERN_VERBOSE_DELIMITER = re.compile(r" and | or | / ")
+
+
+def process_license_value(value: str,
+                          atomic_delimiter: str) -> List[Tuple[str, bool]]:
+  """Process a license field value, which may list multiple licenses.
+
+  Args:
+    value: the value to process, which may include both verbose and atomic
+           delimiters, e.g. "Apache, 2.0 and MIT and custom"
+    atomic_delimiter: the delimiter to use as a final step; values will not be
+                      further split after using this delimiter.
+
+  Returns: a list of the constituent licenses within the given value,
+           and whether the constituent license is on the allowlist.
+           e.g. [("Apache, 2.0", True), ("MIT", True), ("custom", False)]
+  """
+  # Check if the value is on the allowlist as-is, and thus does not require
+  # further processing.
+  if is_license_allowlisted(value):
+    return [(value, True)]
+
+  breakdown = []
+  if _PATTERN_VERBOSE_DELIMITER.search(value):
+    # Split using the verbose delimiters.
+    for component in _PATTERN_VERBOSE_DELIMITER.split(value):
+      breakdown.extend(
+          process_license_value(component.strip(), atomic_delimiter))
+  else:
+    # Split using the standard value delimiter. This results in atomic values;
+    # there is no further splitting possible.
+    for atomic_value in value.split(atomic_delimiter):
+      atomic_value = atomic_value.strip()
+      breakdown.append((atomic_value, is_license_allowlisted(atomic_value)))
+
+  return breakdown
+
+
+def is_license_allowlisted(value: str) -> bool:
+  """Returns whether the value is in the allowlist for license types."""
+  return util.matches(_PATTERN_LICENSE_ALLOWED, value)
+
+
+class LicenseField(field_types.MetadataField):
+  """Custom field for the package's license type(s).
+
+  e.g. Apache 2.0, MIT, BSD, Public Domain.
+  """
+  def __init__(self):
+    super().__init__(name="License", one_liner=False)
+
+  def validate(self, value: str) -> Union[vr.ValidationResult, None]:
+    """Checks the given value consists of recognized license types.
+
+    Note: this field supports multiple values.
+    """
+    not_allowlisted = []
+    licenses = process_license_value(value,
+                                     atomic_delimiter=self.VALUE_DELIMITER)
+    for license_value, allowed in licenses:
+      if util.is_empty(license_value):
+        return vr.ValidationError(f"{self._name} has an empty value.")
+      if not allowed:
+        not_allowlisted.append(license_value)
+
+    if not_allowlisted:
+      template = ("{field_name} has licenses not in the allowlist. If "
+                  "there are multiple license types, separate them with a "
+                  "'{delim}'. Invalid values: {values}.")
+      message = template.format(field_name=self._name,
+                                delim=self.VALUE_DELIMITER,
+                                values=util.quoted(not_allowlisted))
+      return vr.ValidationWarning(message)
+
+    # Suggest using the standard value delimiter when possible.
+    if (_PATTERN_VERBOSE_DELIMITER.search(value)
+        and self.VALUE_DELIMITER not in value):
+      return vr.ValidationWarning(
+          f"{self._name} should use '{self.VALUE_DELIMITER}' to delimit "
+          "values.")
+
+    return None
diff --git a/metadata/fields/custom/license_file.py b/metadata/fields/custom/license_file.py
index 87518b167..fd5f2023e 100644
--- a/metadata/fields/custom/license_file.py
+++ b/metadata/fields/custom/license_file.py
@@ -25,9 +25,6 @@ _PATTERN_PATH_BACKWARD = re.compile(r"\.\.\/")
 # Deprecated special value for packages that aren't shipped.
 _NOT_SHIPPED = "NOT_SHIPPED"
 
-# The delimiter used to separate multiple license file paths.
-_VALUE_DELIMITER = ","
-
 
 class LicenseFileField(field_types.MetadataField):
   """Custom field for the paths to the package's license file(s)."""
@@ -48,7 +45,7 @@ class LicenseFileField(field_types.MetadataField):
           f"{self._name} uses deprecated value '{_NOT_SHIPPED}'.")
 
     invalid_values = []
-    for path in value.split(_VALUE_DELIMITER):
+    for path in value.split(self.VALUE_DELIMITER):
       path = path.strip()
       if util.is_empty(path) or util.matches(_PATTERN_PATH_BACKWARD, path):
         invalid_values.append(path)
@@ -58,7 +55,7 @@ class LicenseFileField(field_types.MetadataField):
                   "or include '../'. If there are multiple license files, "
                   "separate them with a '{delim}'. Invalid values: {values}.")
       message = template.format(field_name=self._name,
-                                delim=_VALUE_DELIMITER,
+                                delim=self.VALUE_DELIMITER,
                                 values=util.quoted(invalid_values))
       return vr.ValidationError(message)
 
@@ -79,7 +76,7 @@ class LicenseFileField(field_types.MetadataField):
           f"{self._name} uses deprecated value '{_NOT_SHIPPED}'.")
 
     invalid_values = []
-    for license_filename in value.split(_VALUE_DELIMITER):
+    for license_filename in value.split(self.VALUE_DELIMITER):
       license_filename = license_filename.strip()
       if license_filename.startswith("/"):
         license_filepath = os.path.join(
diff --git a/metadata/fields/custom/url.py b/metadata/fields/custom/url.py
index 90d7ea307..bf87a43d5 100644
--- a/metadata/fields/custom/url.py
+++ b/metadata/fields/custom/url.py
@@ -19,9 +19,6 @@ import metadata.fields.types as field_types
 import metadata.fields.util as util
 import metadata.validation_result as vr
 
-# The delimiter used to separate multiple URLs.
-_VALUE_DELIMITER = ","
-
 _PATTERN_URL_ALLOWED = re.compile(r"^(https?|ftp|git):\/\/\S+$")
 _PATTERN_URL_CANONICAL_REPO = re.compile(
     r"^This is the canonical (public )?repo(sitory)?\.?$", re.IGNORECASE)
@@ -41,7 +38,7 @@ class URLField(field_types.MetadataField):
       return None
 
     invalid_values = []
-    for url in value.split(_VALUE_DELIMITER):
+    for url in value.split(self.VALUE_DELIMITER):
       url = url.strip()
       if not util.matches(_PATTERN_URL_ALLOWED, url):
         invalid_values.append(url)
@@ -52,7 +49,7 @@ class URLField(field_types.MetadataField):
                   "URLs, separate them with a '{delim}'. Invalid values: "
                   "{values}.")
       message = template.format(field_name=self._name,
-                                delim=_VALUE_DELIMITER,
+                                delim=self.VALUE_DELIMITER,
                                 values=util.quoted(invalid_values))
       return vr.ValidationError(message)
 
diff --git a/metadata/fields/known.py b/metadata/fields/known.py
index 62f1932f4..81f0e3d71 100644
--- a/metadata/fields/known.py
+++ b/metadata/fields/known.py
@@ -16,6 +16,7 @@ sys.path.insert(0, _ROOT_DIR)
 
 import metadata.fields.custom.cpe_prefix
 import metadata.fields.custom.date
+import metadata.fields.custom.license
 import metadata.fields.custom.license_file
 import metadata.fields.custom.url
 import metadata.fields.custom.version
@@ -38,6 +39,7 @@ LICENSE_ANDROID_COMPATIBLE = field_types.YesNoField(
 # Custom fields.
 CPE_PREFIX = metadata.fields.custom.cpe_prefix.CPEPrefixField()
 DATE = metadata.fields.custom.date.DateField()
+LICENSE = metadata.fields.custom.license.LicenseField()
 LICENSE_FILE = metadata.fields.custom.license_file.LicenseFileField()
 URL = metadata.fields.custom.url.URLField()
 VERSION = metadata.fields.custom.version.VersionField()
@@ -49,6 +51,7 @@ ALL_FIELDS = (
     VERSION,
     DATE,
     REVISION,
+    LICENSE,
     LICENSE_FILE,
     SECURITY_CRITICAL,
     SHIPPED,
diff --git a/metadata/fields/types.py b/metadata/fields/types.py
index b9dd24910..f36dded60 100644
--- a/metadata/fields/types.py
+++ b/metadata/fields/types.py
@@ -29,6 +29,10 @@ _PATTERN_STARTS_WITH_YES_OR_NO = re.compile(r"^(yes|no)", re.IGNORECASE)
 
 class MetadataField:
   """Base class for all metadata fields."""
+
+  # The delimiter used to separate multiple values.
+  VALUE_DELIMITER = ","
+
   def __init__(self, name: str, one_liner: bool = True):
     self._name = name
     self._one_liner = one_liner
diff --git a/metadata/tests/fields_test.py b/metadata/tests/fields_test.py
index ae28b4342..a34dcf824 100644
--- a/metadata/tests/fields_test.py
+++ b/metadata/tests/fields_test.py
@@ -82,6 +82,23 @@ class FieldValidationTest(unittest.TestCase):
         error_values=["", "\n", "April 3, 2012", "2012/03/04"],
     )
 
+  def test_license_validation(self):
+    self._run_field_validation(
+        field=known_fields.LICENSE,
+        valid_values=[
+            "Apache, 2.0 / MIT / MPL 2",
+            "LGPL 2.1",
+            "Apache, Version 2 and Public domain",
+        ],
+        error_values=["", "\n", ",", "Apache 2.0 / MIT / "],
+        warning_values=[
+            "Custom license",
+            "Custom / MIT",
+            "Public domain or MPL 2",
+            "APSL 2 and the MIT license",
+        ],
+    )
+
   def test_license_file_validation(self):
     self._run_field_validation(
         field=known_fields.LICENSE_FILE,