diff --git a/metadata/dependency_metadata.py b/metadata/dependency_metadata.py
index 79d746208..12755c84a 100644
--- a/metadata/dependency_metadata.py
+++ b/metadata/dependency_metadata.py
@@ -6,7 +6,7 @@
 from collections import defaultdict
 import os
 import sys
-from typing import Dict, List, Set, Tuple
+from typing import Dict, List, Set, Tuple, Union, Optional, Literal, Any
 
 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
 # The repo's root directory.
@@ -24,7 +24,19 @@ import metadata.validation_result as vr
 
 
 class DependencyMetadata:
-    """The metadata for a single dependency."""
+    """The metadata for a single dependency.
+
+    See @property declarations below to retrieve validated fields for
+    downstream consumption.
+
+    A property returns `None` if the provided value (e.g. in a
+    README.chromium file) is clearly invalid.
+
+    Otherwise, it returns a suitably typed value (see comments on each
+    property).
+
+    To retrieve unvalidated fields (i.e. raw values), use get_entries().
+    """
 
     # Fields that are always required.
     _MANDATORY_FIELDS = {
@@ -171,9 +183,11 @@ class DependencyMetadata:
         version_value = self._metadata.get(known_fields.VERSION)
         date_value = self._metadata.get(known_fields.DATE)
         revision_value = self._metadata.get(known_fields.REVISION)
-        if ((not version_value or version_util.is_unknown(version_value))
-                and (not date_value or util.is_unknown(date_value))
-                and (not revision_value or util.is_unknown(revision_value))):
+        if ((not version_value
+             or version_util.version_is_unknown(version_value)) and
+            (not date_value or version_util.version_is_unknown(date_value))
+                and (not revision_value
+                     or version_util.version_is_unknown(revision_value))):
             versioning_fields = [
                 known_fields.VERSION, known_fields.DATE, known_fields.REVISION
             ]
@@ -199,3 +213,105 @@ class DependencyMetadata:
             results.append(result)
 
         return results
+
+    def _return_as_property(self, field: field_types.MetadataField) -> Any:
+        """Helper function to create a property for DependencyMetadata.
+
+        Returns None when the field is unset, else `field.narrow_type(raw)`.
+        """
+        assert field in known_fields.ALL_FIELDS
+
+        raw_value = self._metadata.get(field, None)
+        if raw_value is None:
+            # Field is not set.
+            return None
+
+        return field.narrow_type(raw_value)
+
+    @property
+    def name(self) -> Optional[str]:
+        return self._return_as_property(known_fields.NAME)
+
+    @property
+    def short_name(self) -> Optional[str]:
+        return self._return_as_property(known_fields.SHORT_NAME)
+
+    @property
+    def url(self) -> Optional[List[str]]:
+        """
+        Returns a list of URLs that point to the upstream repo.
+        Each URL is guaranteed to parse with `urllib.parse.urlparse`.
+
+        Returns None if this repository is the canonical repository of this
+        dependency (see is_canonical below).
+        """
+        return self._return_as_property(known_fields.URL)
+
+    @property
+    def is_canonical(self) -> bool:
+        """
+        Returns whether this repository is the canonical public repository of this dependency.
+
+        This is derived from a special value in the URL field.
+        """
+        value = self._metadata.get(known_fields.URL, "")
+        return known_fields.URL.repo_is_canonical(value)
+
+    @property
+    def version(self) -> Optional[str]:
+        return self._return_as_property(known_fields.VERSION)
+
+    @property
+    def date(self) -> Optional[str]:
+        """Returns in "YYYY-MM-DD" format."""
+        return self._return_as_property(known_fields.DATE)
+
+    @property
+    def revision(self) -> Optional[str]:
+        return self._return_as_property(known_fields.REVISION)
+
+    @property
+    def license(self) -> Optional[List[str]]:
+        """Returns a list of license names."""
+        return self._return_as_property(known_fields.LICENSE)
+
+    @property
+    def license_file(self) -> Optional[str]:
+        # TODO(b/321154076): Consider excluding files that don't exist on
+        # disk if it's not too hard.
+        #
+        # Plumbing src_root and dependency_dir into field validator is
+        # required.
+        return self._return_as_property(known_fields.LICENSE_FILE)
+
+    @property
+    def security_critical(self) -> Optional[bool]:
+        return self._return_as_property(known_fields.SECURITY_CRITICAL)
+
+    @property
+    def shipped(self) -> Optional[bool]:
+        return self._return_as_property(known_fields.SHIPPED)
+
+    @property
+    def shipped_in_chromium(self) -> Optional[bool]:
+        return self._return_as_property(known_fields.SHIPPED_IN_CHROMIUM)
+
+    @property
+    def license_android_compatible(self) -> Optional[bool]:
+        return self._return_as_property(known_fields.LICENSE_ANDROID_COMPATIBLE)
+
+    @property
+    def cpe_prefix(self) -> Optional[str]:
+        """Returns a lowercase string (CPE names are case-insensitive)."""
+        return self._return_as_property(known_fields.CPE_PREFIX)
+
+    @property
+    def description(self) -> Optional[str]:
+        return self._return_as_property(known_fields.DESCRIPTION)
+
+    @property
+    def local_modifications(self) -> Optional[Union[Literal[False], str]]:
+        """Returns `False` if there are no local modifications.
+        Otherwise the text content extracted from the metadata.
+        """
+        return self._return_as_property(known_fields.LOCAL_MODIFICATIONS)
diff --git a/metadata/fields/custom/cpe_prefix.py b/metadata/fields/custom/cpe_prefix.py
index 6481af796..3bcef029e 100644
--- a/metadata/fields/custom/cpe_prefix.py
+++ b/metadata/fields/custom/cpe_prefix.py
@@ -68,12 +68,15 @@ class CPEPrefixField(field_types.SingleLineTextField):
     def __init__(self):
         super().__init__(name="CPEPrefix")
 
+    def _is_valid(self, value: str) -> bool:
+        return (util.is_unknown(value) or is_formatted_string_cpe(value)
+                or is_uri_cpe(value))
+
     def validate(self, value: str) -> Optional[vr.ValidationResult]:
         """Checks the given value is either 'unknown', or conforms to
         either the CPE 2.3 or 2.2 format.
         """
-        if (util.is_unknown(value) or is_formatted_string_cpe(value)
-                or is_uri_cpe(value)):
+        if self._is_valid(value):
             return None
 
         return vr.ValidationError(
@@ -85,3 +88,13 @@ class CPEPrefixField(field_types.SingleLineTextField):
                 "https://nvd.nist.gov/products/cpe/search.",
                 f"Current value: '{value}'.",
             ])
+
+    def narrow_type(self, value: str) -> Optional[str]:
+        if not self._is_valid(value):
+            return None
+
+        # CPE names are case-insensitive, so normalize them to lowercase.
+        # See https://cpe.mitre.org/specification/.
+        value = value.lower()
+
+        return value
diff --git a/metadata/fields/custom/date.py b/metadata/fields/custom/date.py
index 25d88996f..a5a120799 100644
--- a/metadata/fields/custom/date.py
+++ b/metadata/fields/custom/date.py
@@ -6,7 +6,7 @@
 import datetime
 import os
 import sys
-from typing import Optional
+from typing import Optional, Tuple
 
 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
 # The repo's root directory.
@@ -59,13 +59,65 @@ _RECOGNIZED_DATE_FORMATS = (
 )
 
 
-def format_matches(value: str, date_format: str):
-    """Returns whether the given value matches the date format."""
+def parse_with_format(value: str,
+                      date_format: str) -> Optional[datetime.datetime]:
+    """Returns the datetime parsed from `value` per `date_format`, or None."""
     try:
-        datetime.datetime.strptime(value, date_format)
+        return datetime.datetime.strptime(value, date_format)
     except ValueError:
-        return False
-    return True
+        return None
+
+
+def to_preferred_format(dt: datetime.datetime) -> str:
+    return datetime.datetime.strftime(dt, _PREFERRED_PREFIX_FORMAT)
+
+
+def parse_date(value: str) -> Optional[Tuple[str, bool]]:
+    """Try to parse value into a YYYY-MM-DD date.
+
+    If successful: returns (str, bool); otherwise returns None.
+    - The str is the first match, rendered by to_preferred_format().
+    - The bool indicates whether `value` is ambiguous.
+      For example, "2020/03/05" matches both "YYYY/MM/DD" and "YYYY/DD/MM".
+    """
+    matches = []
+    value = value.strip()
+    if not value:
+        return None
+
+    first_part = value.split()[0]
+
+    # Try to match preferred prefix.
+    if dt := parse_with_format(first_part, _PREFERRED_PREFIX_FORMAT):
+        matches.append(dt)
+
+    if not matches:
+        # Try alternative prefix formats.
+        for date_format in _RECOGNIZED_PREFIX_FORMATS:
+            if dt := parse_with_format(first_part, date_format):
+                matches.append(dt)
+
+    if not matches:
+        # Try matching the complete string.
+        for date_format in _RECOGNIZED_DATE_FORMATS:
+            if dt := parse_with_format(value, date_format):
+                matches.append(dt)
+
+    if not matches:
+        # Try ISO 8601.
+        try:
+            dt = datetime.datetime.fromisoformat(value)
+            matches.append(dt)
+        except ValueError:
+            pass
+
+    if not matches:
+        return None
+
+    # Ambiguous when the matches render to more than one distinct date.
+    is_ambiguous = len(set(map(to_preferred_format, matches))) > 1
+
+    return to_preferred_format(matches[0]), is_ambiguous
 
 
 class DateField(field_types.SingleLineTextField):
@@ -81,32 +133,29 @@ class DateField(field_types.SingleLineTextField):
                 reason=f"{self._name} is empty.",
                 additional=["Provide date in format YYYY-MM-DD."])
 
-        # Check if the first part (to ignore timezone info) uses the
-        # preferred format.
-        parts = value.split()
-        if format_matches(parts[0], _PREFERRED_PREFIX_FORMAT):
+        if not (parsed := parse_date(value)):
+            return vr.ValidationError(
+                reason=f"{self._name} is invalid.",
+                additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])
+
+        _, is_ambiguous = parsed
+        if is_ambiguous:
+            return vr.ValidationError(
+                reason=f"{self._name} is ambiguous.",
+                additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])
+
+        if not parse_with_format(value, _PREFERRED_PREFIX_FORMAT):
+            return vr.ValidationWarning(
+                reason=f"{self._name} isn't using the canonical format.",
+                additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])
+
+        return None
+
+    def narrow_type(self, value: str) -> Optional[str]:
+        """Returns an ISO 8601 date string (YYYY-MM-DD), or None."""
+        if not (parsed := parse_date(value)):
             return None
 
-        # Check if the first part (to ignore timezone info) uses a
-        # recognized format.
-        for prefix_format in _RECOGNIZED_PREFIX_FORMATS:
-            if format_matches(parts[0], prefix_format):
-                return vr.ValidationWarning(
-                    reason=f"{self._name} is not in the preferred format.",
-                    additional=[
-                        "Use YYYY-MM-DD.", f"Current value is '{value}'."
-                    ])
-
-        # Check the entire value for recognized date formats.
-        for date_format in _RECOGNIZED_DATE_FORMATS:
-            if format_matches(value, date_format):
-                return vr.ValidationWarning(
-                    reason=f"{self._name} is not in the preferred format.",
-                    additional=[
-                        "Use YYYY-MM-DD.", f"Current value is '{value}'."
-                    ])
-
-        # Return an error as the value's format was not recognized.
-        return vr.ValidationError(
-            reason=f"{self._name} is invalid.",
-            additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])
+        # We still return a date even if the parsing result is ambiguous. A
+        # date that's a few months off is better than nothing at all.
+        return parsed[0]
diff --git a/metadata/fields/custom/license.py b/metadata/fields/custom/license.py
index 713ee2258..d1e3578d2 100644
--- a/metadata/fields/custom/license.py
+++ b/metadata/fields/custom/license.py
@@ -49,6 +49,11 @@ _PATTERN_LICENSE_ALLOWED = re.compile(
 
 _PATTERN_VERBOSE_DELIMITER = re.compile(r" and | or | / ")
 
+# Split on the canonical delimiter, or any of the non-canonical delimiters.
+_PATTERN_SPLIT_LICENSE = re.compile("{}|{}".format(
+    _PATTERN_VERBOSE_DELIMITER.pattern,
+    re.escape(field_types.MetadataField.VALUE_DELIMITER)))
+
 
 def process_license_value(value: str,
                           atomic_delimiter: str) -> List[Tuple[str, bool]]:
@@ -134,3 +139,11 @@ class LicenseField(field_types.SingleLineTextField):
                 reason=f"Separate licenses using a '{self.VALUE_DELIMITER}'.")
 
         return None
+
+    def narrow_type(self, value: str) -> Optional[List[str]]:
+        if not value:
+            # Empty License field is equivalent to "not declared".
+            return None
+
+        parts = _PATTERN_SPLIT_LICENSE.split(value)
+        return [part for part in map(str.strip, parts) if part]
diff --git a/metadata/fields/custom/local_modifications.py b/metadata/fields/custom/local_modifications.py
index 1b1912a60..82e859118 100644
--- a/metadata/fields/custom/local_modifications.py
+++ b/metadata/fields/custom/local_modifications.py
@@ -6,6 +6,7 @@
 import os
 import re
 import sys
+from typing import Optional, Union, Literal
 
 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
 # The repo's root directory.
@@ -39,13 +40,28 @@ class LocalModificationsField(field_types.FreeformTextField):
     def __init__(self):
         super().__init__(name="Local Modifications", structured=False)
 
-    def should_terminate_field(self, field_value) -> bool:
-        field_value = field_value.strip()
+    def _is_no_modification(self, value: str) -> bool:
+        for pattern in _PATTERNS_NOT_MODIFIED:
+            if pattern.match(value):
+                return True
+
+        return False
+
+    def should_terminate_field(self, value: str) -> bool:
+        value = value.strip()
 
         # If we can reasonably infer the field value means "No modification",
         # terminate this field to avoid over extraction.
-        for pattern in _PATTERNS_NOT_MODIFIED:
-            if pattern.match(field_value):
-                return True
+        if self._is_no_modification(value):
+            return True
 
         return False
+
+    def narrow_type(self, value: str) -> Optional[Union[Literal[False], str]]:
+        if not value:
+            return False
+
+        if self._is_no_modification(value):
+            return False
+
+        return value
diff --git a/metadata/fields/custom/revision.py b/metadata/fields/custom/revision.py
new file mode 100644
index 000000000..4ffa6df57
--- /dev/null
+++ b/metadata/fields/custom/revision.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+# Copyright 2024 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import re
+import sys
+from typing import Optional
+
+_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+# The repo's root directory.
+_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", ".."))
+
+# Add the repo's root directory for clearer imports.
+sys.path.insert(0, _ROOT_DIR)
+
+import metadata.fields.field_types as field_types
+import metadata.fields.custom.version as version_field
+import metadata.fields.util as util
+import metadata.validation_result as vr
+
+
+class RevisionField(field_types.SingleLineTextField):
+    """Custom field for the revision."""
+
+    def __init__(self):
+        super().__init__(name="Revision")
+
+    def narrow_type(self, value: str) -> Optional[str]:
+        value = super().narrow_type(value)
+        if not value:
+            return None
+
+        if version_field.version_is_unknown(value):
+            return None
+
+        if util.is_known_invalid_value(value):
+            return None
+
+        return value
diff --git a/metadata/fields/custom/url.py b/metadata/fields/custom/url.py
index e1cc96ed0..2dfb7319b 100644
--- a/metadata/fields/custom/url.py
+++ b/metadata/fields/custom/url.py
@@ -6,7 +6,9 @@
 import os
 import re
 import sys
-from typing import Optional
+from typing import Optional, List
+from urllib.parse import urlparse, urlunparse
+from itertools import filterfalse
 
 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
 # The repo's root directory.
@@ -19,29 +21,73 @@ import metadata.fields.field_types as field_types
 import metadata.fields.util as util
 import metadata.validation_result as vr
 
-_PATTERN_URL_ALLOWED = re.compile(r"^(https?|ftp|git):\/\/\S+$")
 _PATTERN_URL_CANONICAL_REPO = re.compile(
     r"^This is the canonical (public )?repo(sitory)?\.?$", re.IGNORECASE)
 
+_SUPPORTED_SCHEMES = {
+    'http',
+    'https',
+    'git',
+    'ftp',
+}
+
+# URLs can't contain whitespace, so treat whitespace as a delimiter too. This handles a URL field that lists one URL per line (without a comma delimiter).
+_PATTERN_URL_DELIMITER = re.compile("{}|{}".format(
+    r'\s+', re.escape(field_types.MetadataField.VALUE_DELIMITER)))
+
+
+def _split_urls(value: str) -> List[str]:
+    """Split url field value into individual, non-empty, stripped URLs."""
+    urls = _PATTERN_URL_DELIMITER.split(value)
+    return list(filter(lambda x: len(x) > 0, map(str.strip, urls)))
+
+
+def _url_canonicalize(url: str) -> str:
+    """Return the canonicalized URL (e.g. make scheme lower case)."""
+    return urlunparse(urlparse(url))
+
+
+def _url_is_canonical(url: str) -> bool:
+    return url == _url_canonicalize(url)
+
+
+def _url_is_valid(url: str) -> bool:
+    """Checks whether the given `url` is acceptable:
+    * url can be parsed without an error.
+    * url uses a supported scheme / protocol.
+    """
+    try:
+        u = urlparse(url)
+    except ValueError:
+        return False
+
+    if u.scheme not in _SUPPORTED_SCHEMES:
+        return False
+
+    return True
 
 class URLField(field_types.MetadataField):
     """Custom field for the package URL(s)."""
     def __init__(self):
         super().__init__(name="URL")
 
+    def repo_is_canonical(self, value: str) -> bool:
+        """Returns whether `value` says this repo is the canonical repository."""
+        return util.matches(_PATTERN_URL_CANONICAL_REPO, value.strip())
+
     def validate(self, value: str) -> Optional[vr.ValidationResult]:
         """Checks the given value has acceptable URL values only.
 
         Note: this field supports multiple values.
         """
-        if util.matches(_PATTERN_URL_CANONICAL_REPO, value):
+        if self.repo_is_canonical(value):
             return None
 
-        invalid_values = []
-        for url in value.split(self.VALUE_DELIMITER):
-            url = url.strip()
-            if not util.matches(_PATTERN_URL_ALLOWED, url):
-                invalid_values.append(url)
+        urls = _split_urls(value)
+        if not urls:
+            return vr.ValidationError(reason=f"{self._name} must be provided.")
+
+        invalid_values = list(filterfalse(_url_is_valid, urls))
 
         if invalid_values:
             return vr.ValidationError(
@@ -53,4 +99,26 @@ class URLField(field_types.MetadataField):
                     f"Invalid values: {util.quoted(invalid_values)}.",
                 ])
 
+        non_canon_values = list(filterfalse(_url_is_canonical, urls))
+        if non_canon_values:
+            canon_values = list(map(_url_canonicalize, non_canon_values))
+            return vr.ValidationWarning(
+                reason=f"{self._name} contains non-canonical URLs.",
+                additional=[
+                    "URLs should be canonical and well-formed.",
+                    f"Non canonical values: {util.quoted(non_canon_values)}.",
+                    f"Canonicalized URLs should be: {util.quoted(canon_values)}."
+                ])
+
         return None
+
+    def narrow_type(self, value) -> Optional[List[str]]:
+        if not value:
+            return None
+
+        if self.repo_is_canonical(value):
+            return None
+
+        # Filter out invalid URLs, and canonicalize the URLs.
+        return list(
+            map(_url_canonicalize, filter(_url_is_valid, _split_urls(value))))
diff --git a/metadata/fields/custom/version.py b/metadata/fields/custom/version.py
index 9d78c954a..068676946 100644
--- a/metadata/fields/custom/version.py
+++ b/metadata/fields/custom/version.py
@@ -19,17 +19,16 @@ import metadata.fields.field_types as field_types
 import metadata.fields.util as util
 import metadata.validation_result as vr
 
-_PATTERN_NOT_APPLICABLE = re.compile(r"^N ?\/ ?A$", re.IGNORECASE)
-
 
-def is_unknown(value: str) -> bool:
+def version_is_unknown(value: str) -> bool:
     """Returns whether the value denotes the version being unknown."""
-    return (value == "0" or util.matches(_PATTERN_NOT_APPLICABLE, value)
+    return (value == "0" or util.is_not_applicable(value)
             or util.is_unknown(value))
 
 
 class VersionField(field_types.SingleLineTextField):
     """Custom field for the package version."""
+
     def __init__(self):
         super().__init__(name="Version")
 
@@ -55,3 +54,16 @@ class VersionField(field_types.SingleLineTextField):
                 ])
 
         return None
+
+    def narrow_type(self, value: str) -> Optional[str]:
+        value = super().narrow_type(value)
+        if not value:
+            return None
+
+        if version_is_unknown(value):
+            return None
+
+        if util.is_known_invalid_value(value):
+            return None
+
+        return value
diff --git a/metadata/fields/field_types.py b/metadata/fields/field_types.py
index cb5623152..c74af66bb 100644
--- a/metadata/fields/field_types.py
+++ b/metadata/fields/field_types.py
@@ -7,6 +7,7 @@ import os
 import re
 import sys
 from typing import Optional
+from enum import Enum
 
 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
 # The repo's root directory.
@@ -26,7 +27,6 @@ _PATTERN_YES_OR_NO = re.compile(r"^(yes|no)$", re.IGNORECASE)
 # case-insensitive. e.g. "No (test only)", "Yes?"
_PATTERN_STARTS_WITH_YES_OR_NO = re.compile(r"^(yes|no)", re.IGNORECASE) - class MetadataField: """Base class for all metadata fields.""" @@ -73,6 +73,15 @@ class MetadataField: """ raise NotImplementedError(f"{self._name} field validation not defined.") + def narrow_type(self, value): + """Returns a narrowly typed (e.g. bool) value for this field for + downstream consumption. + + The alternative being the downstream parses the string again. + """ + raise NotImplementedError( + f"{self._name} field value coersion not defined.") + class FreeformTextField(MetadataField): """Field where the value is freeform text.""" @@ -86,6 +95,9 @@ class FreeformTextField(MetadataField): return None + def narrow_type(self, value): + assert value is not None + return value class SingleLineTextField(FreeformTextField): """Field where the field as a whole is a single line of text.""" @@ -126,3 +138,6 @@ class YesNoField(SingleLineTextField): f"This field must be {util.YES} or {util.NO}.", f"Current value is '{value}'.", ]) + + def narrow_type(self, value) -> Optional[bool]: + return util.infer_as_boolean(super().narrow_type(value)) diff --git a/metadata/fields/known.py b/metadata/fields/known.py index 4d69a161c..6f97fccfc 100644 --- a/metadata/fields/known.py +++ b/metadata/fields/known.py @@ -21,12 +21,12 @@ import metadata.fields.custom.license_file import metadata.fields.custom.local_modifications import metadata.fields.custom.url import metadata.fields.custom.version +import metadata.fields.custom.revision import metadata.fields.field_types as field_types # Freeform text fields. NAME = field_types.SingleLineTextField("Name") SHORT_NAME = field_types.SingleLineTextField("Short Name") -REVISION = field_types.SingleLineTextField("Revision") DESCRIPTION = field_types.FreeformTextField("Description", structured=False) # Yes/no fields. 
@@ -43,6 +43,7 @@ LICENSE = metadata.fields.custom.license.LicenseField()
 LICENSE_FILE = metadata.fields.custom.license_file.LicenseFileField()
 URL = metadata.fields.custom.url.URLField()
 VERSION = metadata.fields.custom.version.VersionField()
+REVISION = metadata.fields.custom.revision.RevisionField()
 LOCAL_MODIFICATIONS = metadata.fields.custom.local_modifications.LocalModificationsField(
 )
 
diff --git a/metadata/fields/util.py b/metadata/fields/util.py
index 4c51ba7a9..9c256f8c0 100644
--- a/metadata/fields/util.py
+++ b/metadata/fields/util.py
@@ -26,6 +26,22 @@ _PATTERN_STARTS_WITH_YES = re.compile(r"^yes", re.IGNORECASE)
 # case-insensitive.
 _PATTERN_STARTS_WITH_NO = re.compile(r"^no", re.IGNORECASE)
 
+# Variants of N/A (Not Applicable); the whole value must be one of them.
+_PATTERN_NOT_APPLICABLE = re.compile(r"^(N ?\/ ?A|na|not applicable)\.?$",
+                                     re.IGNORECASE)
+
+# A collection of values that provides little information.
+# Use lower-case for easier comparison.
+_KNOWN_INVALID_VALUES = {
+    "0",
+    "varies",
+    "-",
+    "unknown",
+    "head",
+    "see deps",
+    "deps",
+}
+
 
 def matches(pattern: re.Pattern, value: str) -> bool:
     """Returns whether the value matches the pattern."""
@@ -61,3 +77,20 @@ def infer_as_boolean(value: str, default: bool = True) -> bool:
         return False
     else:
         return default
+
+
+def is_known_invalid_value(value: str) -> bool:
+    """Returns whether `value` is among the known bad values that provide
+    little machine readable information.
+    """
+    if not value:
+        return False
+
+    if value.lower() in _KNOWN_INVALID_VALUES:
+        return True
+
+    return False
+
+
+def is_not_applicable(value: str) -> bool:
+    return matches(_PATTERN_NOT_APPLICABLE, value)
diff --git a/metadata/tests/fields_test.py b/metadata/tests/fields_test.py
index 263a8c478..e2214bc09 100644
--- a/metadata/tests/fields_test.py
+++ b/metadata/tests/fields_test.py
@@ -95,17 +95,18 @@ class FieldValidationTest(unittest.TestCase):
     def test_date_validation(self):
         self._run_field_validation(
             field=known_fields.DATE,
-            valid_values=[
-                "2012-03-04", "2012-03-04 UTC", "2012-03-04 UTC+10:00"
-            ],
+            valid_values=["2012-03-04"],
             error_values=[
                 "",
                 "\n",
                 "N/A",
+                "03-04-12",  # Ambiguous month and day.
+                "04/03/2012",  # Ambiguous month and day.
             ],
             warning_values=[
+                "2012-03-04 UTC", "2012-03-04 UTC+10:00",
                 "2012/03/04 UTC+10:00", "20120304", "April 3, 2012",
-                "3 Apr 2012", "03-04-12", "04/03/2012",
+                "3 Apr 2012", "30/12/2000", "20-03-2020",
                 "Tue Apr 3 05:06:07 2012 +0800"
             ],
         )
@@ -181,14 +182,18 @@ class FieldValidationTest(unittest.TestCase):
                 "https://www.example.com/a",
                 "http://www.example.com/b",
                 "ftp://www.example.com/c,git://www.example.com/d",
+                "https://www.example.com/a\n https://example.com/b",
                 "This is the canonical public repository",
             ],
+            warning_values=[
+                # Scheme is case-insensitive, but should be lower case.
+                "Https://www.example.com/g",
+            ],
             error_values=[
                 "",
                 "\n",
                 "ghttps://www.example.com/e",
                 "https://www.example.com/ f",
-                "Https://www.example.com/g",
                 "This is an unrecognized message for the URL",
             ],
         )
diff --git a/metadata/tests/type_narrowing_test.py b/metadata/tests/type_narrowing_test.py
new file mode 100644
index 000000000..8f84f8668
--- /dev/null
+++ b/metadata/tests/type_narrowing_test.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+# Copyright 2024 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import sys
+import unittest
+from typing import Any, Callable
+
+_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+# The repo's root directory.
+_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
+
+# Add the repo's root directory for clearer imports.
+sys.path.insert(0, _ROOT_DIR)
+
+from metadata.fields.field_types import MetadataField
+import metadata.fields.known as fields
+from metadata.dependency_metadata import DependencyMetadata
+
+
+class FieldValidationTest(unittest.TestCase):
+    """Tests narrow_type() on fields we validate and extract structural data."""
+
+    def _test_on_field(self, field: MetadataField) -> Callable:
+
+        def expect(value: str, expected_value: Any, reason: str):
+            output = field.narrow_type(value)
+            self.assertEqual(
+                output, expected_value,
+                f'Field "{field.get_name()}" should {reason}. Input value'
+                f' was: "{value}", but got coerced into {repr(output)}')
+
+        return expect
+
+    def test_name(self):
+        expect = self._test_on_field(fields.NAME)
+        expect("package name", "package name", "return as-is")
+        expect("", "", "not coerce empty string to `None`")
+
+    def test_short_name(self):
+        expect = self._test_on_field(fields.SHORT_NAME)
+        expect("pkg-name", "pkg-name", "return as-is")
+        expect("", "", "not coerce empty string to `None`")
+
+    def test_url(self):
+        expect = self._test_on_field(fields.URL)
+        expect("", None, "treat empty string as None")
+        expect("https://example.com/", ["https://example.com/"],
+               "return valid url")
+        expect("https://example.com/,\nhttps://example2.com/",
+               ["https://example.com/", "https://example2.com/"],
+               "return multiple valid urls")
+        expect("file://test", [], "reject unsupported scheme")
+        expect("file://test,\nhttps://example.com", ["https://example.com"],
+               "reject unsupported scheme")
+        expect("HTTPS://example.com", ["https://example.com"],
+               "canonicalize url")
+        expect("http", [], "reject invalid url")
+        expect("This is the canonical repo.", None,
+               "understand the this repo is canonical message")
+
+    def test_version(self):
+        expect = self._test_on_field(fields.VERSION)
+        expect("", None, "treat empty string as None")
+        expect("0", None, "treat invalid value as None")
+        expect("varies", None, "treat invalid value as None")
+        expect("see deps", None, "treat invalid value as None")
+        expect("N/A", None, "N/A is treated as None")
+        expect("Not applicable.", None, "N/A is treated as None")
+
+    def test_date(self):
+        expect = self._test_on_field(fields.DATE)
+        expect("", None, "treat empty string as None")
+        expect("0", None, "treat invalid value as None")
+        expect("varies", None, "treat invalid value as None")
+        expect("2024-01-02", "2024-01-02", "accepts ISO 8601 date")
+        expect("2024-01-02T03:04:05Z", "2024-01-02",
+               "accepts ISO 8601 date time")
+        expect("Jan 2 2024", "2024-01-02", "accepts locale format")
+        expect(
+            "02/03/2000", "2000-03-02",
+            "accepts ambiguous MM/DD format (better than no date info at all)")
+        expect("11/30/2000", "2000-11-30", "accepts unambiguous MM/DD format")
+
+    def test_revision(self):
+        expect = self._test_on_field(fields.REVISION)
+        expect("", None, "treat empty string as None")
+        expect("0", None, "treat invalid value as None")
+        expect("varies", None, "treat invalid value as None")
+        expect("see deps", None, "treat invalid value as None")
+        expect("N/A", None, "N/A is treated as None")
+        expect("Not applicable.", None, "N/A is treated as None")
+
+    def test_license(self):
+        expect = self._test_on_field(fields.LICENSE)
+        expect("", None, "treat empty string as None")
+        expect("LICENSE-1", ["LICENSE-1"], "return as a list")
+        expect("LGPL v2 and BSD", ["LGPL v2", "BSD"], "return as a list")
+
+    def test_license_file(self):
+        # TODO(b/321154076): Consider excluding files that don't exist on
+        # disk if it's not too hard.
+        #
+        # Right now, we return the unparsed license file field as-is.
+        expect = self._test_on_field(fields.LICENSE_FILE)
+        expect("src/file", "src/file", "return value as-is")
+
+    def test_security_critical(self):
+        expect = self._test_on_field(fields.SECURITY_CRITICAL)
+        expect("yes", True, "understand truthy value")
+        expect("Yes", True, "understand truthy value")
+        expect("no", False, "understand falsey value")
+        expect("No, because", False,
+               "understand falsey value, with description")
+
+    def test_shipped(self):
+        expect = self._test_on_field(fields.SHIPPED)
+        expect("yes", True, "understand truthy value")
+        expect("Yes, but", True, "understand truthy value with extra comment")
+        expect("no", False, "understand falsey value")
+        expect("no, because", False,
+               "understand falsey value, with extra comment")
+
+    def test_shipped_in_chromium(self):
+        expect = self._test_on_field(fields.SHIPPED_IN_CHROMIUM)
+        expect("yes", True, "understand truthy value")
+        expect("Yes", True, "understand truthy value")
+        expect("no", False, "understand falsey value")
+        expect("no, because", False,
+               "understand falsey value, with extra comment")
+
+    def test_license_android_compatible(self):
+        expect = self._test_on_field(fields.LICENSE_ANDROID_COMPATIBLE)
+        expect("yes", True, "understand truthy value")
+        expect("Yes", True, "understand truthy value")
+        expect("no", False, "understand falsey value")
+        expect("no, because", False,
+               "understand falsey value, with extra comment")
+
+    def test_cpe_prefix(self):
+        expect = self._test_on_field(fields.CPE_PREFIX)
+        expect("unknown", "unknown", "understand unknown")
+        expect("bad_cpe_format", None, "rejects invalid value")
+        expect("cpe:/a:d3", "cpe:/a:d3", "accept a valid cpe prefix")
+        expect("cpe:/a:D3", "cpe:/a:d3", "normalize to lowercase")
+
+    def test_description(self):
+        expect = self._test_on_field(fields.DESCRIPTION)
+        expect("desc", "desc", "return value as-is")
+
+    def test_local_modification(self):
+        expect = self._test_on_field(fields.LOCAL_MODIFICATIONS)
+        expect("none", False, "understands none")
+        expect("(none)", False, "understands none")
+        expect("not applicable", False, "understands N/A")
+        expect("", False, "treat empty string as False")
+        expect("modified X file", "modified X file",
+               "return value as-is if it doesn't mean no modification")
+
+    def test_dependency_data_return_as_property(self):
+        dm = DependencyMetadata()
+        dm.add_entry("name", "package")
+        dm.add_entry("url", "git://git@example.com,\nbad_url://example.com")
+        dm.add_entry("security critical", "no")
+        dm.add_entry("date", "2024-01-02")
+        dm.add_entry("revision", "")
+
+        self.assertEqual(dm.name, "package")
+        self.assertEqual(dm.url, ["git://git@example.com"])
+        self.assertEqual(dm.security_critical, False)
+        self.assertEqual(dm.date, "2024-01-02")
+        self.assertEqual(dm.revision, None)
+        self.assertEqual(dm.version, None)
+
+    def test_dependency_data_repo_is_canonical(self):
+        dm = DependencyMetadata()
+        dm.add_entry("name", "package")
+        dm.add_entry("url", "This is the canonical repo.")
+
+        self.assertEqual(dm.url, None)
+        self.assertEqual(dm.is_canonical, True)
+
+
+if __name__ == "__main__":
+    unittest.main()