depot_tools/metadata/dependency_metadata.py

#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from collections import defaultdict
import os
import sys
import itertools
from typing import Dict, List, Set, Tuple, Union, Optional, Literal, Any
from urllib.parse import urlparse
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory.
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
# Used to identify git clonable domains.
GIT_DOMAIN_INDICATORS = ["git", "googlesource", "bitbucket", "github", "gitlab"]
# Substrings for supported package manager URLs.
PACKAGE_MANAGER_PATHS = (
"crates.io/crates/",
"npmjs.com/package/",
"developer.android.com/jetpack/androidx/releases/",
"/maven2/",
"/artifacts/repository/",
)
# Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR)
import metadata.fields.field_types as field_types
import metadata.fields.custom.cpe_prefix as cpe_prefix_util
import metadata.fields.custom.license as license_util
import metadata.fields.custom.version as version_util
import metadata.fields.custom.mitigated as mitigated_util
import metadata.fields.known as known_fields
import metadata.fields.util as util
import metadata.validation_result as vr
from metadata.fields.custom.license_allowlist import OPEN_SOURCE_SPDX_LICENSES
class DependencyMetadata:
"""The metadata for a single dependency.
See the @property declarations below to retrieve validated fields for
downstream consumption.
Each property returns `None` if the provided value (e.g. in the
README.chromium file) is clearly invalid; otherwise, it returns a
suitably typed value (see the comments on each property).
To retrieve unvalidated (i.e. raw) field values, use get_entries().
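Illustrative usage (a minimal sketch; the field names and values below
are hypothetical, not taken from a real README.chromium):

    metadata = DependencyMetadata()
    metadata.add_entry("Name", "libfoo")
    metadata.add_entry("Version", "1.2.3")
    metadata.name           # validated property access
    metadata.get_entries()  # [("Name", "libfoo"), ("Version", "1.2.3")]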
"""
# Fields that are always required.
_MANDATORY_FIELDS = {
known_fields.NAME,
known_fields.URL,
known_fields.VERSION,
known_fields.LICENSE,
known_fields.SECURITY_CRITICAL,
known_fields.SHIPPED,
}
# Aliases for fields, where:
# * key is the alias field; and
# * value is the main field to which it should be mapped.
# Note: if both the alias and main fields are specified in metadata,
# the value from the alias field will be used.
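# For example, SHIPPED_IN_CHROMIUM is an alias for SHIPPED; if both
# appear in a metadata block, the SHIPPED_IN_CHROMIUM value is used.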
_FIELD_ALIASES = {
known_fields.SHIPPED_IN_CHROMIUM: known_fields.SHIPPED,
}
def __init__(self):
# The record of all entries added, including repeated fields.
self._entries: List[Tuple[str, str]] = []
# The current value of each field.
self._metadata: Dict[field_types.MetadataField, str] = {}
# The line numbers of each metadata field.
self._metadata_line_numbers: Dict[field_types.MetadataField,
Set[int]] = defaultdict(lambda: set())
# The first and last line numbers (in the text file) of this
# dependency's metadata.
self._first_line = float('inf')
self._last_line = -1
# The record of how many times a field entry was added.
self._occurrences: Dict[field_types.MetadataField,
int] = defaultdict(int)
def add_entry(self, field_name: str, field_value: str):
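"""Records a raw (field_name, value) entry; if the field name is a known
metadata field, also updates its current value and occurrence count."""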
value = field_value.strip()
self._entries.append((field_name, value))
field = known_fields.get_field(field_name)
if field:
self._metadata[field] = value
self._occurrences[field] += 1
def has_entries(self) -> bool:
return len(self._entries) > 0
def get_entries(self) -> List[Tuple[str, str]]:
return list(self._entries)
def record_line(self, line_number):
"""Records `line_number` to be part of this metadata."""
self._first_line = min(self._first_line, line_number)
self._last_line = max(self._last_line, line_number)
def record_field_line_number(self, field: field_types.MetadataField,
line_number: int):
self._metadata_line_numbers[field].add(line_number)
def get_first_and_last_line_number(self) -> Tuple[int, int]:
return (self._first_line, self._last_line)
def get_field_line_numbers(self,
field: field_types.MetadataField) -> List[int]:
return sorted(self._metadata_line_numbers[field])
def all_licenses_allowlisted(self, license_field_value: str, is_open_source_project: bool) -> bool:
"""Returns whether all licenses in the field are allowlisted.
Assumes a non-empty license_field_value."""
licenses = license_util.process_license_value(
license_field_value,
atomic_delimiter=known_fields.LICENSE.VALUE_DELIMITER)
for lic, valid in licenses:
allowed = license_util.is_license_allowlisted(lic, is_open_source_project=is_open_source_project)
if not valid or not allowed:
return False
return True
def only_open_source_licenses(self, license_field_value: str) -> List[str]:
"""Returns a list of licenses that are only allowed in open source projects."""
licenses = license_util.process_license_value(
license_field_value,
atomic_delimiter=known_fields.LICENSE.VALUE_DELIMITER)
open_source_only = []
for lic, valid in licenses:
if valid and lic in OPEN_SOURCE_SPDX_LICENSES:
open_source_only.append(lic)
return open_source_only
def _assess_required_fields(self, is_open_source_project: bool = False) -> Set[field_types.MetadataField]:
"""Returns the set of required fields, based on the current
metadata.
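For example (hypothetical case): a shipped dependency whose License is
not fully allowlisted additionally requires LICENSE_FILE and
LICENSE_ANDROID_COMPATIBLE.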
"""
required = set(self._MANDATORY_FIELDS)
# Assume the dependency is shipped if not specified.
shipped_value = self._metadata.get(known_fields.SHIPPED)
is_shipped = (shipped_value is None
or util.infer_as_boolean(shipped_value, default=True))
if is_shipped:
# A license file is required if the dependency is shipped.
required.add(known_fields.LICENSE_FILE)
# License compatibility with Android must be set if the
# package is shipped and the license is not in the
# allowlist.
license_value = self._metadata.get(known_fields.LICENSE)
if not license_value or not self.all_licenses_allowlisted(license_value, is_open_source_project):
required.add(known_fields.LICENSE_ANDROID_COMPATIBLE)
return required
def validate(self, source_file_dir: str,
repo_root_dir: str,
is_open_source_project: bool = False) -> List[vr.ValidationResult]:
"""Validates all the metadata.
Args:
source_file_dir: the directory of the file that the metadata
is from.
repo_root_dir: the repository's root directory.
is_open_source_project: whether the project is open source.
Returns: the metadata's validation results.
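Example (illustrative; the directory arguments are hypothetical):
    results = metadata.validate(source_file_dir="third_party/libfoo",
                                repo_root_dir="/path/to/src")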
"""
results = []
# Check for duplicate fields.
repeated_fields = [
field for field, count in self._occurrences.items() if count > 1
]
if repeated_fields:
repeated = ", ".join([
f"{field.get_name()} ({self._occurrences[field]})"
for field in repeated_fields
])
error = vr.ValidationError(reason="There is a repeated field.",
additional=[
f"Repeated fields: {repeated}",
])
# Merge line numbers.
lines = sorted(
set(
itertools.chain.from_iterable([
self.get_field_line_numbers(field)
for field in repeated_fields
])))
error.set_lines(lines)
results.append(error)
# Process alias fields.
sources = {}
for alias_field, main_field in self._FIELD_ALIASES.items():
if alias_field in self._metadata:
# Validate the value that was present for the main field
# before overwriting it with the alias field value.
if main_field in self._metadata:
main_value = self._metadata.get(main_field)
field_result = main_field.validate(main_value)
if field_result:
field_result.set_tag(tag="field",
value=main_field.get_name())
field_result.set_lines(
self.get_field_line_numbers(main_field))
results.append(field_result)
self._metadata[main_field] = self._metadata[alias_field]
sources[main_field] = alias_field
self._metadata.pop(alias_field)
# Validate values for all present fields.
for field, value in self._metadata.items():
source_field = sources.get(field) or field
field_result = source_field.validate(value)
if field_result:
field_result.set_tag(tag="field", value=source_field.get_name())
field_result.set_lines(
self.get_field_line_numbers(source_field))
results.append(field_result)
# Check required fields are present.
required_fields = self._assess_required_fields(is_open_source_project=is_open_source_project)
for field in required_fields:
if field not in self._metadata:
field_name = field.get_name()
error = vr.ValidationError(
reason=f"Required field '{field_name}' is missing.")
results.append(error)
# If CPEPrefix is provided without a version, the Version field must be
# present.
if self._cpe_prefix_lacks_version():
error = vr.ValidationWarning(
reason="CPEPrefix is missing a version, and no Version is "
"specified.",
additional=[
"When the 'Version' field is not provided, the 'CPEPrefix' "
"must include a version component."
])
error.set_lines(self.get_field_line_numbers(known_fields.CPE_PREFIX))
results.append(error)
# If the dependency is hosted elsewhere (i.e. Chromium isn't its
# canonical repository), at least one of the Version, Date or Revision
# fields must be provided, unless the dependency is canonical or internal.
if not (self.is_canonical or self.is_internal) and not (
self.version or self.date or self.revision
or self.revision_in_deps):
versioning_fields = [
known_fields.VERSION, known_fields.DATE, known_fields.REVISION
]
names = util.quoted(
[field.get_name() for field in versioning_fields])
error = vr.ValidationError(
reason="Versioning fields are insufficient.",
additional=[f"Provide at least one of [{names}]."],
)
results.append(error)
# Check existence of the license file(s) on disk.
license_file_value = self._metadata.get(known_fields.LICENSE_FILE)
if license_file_value is not None:
result = known_fields.LICENSE_FILE.validate_on_disk(
value=license_file_value,
source_file_dir=source_file_dir,
repo_root_dir=repo_root_dir,
)
if result:
result.set_tag(tag="field",
value=known_fields.LICENSE_FILE.get_name())
result.set_lines(
self.get_field_line_numbers(known_fields.LICENSE_FILE))
results.append(result)
if not is_open_source_project:
license_value = self._metadata.get(known_fields.LICENSE)
if license_value is not None:
not_allowed_licenses = self.only_open_source_licenses(license_value)
if len(not_allowed_licenses) > 0:
license_result = vr.ValidationWarning(
reason=f"License has a license not in the allowlist."
" (see https://source.chromium.org/chromium/chromium/tools/depot_tools/+/main:metadata/fields/custom/license_allowlist.py).",
additional=[
f"The following license{'s are' if len(not_allowed_licenses) > 1 else ' is'} only allowed in open source projects: "
f"{util.quoted(not_allowed_licenses)}.",
])
license_result.set_tag(tag="field", value=known_fields.LICENSE.get_name())
license_result.set_lines(
self.get_field_line_numbers(known_fields.LICENSE))
results.append(license_result)
# Match values reported in the 'Mitigated:' field with the supplementary
# fields, e.g. 'CVE-2024-12345: description'.
mitigated_values = self._return_as_property(known_fields.MITIGATED)
mitigated_ids = set()
if mitigated_values is not None:
mitigated_ids = set(mitigated_values)
# Reported as their own field e.g. 'CVE-2024-12345: description'.
mitigated_entries = set(self._mitigations_from_entries().keys())
missing_descriptions = mitigated_ids - mitigated_entries
if missing_descriptions:
results.append(
vr.ValidationWarning(
reason="Missing descriptions for vulnerability IDs",
additional=[
f"Add descriptions for: {util.quoted(missing_descriptions)}"
]))
extra_descriptions = mitigated_entries - mitigated_ids
if extra_descriptions:
results.append(
vr.ValidationWarning(
reason="Found descriptions for unlisted vulnerability IDs",
additional=[
f"List these IDs in the 'Mitigated:' field: {util.quoted(extra_descriptions)}"
]))
# Begin by only warning for a small subset of cases.
# TODO(b/438384123): Expand this to all cases.
if (self.security_critical
and self.shipped
and self.vuln_scan_sufficiency == "insufficient"):
# TODO(b/448003595): Provide a pre-populated bug link for when people
# think this is incorrect.
results.append(
vr.ValidationWarning(
reason=
"Dependency metadata is insufficient for vulnerability scanning.",
additional=[
"Please provide one of the following combinations:",
"- 'CPEPrefix' with a version.",
"- A git clonable 'URL' and a 'Revision'.",
"- A git clonable 'URL' and a 'Version' matching the git tag.",
"- A package manager 'URL' and a 'Version'. ",
]))
return results
def _cpe_prefix_lacks_version(self) -> bool:
"""Returns whether a CPEPrefix is provided without a version component
while no valid Version field is present."""
cpe_prefix = self._metadata.get(known_fields.CPE_PREFIX)
version = self._metadata.get(known_fields.VERSION)
cpe_provided = cpe_prefix and not util.is_unknown(cpe_prefix)
version_is_valid = version and not util.is_not_applicable(version)
cpe_has_version = cpe_prefix and cpe_prefix_util.has_version_component(
cpe_prefix)
return bool(cpe_provided and not (version_is_valid or cpe_has_version))
def _mitigations_from_entries(self) -> Dict[str, str]:
result = {}
for key, value in self._entries:
if mitigated_util.PATTERN_VULN_ID_WITH_ANCHORS.match(key):
result[key] = value.strip()
return result
def _return_as_property(self, field: field_types.MetadataField) -> Any:
"""Helper function to create a property for DependencyMetadata.
The property accessor validates and returns the sanitized field value.
"""
assert field in known_fields.ALL_FIELDS
raw_value = self._metadata.get(field, None)
if raw_value is None:
# Field is not set.
return None
return field.narrow_type(raw_value)
@property
def name(self) -> Optional[str]:
return self._return_as_property(known_fields.NAME)
@property
def mitigations(self) -> Dict[str, str]:
"""Returns mapping of vulnerability IDs to their descriptions."""
result = self._mitigations_from_entries()
mitigated_values = self._return_as_property(known_fields.MITIGATED) or []
# Add IDs listed in the Mitigated field that lack a supplementary
# mitigation description line.
for id in mitigated_values:
if id not in result:
result[id] = ""
return result
@property
def short_name(self) -> Optional[str]:
return self._return_as_property(known_fields.SHORT_NAME)
@property
def url(self) -> Optional[List[str]]:
"""
Returns a list of URLs that point to the upstream repo.
The URLs are guaranteed to parse with `urllib.parse.urlparse` without errors.
Returns None if this repository is the canonical repository of this
dependency (see is_canonical below).
"""
return self._return_as_property(known_fields.URL)
@property
def is_canonical(self) -> bool:
"""
Returns whether this repository is the canonical public repository of this dependency.
This is derived from a special value in the URL field.
"""
value = self._metadata.get(known_fields.URL, "")
return known_fields.URL.repo_is_canonical(value)
@property
def is_internal(self) -> bool:
"""
Returns whether this repository is internal to google/chromium.
This is derived from a special value in the URL field.
"""
value = self._metadata.get(known_fields.URL, "")
return known_fields.URL.repo_is_internal(value)
@property
def version(self) -> Optional[str]:
return self._return_as_property(known_fields.VERSION)
@property
def date(self) -> Optional[str]:
"""Returns in "YYYY-MM-DD" format."""
return self._return_as_property(known_fields.DATE)
@property
def revision(self) -> Optional[str]:
return self._return_as_property(known_fields.REVISION)
@property
def revision_in_deps(self) -> bool:
value = self._metadata.get(known_fields.REVISION, "")
return known_fields.REVISION.is_revision_in_deps(value)
@property
def license(self) -> Optional[List[str]]:
"""Returns a list of license names."""
return self._return_as_property(known_fields.LICENSE)
@property
def license_file(self) -> Optional[List[str]]:
# TODO(b/321154076): Consider excluding files that don't exist on
# disk, if it's not too hard.
#
# Plumbing src_root and dependency_dir into field validator is
# required.
return self._return_as_property(known_fields.LICENSE_FILE)
@property
def security_critical(self) -> Optional[bool]:
return self._return_as_property(known_fields.SECURITY_CRITICAL)
@property
def shipped(self) -> Optional[bool]:
return self._return_as_property(known_fields.SHIPPED)
@property
def shipped_in_chromium(self) -> Optional[bool]:
return self._return_as_property(known_fields.SHIPPED_IN_CHROMIUM)
@property
def license_android_compatible(self) -> Optional[bool]:
return self._return_as_property(known_fields.LICENSE_ANDROID_COMPATIBLE)
@property
def cpe_prefix(self) -> Optional[str]:
"""Returns a lowercase string (CPE names are case-insensitive)."""
return self._return_as_property(known_fields.CPE_PREFIX)
@property
def description(self) -> Optional[str]:
return self._return_as_property(known_fields.DESCRIPTION)
@property
def local_modifications(self) -> Optional[Union[Literal[False], str]]:
"""Returns `False` if there's no local modifications.
Otherwise the text content extracted from the metadata.
"""
return self._return_as_property(known_fields.LOCAL_MODIFICATIONS)
@property
def update_mechanism(
self) -> Optional[Tuple[str, Optional[str], Optional[str]]]:
"""
Returns the parsed Update Mechanism value.
The format is `Primary[.Secondary] [(bug_link)]`. This function returns
(Primary, Secondary, bug_link) if the field is valid, otherwise (None, None, None).
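For example (illustrative value), `Static.GoogleManaged (https://crbug.com/12345)`
would yield "Static" as the primary, "GoogleManaged" as the secondary, and the
bug link as the third element.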
"""
return self._return_as_property(known_fields.UPDATE_MECHANISM)
@property
def url_is_git_clonable(self) -> bool:
"""
Checks if any of the provided URLs appear to be a clonable Git repository.
This is determined by checking for:
- The 'git://' protocol.
- A path ending in '.git'.
- A domain component matching an entry in GIT_DOMAIN_INDICATORS.
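For example (hypothetical URLs), "git://example.com/bar.git" and
"https://chromium.googlesource.com/foo" would both be treated as clonable.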
"""
for u in self.url or []:
if not u:
continue
parsed = urlparse(u)
if parsed.scheme == "git" or parsed.path.endswith(".git"):
return True
if parsed.netloc:
domain_parts = parsed.netloc.split(".")
if any(gi in domain_parts for gi in GIT_DOMAIN_INDICATORS):
return True
return False
@property
def url_is_package_manager(self) -> bool:
"""
Checks if any URL contains a known package manager path substring. See PACKAGE_MANAGER_PATHS for the supported list.
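For example (hypothetical URL), "https://crates.io/crates/serde" matches the
"crates.io/crates/" path and names a package, so it qualifies.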
"""
for u in self.url or []:
if not u:
continue
for p in PACKAGE_MANAGER_PATHS:
if p in u and u.split(p)[-1]:
return True
return False
@property
def vuln_scan_sufficiency(self) -> str:
"""Determines if the dependency metadata is sufficient for vulnerability scanning.
Returns:
A string indicating the sufficiency status:
- 'sufficient:CPE' if a CPE prefix is provided and a version is available
  (either within the CPEPrefix itself or via the Version field).
- 'sufficient:URL and Revision' if the URL is a git URL and a Revision is provided.
- 'sufficient:URL and Revision[DEPS]' as above, but the Revision is 'DEPS'.
- 'sufficient:Git URL and Version' if a git clonable URL and a Version are provided.
- 'sufficient:Package Manager URL and Version' if a package manager URL and a Version are provided.
- 'ignore:Canonical' if the dependency is the canonical repository.
- 'ignore:Internal' if the dependency is internal.
- 'ignore:Static' if the dependency's update mechanism is static.
- 'ignore:GoogleManaged' if the dependency's update mechanism ends in .GoogleManaged.
- 'insufficient' otherwise.
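For example (hypothetical dependency), a git clonable URL such as
"https://github.com/example/project" together with a Revision yields
'sufficient:URL and Revision'.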
"""
if self.cpe_prefix and not self._cpe_prefix_lacks_version():
return "sufficient:CPE"
if self.url:
if self.revision and self.url_is_git_clonable:
return "sufficient:URL and Revision"
if self.revision_in_deps:
return "sufficient:URL and Revision[DEPS]"
if self.version:
if self.url_is_git_clonable:
return "sufficient:Git URL and Version"
if self.url_is_package_manager:
return "sufficient:Package Manager URL and Version"
if self.is_canonical:
return "ignore:Canonical"
if self.is_internal:
return "ignore:Internal"
if self.update_mechanism and self.update_mechanism[0]:
if self.update_mechanism[0].lower() == "static":
return "ignore:Static"
if (self.update_mechanism[1]
and self.update_mechanism[1].lower() == "googlemanaged"):
return "ignore:GoogleManaged"
return "insufficient"