[ssci] Add CheckChromiumDependencyMetadata to presubmit_canned_checks

This CL adds a new function `CheckChromiumDependencyMetadata` in
`presubmit_canned_checks.py`. It can be used to check that files satisfy
the format defined by `README.chromium.template`
(https://chromium.googlesource.com/chromium/src/+/main/third_party/README.chromium.template).

The code for metadata validation can be found in `//metadata`. Note that
all metadata validation issues will be returned as warnings only for
now, while the quality of metadata is being uplifted.


Bug: b:277147404
Change-Id: Iacf1b3a11219ab752549f6dc6e882c93c0fbe780
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4812578
Commit-Queue: Anne Redulla <aredulla@google.com>
Reviewed-by: Rachael Newitt <renewitt@google.com>
Reviewed-by: Gavin Mak <gavinmak@google.com>
Reviewed-by: Bruce Dawson <brucedawson@chromium.org>
changes/78/4812578/11
Anne Redulla 2 years ago committed by LUCI CQ
parent 66d0f15a56
commit b9d7c85582

@ -0,0 +1,15 @@
# Copyright 2023 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
# The base names that are known to be Chromium metadata files.
_METADATA_FILES = {"README.chromium"}


def is_metadata_file(path: str) -> bool:
  """Returns whether the given path's base name is a Chromium metadata file."""
  base_name = os.path.basename(path)
  return base_name in _METADATA_FILES

@ -31,23 +31,21 @@ _PATTERN_FIELD_DECLARATION = re.compile(
) )
def parse_file(filepath: str) -> List[dm.DependencyMetadata]: def parse_content(content: str) -> List[dm.DependencyMetadata]:
"""Reads and parses the metadata in the given file. """Reads and parses the metadata from the given string.
Args: Args:
filepath: path to metadata file. content: the string to parse metadata from.
Returns: Returns:
each dependency's metadata described in the file. all the metadata, which may be for zero or more dependencies, from the
given string.
""" """
with open(filepath, "r", encoding="utf-8") as f:
lines = f.readlines()
dependencies = [] dependencies = []
current_metadata = dm.DependencyMetadata() current_metadata = dm.DependencyMetadata()
current_field_name = None current_field_name = None
current_field_value = "" current_field_value = ""
for line in lines: for line in content.splitlines(keepends=True):
# Check if a new dependency is being described. # Check if a new dependency is being described.
if DEPENDENCY_DIVIDER.match(line): if DEPENDENCY_DIVIDER.match(line):
if current_field_name: if current_field_name:

@ -14,6 +14,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
# Add the repo's root directory for clearer imports. # Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR) sys.path.insert(0, _ROOT_DIR)
import gclient_utils
import metadata.parse import metadata.parse
@ -22,7 +23,8 @@ class ParseTest(unittest.TestCase):
"""Check parsing works for a single dependency's metadata.""" """Check parsing works for a single dependency's metadata."""
filepath = os.path.join(_THIS_DIR, "data", filepath = os.path.join(_THIS_DIR, "data",
"README.chromium.test.single-valid") "README.chromium.test.single-valid")
all_metadata = metadata.parse.parse_file(filepath) content = gclient_utils.FileRead(filepath)
all_metadata = metadata.parse.parse_content(content)
self.assertEqual(len(all_metadata), 1) self.assertEqual(len(all_metadata), 1)
self.assertListEqual( self.assertListEqual(
@ -49,7 +51,8 @@ class ParseTest(unittest.TestCase):
"""Check parsing works for multiple dependencies' metadata.""" """Check parsing works for multiple dependencies' metadata."""
filepath = os.path.join(_THIS_DIR, "data", filepath = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-invalid") "README.chromium.test.multi-invalid")
all_metadata = metadata.parse.parse_file(filepath) content = gclient_utils.FileRead(filepath)
all_metadata = metadata.parse.parse_content(content)
# Dependency metadata with no entries at all are ignored. # Dependency metadata with no entries at all are ignored.
self.assertEqual(len(all_metadata), 3) self.assertEqual(len(all_metadata), 3)

@ -14,25 +14,81 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
# Add the repo's root directory for clearer imports. # Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR) sys.path.insert(0, _ROOT_DIR)
import gclient_utils
import metadata.validate import metadata.validate
# Common paths for tests.
_SOURCE_FILE_DIR = os.path.join(_THIS_DIR, "data")
_VALID_METADATA_FILEPATH = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-valid")
_INVALID_METADATA_FILEPATH = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-invalid")
class ValidateContentTest(unittest.TestCase):
"""Tests for the validate_content function."""
def test_empty(self):
  """Tests that validating an empty string yields a single fatal error."""
  # Validate empty content (should result in a validation error).
  results = metadata.validate.validate_content(
      content="",
      source_file_dir=_SOURCE_FILE_DIR,
      repo_root_dir=_THIS_DIR,
  )
  # Empty content has no dependency metadata at all, which
  # validate_content reports as exactly one fatal result.
  self.assertEqual(len(results), 1)
  self.assertTrue(results[0].is_fatal())
def test_valid(self):
  """Tests that a known-valid metadata file produces no validation results."""
  # Validate valid file content (no errors or warnings).
  results = metadata.validate.validate_content(
      content=gclient_utils.FileRead(_VALID_METADATA_FILEPATH),
      source_file_dir=_SOURCE_FILE_DIR,
      repo_root_dir=_THIS_DIR,
  )
  self.assertEqual(len(results), 0)
class ValidateTest(unittest.TestCase): def test_invalid(self):
def test_validate_file(self): # Validate invalid file content (both errors and warnings).
results = metadata.validate.validate_content(
content=gclient_utils.FileRead(_INVALID_METADATA_FILEPATH),
source_file_dir=_SOURCE_FILE_DIR,
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(results), 11)
error_count = 0
warning_count = 0
for result in results:
if result.is_fatal():
error_count += 1
else:
warning_count += 1
self.assertEqual(error_count, 9)
self.assertEqual(warning_count, 2)
class ValidateFileTest(unittest.TestCase):
"""Tests for the validate_file function."""
def test_missing(self):
  """Tests validation when the metadata file does not exist on disk."""
  # Validate a file that does not exist.
  results = metadata.validate.validate_file(
      filepath=os.path.join(_THIS_DIR, "data", "MISSING.chromium"),
      repo_root_dir=_THIS_DIR,
  )
  # There should be exactly 1 error returned.
  self.assertEqual(len(results), 1)
  self.assertTrue(results[0].is_fatal())
def test_valid(self):
# Validate a valid file (no errors or warnings). # Validate a valid file (no errors or warnings).
test_filepath = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-valid")
results = metadata.validate.validate_file( results = metadata.validate.validate_file(
filepath=test_filepath, filepath=_VALID_METADATA_FILEPATH,
repo_root_dir=_THIS_DIR, repo_root_dir=_THIS_DIR,
) )
self.assertEqual(len(results), 0) self.assertEqual(len(results), 0)
def test_invalid(self):
# Validate an invalid file (both errors and warnings). # Validate an invalid file (both errors and warnings).
test_filepath = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-invalid")
results = metadata.validate.validate_file( results = metadata.validate.validate_file(
filepath=test_filepath, filepath=_INVALID_METADATA_FILEPATH,
repo_root_dir=_THIS_DIR, repo_root_dir=_THIS_DIR,
) )
self.assertEqual(len(results), 11) self.assertEqual(len(results), 11)
@ -46,26 +102,39 @@ class ValidateTest(unittest.TestCase):
self.assertEqual(error_count, 9) self.assertEqual(error_count, 9)
self.assertEqual(warning_count, 2) self.assertEqual(warning_count, 2)
def test_check_file(self):
# Check a valid file (no errors or warnings). class CheckFileTest(unittest.TestCase):
test_filepath = os.path.join(_THIS_DIR, "data", """Tests for the check_file function."""
"README.chromium.test.multi-valid") def test_missing(self):
# Check a file that does not exist.
errors, warnings = metadata.validate.check_file( errors, warnings = metadata.validate.check_file(
filepath=test_filepath, filepath=os.path.join(_THIS_DIR, "data", "MISSING.chromium"),
repo_root_dir=_THIS_DIR,
)
# TODO(aredulla): update this test once validation errors can be returned
# as errors. Bug: b/285453019.
# self.assertEqual(len(errors), 1)
# self.assertEqual(len(warnings), 0)
self.assertEqual(len(errors), 0)
self.assertEqual(len(warnings), 1)
def test_valid(self):
# Check file with valid content (no errors or warnings).
errors, warnings = metadata.validate.check_file(
filepath=_VALID_METADATA_FILEPATH,
repo_root_dir=_THIS_DIR, repo_root_dir=_THIS_DIR,
) )
self.assertEqual(len(errors), 0) self.assertEqual(len(errors), 0)
self.assertEqual(len(warnings), 0) self.assertEqual(len(warnings), 0)
# Check an invalid file (both errors and warnings). def test_invalid(self):
test_filepath = os.path.join(_THIS_DIR, "data", # Check file with invalid content (both errors and warnings).
"README.chromium.test.multi-invalid")
errors, warnings = metadata.validate.check_file( errors, warnings = metadata.validate.check_file(
filepath=test_filepath, filepath=_INVALID_METADATA_FILEPATH,
repo_root_dir=_THIS_DIR, repo_root_dir=_THIS_DIR,
) )
# TODO(aredulla): update this test once validation errors can be returned # TODO(aredulla): update this test once validation errors can be returned
# as errors. # as errors. Bug: b/285453019.
# self.assertEqual(len(errors), 9) # self.assertEqual(len(errors), 9)
# self.assertEqual(len(warnings), 2) # self.assertEqual(len(warnings), 2)
self.assertEqual(len(errors), 0) self.assertEqual(len(errors), 0)

@ -5,7 +5,7 @@
import os import os
import sys import sys
from typing import List, Tuple from typing import Callable, List, Tuple, Union
_THIS_DIR = os.path.abspath(os.path.dirname(__file__)) _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory. # The repo's root directory.
@ -14,36 +14,92 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
# Add the repo's root directory for clearer imports. # Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR) sys.path.insert(0, _ROOT_DIR)
import gclient_utils
import metadata.parse import metadata.parse
import metadata.validation_result as vr import metadata.validation_result as vr
def validate_file(filepath: str, def validate_content(content: str, source_file_dir: str,
repo_root_dir: str) -> List[vr.ValidationResult]: repo_root_dir: str) -> List[vr.ValidationResult]:
"""Validate the metadata file.""" """Validate the content as a metadata file.
if not os.path.exists(filepath):
result = vr.ValidationError(f"'{filepath}' not found.")
result.set_tag(tag="reason", value="file not found")
return [result]
# Get the directory the metadata file is in. Args:
parent_dir = os.path.dirname(filepath) content: the entire content of a file to be validated as a metadata file.
source_file_dir: the directory of the metadata file that the license file
value is from; this is needed to construct file paths to
license files.
repo_root_dir: the repository's root directory; this is needed to construct
file paths to license files.
Returns: the validation results for the given content.
"""
results = [] results = []
dependencies = metadata.parse.parse_file(filepath) dependencies = metadata.parse.parse_content(content)
for dependency in dependencies: if not dependencies:
results.extend( result = vr.ValidationError("No dependency metadata found.")
dependency.validate( result.set_tag("reason", "no metadata")
source_file_dir=parent_dir, return [result]
repo_root_dir=repo_root_dir,
))
for dependency in dependencies:
dependency_results = dependency.validate(source_file_dir=source_file_dir,
repo_root_dir=repo_root_dir)
results.extend(dependency_results)
return results return results
def check_file(filepath: str, def _construct_file_read_error(filepath: str, cause: str) -> vr.ValidationError:
repo_root_dir: str) -> Tuple[List[str], List[str]]: """Helper function to create a validation error for a file reading issue."""
"""Run metadata validation on the given file, and return all validation result = vr.ValidationError(f"Cannot read '{filepath}' - {cause}.")
result.set_tag(tag="reason", value="read error")
return result
def validate_file(
    filepath: str,
    repo_root_dir: str,
    # Union[..., None] (rather than a bare default of None) because the
    # parameter is genuinely optional; Callable may return bytes if the
    # reader could not decode the file contents.
    reader: Union[Callable[[str], Union[str, bytes]], None] = None,
) -> List[vr.ValidationResult]:
  """Validate the item located at the given filepath is a valid dependency
  metadata file.

  Args:
    filepath: the path to validate,
              e.g. "/chromium/src/third_party/libname/README.chromium"
    repo_root_dir: the repository's root directory; this is needed to
                   construct file paths to license files.
    reader (optional): callable function/method to read the content of the
                       file; defaults to gclient_utils.FileRead.

  Returns: the validation results for the given filepath and its contents, if
           it exists.
  """
  if reader is None:
    reader = gclient_utils.FileRead

  try:
    content = reader(filepath)
  except FileNotFoundError:
    return [_construct_file_read_error(filepath, "file not found")]
  except PermissionError:
    return [_construct_file_read_error(filepath, "access denied")]
  except Exception as e:
    # Deliberately broad: any unexpected read failure is converted into a
    # validation error instead of being propagated to the caller.
    return [_construct_file_read_error(filepath, f"unexpected error: '{e}'")]
  else:
    # A non-str value (e.g. raw bytes) cannot be parsed as metadata.
    if not isinstance(content, str):
      return [_construct_file_read_error(filepath, "decoding failed")]

    # Get the directory the metadata file is in.
    source_file_dir = os.path.dirname(filepath)
    return validate_content(content=content,
                            source_file_dir=source_file_dir,
                            repo_root_dir=repo_root_dir)
def check_file(
filepath: str,
repo_root_dir: str,
reader: Callable[[str], Union[str, bytes]] = None,
) -> Tuple[List[str], List[str]]:
"""Run metadata validation on the given filepath, and return all validation
errors and validation warnings. errors and validation warnings.
Args: Args:
@ -51,6 +107,7 @@ def check_file(filepath: str,
e.g. "/chromium/src/third_party/libname/README.chromium" e.g. "/chromium/src/third_party/libname/README.chromium"
repo_root_dir: the repository's root directory; this is needed to construct repo_root_dir: the repository's root directory; this is needed to construct
file paths to license files. file paths to license files.
reader (optional): callable function/method to read the content of the file.
Returns: Returns:
error_messages: the fatal validation issues present in the file; error_messages: the fatal validation issues present in the file;
@ -58,20 +115,20 @@ def check_file(filepath: str,
warning_messages: the non-fatal validation issues present in the file; warning_messages: the non-fatal validation issues present in the file;
i.e. presubmit should still pass. i.e. presubmit should still pass.
""" """
results = validate_file(filepath=filepath,
repo_root_dir=repo_root_dir,
reader=reader)
error_messages = [] error_messages = []
warning_messages = [] warning_messages = []
for result in validate_file(filepath, repo_root_dir): for result in results:
# Construct the message.
if result.get_tag("reason") == "file not found":
message = result.get_message(postscript="", width=60)
else:
message = result.get_message(width=60) message = result.get_message(width=60)
# TODO(aredulla): Actually distinguish between validation errors and # TODO(aredulla): Actually distinguish between validation errors and
# warnings. The quality of metadata is currently being uplifted, but is not # warnings. The quality of metadata is currently being uplifted, but is not
# yet guaranteed to pass validation. So for now, all validation results will # yet guaranteed to pass validation. So for now, all validation results will
# be returned as warnings so CLs are not blocked by invalid metadata in # be returned as warnings so CLs are not blocked by invalid metadata in
# presubmits yet. # presubmits yet. Bug: b/285453019.
# if result.is_fatal(): # if result.is_fatal():
# error_messages.append(message) # error_messages.append(message)
# else: # else:

@ -10,6 +10,9 @@ import io as _io
import os as _os import os as _os
import zlib import zlib
import metadata.discover
import metadata.validate
_HERE = _os.path.dirname(_os.path.abspath(__file__)) _HERE = _os.path.dirname(_os.path.abspath(__file__))
# These filters will be disabled if callers do not explicitly supply a # These filters will be disabled if callers do not explicitly supply a
@ -375,6 +378,7 @@ def CheckChangeHasNoCrAndHasOnlyOneEol(input_api, output_api,
items=eof_files)) items=eof_files))
return outputs return outputs
def CheckGenderNeutral(input_api, output_api, source_file_filter=None): def CheckGenderNeutral(input_api, output_api, source_file_filter=None):
"""Checks that there are no gendered pronouns in any of the text files to be """Checks that there are no gendered pronouns in any of the text files to be
submitted. submitted.
@ -398,7 +402,6 @@ def CheckGenderNeutral(input_api, output_api, source_file_filter=None):
return [] return []
def _ReportErrorFileAndLine(filename, line_num, dummy_line): def _ReportErrorFileAndLine(filename, line_num, dummy_line):
"""Default error formatter for _FindNewViolationsOfRule.""" """Default error formatter for _FindNewViolationsOfRule."""
return '%s:%s' % (filename, line_num) return '%s:%s' % (filename, line_num)
@ -789,6 +792,41 @@ def CheckLicense(input_api, output_api, license_re_param=None,
return results return results
def CheckChromiumDependencyMetadata(input_api, output_api, file_filter=None):
  """Check files for Chromium third party dependency metadata have sufficient
  information, and are correctly formatted.

  See the README.chromium.template at
  https://chromium.googlesource.com/chromium/src/+/main/third_party/README.chromium.template
  """
  if file_filter is None:
    # By default, restrict the check to known Chromium metadata files.
    file_filter = lambda f: metadata.discover.is_metadata_file(f.LocalPath())

  # License file checks need paths relative to the repository root.
  repo_root_dir = input_api.change.RepositoryRoot()

  outputs = []
  for affected_file in input_api.AffectedFiles(file_filter=file_filter):
    # Deleted files have no content left to validate.
    if affected_file.Action() == 'D':
      continue

    errors, warnings = metadata.validate.check_file(
        filepath=affected_file.AbsoluteLocalPath(),
        repo_root_dir=repo_root_dir,
        reader=input_api.ReadFile,
    )
    outputs.extend(
        output_api.PresubmitPromptWarning(warning, [affected_file])
        for warning in warnings)
    outputs.extend(
        output_api.PresubmitError(error, [affected_file])
        for error in errors)

  return outputs
### Other checks ### Other checks
def CheckDoNotSubmit(input_api, output_api): def CheckDoNotSubmit(input_api, output_api):
@ -843,6 +881,7 @@ def CheckTreeIsOpen(input_api, output_api,
long_text=str(e))] long_text=str(e))]
return [] return []
def GetUnitTestsInDirectory(input_api, def GetUnitTestsInDirectory(input_api,
output_api, output_api,
directory, directory,

@ -328,6 +328,69 @@ class DescriptionChecksTest(unittest.TestCase):
self.assertEqual(0, len(errors)) self.assertEqual(0, len(errors))
class ChromiumDependencyMetadataCheckTest(unittest.TestCase):
  """Tests for presubmit_canned_checks.CheckChromiumDependencyMetadata."""

  def testDefaultFileFilter(self):
    """Checks the default file filter limits the scope to Chromium dependency
    metadata files.
    """
    mock_input = MockInputApi()
    mock_input.change.RepositoryRoot = lambda: ''
    mock_input.files = [
        MockFile(os.path.normpath('foo/README.md'), ['Shipped: no?']),
        MockFile(os.path.normpath('foo/main.py'), ['Shipped: yes?']),
    ]

    results = presubmit_canned_checks.CheckChromiumDependencyMetadata(
        mock_input, MockOutputApi())

    # Neither file is a metadata file, so nothing should be validated.
    self.assertEqual(len(results), 0)

  def testSkipDeletedFiles(self):
    """Checks validation is skipped for deleted files."""
    mock_input = MockInputApi()
    mock_input.change.RepositoryRoot = lambda: ''
    mock_input.files = [
        MockFile(os.path.normpath('foo/README.chromium'), ['No fields'],
                 action='D'),
    ]

    results = presubmit_canned_checks.CheckChromiumDependencyMetadata(
        mock_input, MockOutputApi())

    self.assertEqual(len(results), 0)

  def testFeedbackForNoMetadata(self):
    """Checks presubmit results are returned for files without any metadata."""
    mock_input = MockInputApi()
    mock_input.change.RepositoryRoot = lambda: ''
    mock_input.files = [
        MockFile(os.path.normpath('foo/README.chromium'), ['No fields']),
    ]

    results = presubmit_canned_checks.CheckChromiumDependencyMetadata(
        mock_input, MockOutputApi())

    self.assertEqual(len(results), 1)
    self.assertIn("No dependency metadata", results[0].message)

  def testFeedbackForInvalidMetadata(self):
    """Checks presubmit results are returned for files with invalid metadata."""
    mock_input = MockInputApi()
    mock_input.change.RepositoryRoot = lambda: ''
    readme = MockFile(os.path.normpath('foo/README.chromium'),
                      ['Shipped: yes?'])
    mock_input.files = [readme]

    results = presubmit_canned_checks.CheckChromiumDependencyMetadata(
        mock_input, MockOutputApi())

    # There should be 10 results due to
    # - missing 5 mandatory fields: Name, URL, Version, License, and
    #   Security Critical
    # - missing 4 required fields: Date, Revision, License File, and
    #   License Android Compatible
    # - Shipped should be only 'yes' or 'no'.
    self.assertEqual(len(results), 10)

    # Check each presubmit result is associated with the test file.
    for result in results:
      self.assertEqual(len(result.items), 1)
      self.assertEqual(result.items[0], readme)
class CheckUpdateOwnersFileReferences(unittest.TestCase): class CheckUpdateOwnersFileReferences(unittest.TestCase):
def testShowsWarningIfDeleting(self): def testShowsWarningIfDeleting(self):
input_api = MockInputApi() input_api = MockInputApi()

Loading…
Cancel
Save