[ssci] Added CheckChromiumDependencyMetadata in presubmit_canned_checks

This CL adds a new function, `CheckChromiumDependencyMetadata`, to
`presubmit_canned_checks.py`. It can be used to check that files conform
to the format defined by `README.chromium.template`
(https://chromium.googlesource.com/chromium/src/+/main/third_party/README.chromium.template).

The code for metadata validation can be found in `//metadata`. Note that
all metadata validation issues will be returned as warnings only for
now, while the quality of metadata is being uplifted.
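
For illustration, a repository's PRESUBMIT.py could invoke the new canned
check roughly as follows (the hook name below is hypothetical; canned checks
are exposed on the input API as `input_api.canned_checks`):

    def CheckChromiumMetadataFiles(input_api, output_api):
      # All validation issues surface as prompt warnings for now (see above).
      return input_api.canned_checks.CheckChromiumDependencyMetadata(
          input_api, output_api)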


Bug: b:277147404
Change-Id: Iacf1b3a11219ab752549f6dc6e882c93c0fbe780
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4812578
Commit-Queue: Anne Redulla <aredulla@google.com>
Reviewed-by: Rachael Newitt <renewitt@google.com>
Reviewed-by: Gavin Mak <gavinmak@google.com>
Reviewed-by: Bruce Dawson <brucedawson@chromium.org>

@@ -0,0 +1,15 @@
# Copyright 2023 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
# The base names that are known to be Chromium metadata files.
_METADATA_FILES = {
"README.chromium",
}
def is_metadata_file(path: str) -> bool:
"""Filter for metadata files."""
return os.path.basename(path) in _METADATA_FILES
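
A small illustration of the new filter helper, assuming the depot_tools root
is on `sys.path` (the paths below are hypothetical):

    import metadata.discover

    paths = [
        "third_party/libexample/README.chromium",
        "third_party/libexample/BUILD.gn",
    ]
    # Only base names listed in _METADATA_FILES are treated as metadata files.
    metadata_files = [p for p in paths if metadata.discover.is_metadata_file(p)]
    assert metadata_files == ["third_party/libexample/README.chromium"]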

@@ -31,23 +31,21 @@ _PATTERN_FIELD_DECLARATION = re.compile(
)
def parse_file(filepath: str) -> List[dm.DependencyMetadata]:
"""Reads and parses the metadata in the given file.
def parse_content(content: str) -> List[dm.DependencyMetadata]:
"""Reads and parses the metadata from the given string.
Args:
filepath: path to metadata file.
content: the string to parse metadata from.
Returns:
each dependency's metadata described in the file.
all the metadata, which may be for zero or more dependencies, from the
given string.
"""
with open(filepath, "r", encoding="utf-8") as f:
lines = f.readlines()
dependencies = []
current_metadata = dm.DependencyMetadata()
current_field_name = None
current_field_value = ""
for line in lines:
for line in content.splitlines(keepends=True):
# Check if a new dependency is being described.
if DEPENDENCY_DIVIDER.match(line):
if current_field_name:
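
A minimal usage sketch for the new `parse_content` entry point, assuming the
depot_tools root is on `sys.path`; the field names follow
`README.chromium.template` and the values are placeholders:

    import metadata.parse

    content = "\n".join([
        "Name: libexample",
        "URL: https://example.com/libexample",
        "Version: 1.2.3",
        "License: MIT",
        "Security Critical: no",
        "Shipped: no",
    ]) + "\n"

    all_metadata = metadata.parse.parse_content(content)
    # One DependencyMetadata entry is expected for the single dependency above.
    assert len(all_metadata) == 1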

@@ -14,6 +14,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
# Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR)
import gclient_utils
import metadata.parse
@@ -22,7 +23,8 @@ class ParseTest(unittest.TestCase):
"""Check parsing works for a single dependency's metadata."""
filepath = os.path.join(_THIS_DIR, "data",
"README.chromium.test.single-valid")
all_metadata = metadata.parse.parse_file(filepath)
content = gclient_utils.FileRead(filepath)
all_metadata = metadata.parse.parse_content(content)
self.assertEqual(len(all_metadata), 1)
self.assertListEqual(
@@ -49,7 +51,8 @@ class ParseTest(unittest.TestCase):
"""Check parsing works for multiple dependencies' metadata."""
filepath = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-invalid")
all_metadata = metadata.parse.parse_file(filepath)
content = gclient_utils.FileRead(filepath)
all_metadata = metadata.parse.parse_content(content)
# Dependency metadata with no entries at all are ignored.
self.assertEqual(len(all_metadata), 3)

@@ -14,25 +14,81 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
# Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR)
import gclient_utils
import metadata.validate
# Common paths for tests.
_SOURCE_FILE_DIR = os.path.join(_THIS_DIR, "data")
_VALID_METADATA_FILEPATH = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-valid")
_INVALID_METADATA_FILEPATH = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-invalid")
class ValidateTest(unittest.TestCase):
def test_validate_file(self):
class ValidateContentTest(unittest.TestCase):
"""Tests for the validate_content function."""
def test_empty(self):
# Validate empty content (should result in a validation error).
results = metadata.validate.validate_content(
content="",
source_file_dir=_SOURCE_FILE_DIR,
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(results), 1)
self.assertTrue(results[0].is_fatal())
def test_valid(self):
# Validate valid file content (no errors or warnings).
results = metadata.validate.validate_content(
content=gclient_utils.FileRead(_VALID_METADATA_FILEPATH),
source_file_dir=_SOURCE_FILE_DIR,
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(results), 0)
def test_invalid(self):
# Validate invalid file content (both errors and warnings).
results = metadata.validate.validate_content(
content=gclient_utils.FileRead(_INVALID_METADATA_FILEPATH),
source_file_dir=_SOURCE_FILE_DIR,
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(results), 11)
error_count = 0
warning_count = 0
for result in results:
if result.is_fatal():
error_count += 1
else:
warning_count += 1
self.assertEqual(error_count, 9)
self.assertEqual(warning_count, 2)
class ValidateFileTest(unittest.TestCase):
"""Tests for the validate_file function."""
def test_missing(self):
# Validate a file that does not exist.
results = metadata.validate.validate_file(
filepath=os.path.join(_THIS_DIR, "data", "MISSING.chromium"),
repo_root_dir=_THIS_DIR,
)
# There should be exactly 1 error returned.
self.assertEqual(len(results), 1)
self.assertTrue(results[0].is_fatal())
def test_valid(self):
# Validate a valid file (no errors or warnings).
test_filepath = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-valid")
results = metadata.validate.validate_file(
filepath=test_filepath,
filepath=_VALID_METADATA_FILEPATH,
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(results), 0)
def test_invalid(self):
# Validate an invalid file (both errors and warnings).
test_filepath = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-invalid")
results = metadata.validate.validate_file(
filepath=test_filepath,
filepath=_INVALID_METADATA_FILEPATH,
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(results), 11)
@@ -46,26 +102,39 @@ class ValidateTest(unittest.TestCase):
self.assertEqual(error_count, 9)
self.assertEqual(warning_count, 2)
def test_check_file(self):
# Check a valid file (no errors or warnings).
test_filepath = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-valid")
class CheckFileTest(unittest.TestCase):
"""Tests for the check_file function."""
def test_missing(self):
# Check a file that does not exist.
errors, warnings = metadata.validate.check_file(
filepath=os.path.join(_THIS_DIR, "data", "MISSING.chromium"),
repo_root_dir=_THIS_DIR,
)
# TODO(aredulla): update this test once validation errors can be returned
# as errors. Bug: b/285453019.
# self.assertEqual(len(errors), 1)
# self.assertEqual(len(warnings), 0)
self.assertEqual(len(errors), 0)
self.assertEqual(len(warnings), 1)
def test_valid(self):
# Check file with valid content (no errors or warnings).
errors, warnings = metadata.validate.check_file(
filepath=test_filepath,
filepath=_VALID_METADATA_FILEPATH,
repo_root_dir=_THIS_DIR,
)
self.assertEqual(len(errors), 0)
self.assertEqual(len(warnings), 0)
# Check an invalid file (both errors and warnings).
test_filepath = os.path.join(_THIS_DIR, "data",
"README.chromium.test.multi-invalid")
def test_invalid(self):
# Check file with invalid content (both errors and warnings).
errors, warnings = metadata.validate.check_file(
filepath=test_filepath,
filepath=_INVALID_METADATA_FILEPATH,
repo_root_dir=_THIS_DIR,
)
# TODO(aredulla): update this test once validation errors can be returned
# as errors.
# as errors. Bug: b/285453019.
# self.assertEqual(len(errors), 9)
# self.assertEqual(len(warnings), 2)
self.assertEqual(len(errors), 0)

@@ -5,7 +5,7 @@
import os
import sys
from typing import List, Tuple
from typing import Callable, List, Tuple, Union
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory.
@@ -14,36 +14,92 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
# Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR)
import gclient_utils
import metadata.parse
import metadata.validation_result as vr
def validate_file(filepath: str,
repo_root_dir: str) -> List[vr.ValidationResult]:
"""Validate the metadata file."""
if not os.path.exists(filepath):
result = vr.ValidationError(f"'{filepath}' not found.")
result.set_tag(tag="reason", value="file not found")
return [result]
def validate_content(content: str, source_file_dir: str,
repo_root_dir: str) -> List[vr.ValidationResult]:
"""Validate the content as a metadata file.
# Get the directory the metadata file is in.
parent_dir = os.path.dirname(filepath)
Args:
content: the entire content of a file to be validated as a metadata file.
source_file_dir: the directory of the metadata file that the license file
value is from; this is needed to construct file paths to
license files.
repo_root_dir: the repository's root directory; this is needed to construct
file paths to license files.
Returns: the validation results for the given content.
"""
results = []
dependencies = metadata.parse.parse_file(filepath)
for dependency in dependencies:
results.extend(
dependency.validate(
source_file_dir=parent_dir,
repo_root_dir=repo_root_dir,
))
dependencies = metadata.parse.parse_content(content)
if not dependencies:
result = vr.ValidationError("No dependency metadata found.")
result.set_tag("reason", "no metadata")
return [result]
for dependency in dependencies:
dependency_results = dependency.validate(source_file_dir=source_file_dir,
repo_root_dir=repo_root_dir)
results.extend(dependency_results)
return results
def check_file(filepath: str,
repo_root_dir: str) -> Tuple[List[str], List[str]]:
"""Run metadata validation on the given file, and return all validation
def _construct_file_read_error(filepath: str, cause: str) -> vr.ValidationError:
"""Helper function to create a validation error for a file reading issue."""
result = vr.ValidationError(f"Cannot read '{filepath}' - {cause}.")
result.set_tag(tag="reason", value="read error")
return result
def validate_file(
filepath: str,
repo_root_dir: str,
reader: Callable[[str], Union[str, bytes]] = None,
) -> List[vr.ValidationResult]:
"""Validate the item located at the given filepath is a valid dependency
metadata file.
Args:
filepath: the path to validate,
e.g. "/chromium/src/third_party/libname/README.chromium"
repo_root_dir: the repository's root directory; this is needed to construct
file paths to license files.
reader (optional): callable function/method to read the content of the file.
Returns: the validation results for the given filepath and its contents, if
it exists.
"""
if reader is None:
reader = gclient_utils.FileRead
try:
content = reader(filepath)
except FileNotFoundError:
return [_construct_file_read_error(filepath, "file not found")]
except PermissionError:
return [_construct_file_read_error(filepath, "access denied")]
except Exception as e:
return [_construct_file_read_error(filepath, f"unexpected error: '{e}'")]
else:
if not isinstance(content, str):
return [_construct_file_read_error(filepath, "decoding failed")]
# Get the directory the metadata file is in.
source_file_dir = os.path.dirname(filepath)
return validate_content(content=content,
source_file_dir=source_file_dir,
repo_root_dir=repo_root_dir)
def check_file(
filepath: str,
repo_root_dir: str,
reader: Callable[[str], Union[str, bytes]] = None,
) -> Tuple[List[str], List[str]]:
"""Run metadata validation on the given filepath, and return all validation
errors and validation warnings.
Args:
@@ -51,6 +107,7 @@ def check_file(filepath: str,
e.g. "/chromium/src/third_party/libname/README.chromium"
repo_root_dir: the repository's root directory; this is needed to construct
file paths to license files.
reader (optional): callable function/method to read the content of the file.
Returns:
error_messages: the fatal validation issues present in the file;
@@ -58,20 +115,20 @@ def check_file(filepath: str,
warning_messages: the non-fatal validation issues present in the file;
i.e. presubmit should still pass.
"""
results = validate_file(filepath=filepath,
repo_root_dir=repo_root_dir,
reader=reader)
error_messages = []
warning_messages = []
for result in validate_file(filepath, repo_root_dir):
# Construct the message.
if result.get_tag("reason") == "file not found":
message = result.get_message(postscript="", width=60)
else:
message = result.get_message(width=60)
for result in results:
message = result.get_message(width=60)
# TODO(aredulla): Actually distinguish between validation errors and
# warnings. The quality of metadata is currently being uplifted, but is not
# yet guaranteed to pass validation. So for now, all validation results will
# be returned as warnings so CLs are not blocked by invalid metadata in
# presubmits yet.
# presubmits yet. Bug: b/285453019.
# if result.is_fatal():
# error_messages.append(message)
# else:
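
A short sketch of the `check_file` contract, assuming the depot_tools root is
on `sys.path` (the paths below are hypothetical):

    import metadata.validate

    errors, warnings = metadata.validate.check_file(
        filepath="/path/to/src/third_party/libexample/README.chromium",
        repo_root_dir="/path/to/src",
    )
    # While metadata quality is being uplifted, every validation issue is
    # reported as a warning, so the errors list stays empty for now.
    assert errors == []
    for message in warnings:
        print(message)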

@@ -10,6 +10,9 @@ import io as _io
import os as _os
import zlib
import metadata.discover
import metadata.validate
_HERE = _os.path.dirname(_os.path.abspath(__file__))
# These filters will be disabled if callers do not explicitly supply a
@@ -375,6 +378,7 @@ def CheckChangeHasNoCrAndHasOnlyOneEol(input_api, output_api,
items=eof_files))
return outputs
def CheckGenderNeutral(input_api, output_api, source_file_filter=None):
"""Checks that there are no gendered pronouns in any of the text files to be
submitted.
@@ -398,7 +402,6 @@ def CheckGenderNeutral(input_api, output_api, source_file_filter=None):
return []
def _ReportErrorFileAndLine(filename, line_num, dummy_line):
"""Default error formatter for _FindNewViolationsOfRule."""
return '%s:%s' % (filename, line_num)
@@ -789,6 +792,41 @@ def CheckLicense(input_api, output_api, license_re_param=None,
return results
def CheckChromiumDependencyMetadata(input_api, output_api, file_filter=None):
"""Check files for Chromium third party dependency metadata have sufficient
information, and are correctly formatted.
See the README.chromium.template at
https://chromium.googlesource.com/chromium/src/+/main/third_party/README.chromium.template
"""
# If the file filter is unspecified, filter to known Chromium metadata files.
if file_filter is None:
file_filter = lambda f: metadata.discover.is_metadata_file(f.LocalPath())
# The repo's root directory is required to check license files.
repo_root_dir = input_api.change.RepositoryRoot()
outputs = []
for f in input_api.AffectedFiles(file_filter=file_filter):
if f.Action() == 'D':
# No need to validate a deleted file.
continue
errors, warnings = metadata.validate.check_file(
filepath=f.AbsoluteLocalPath(),
repo_root_dir=repo_root_dir,
reader=input_api.ReadFile,
)
for warning in warnings:
outputs.append(output_api.PresubmitPromptWarning(warning, [f]))
for error in errors:
outputs.append(output_api.PresubmitError(error, [f]))
return outputs
### Other checks
def CheckDoNotSubmit(input_api, output_api):
@@ -843,6 +881,7 @@ def CheckTreeIsOpen(input_api, output_api,
long_text=str(e))]
return []
def GetUnitTestsInDirectory(input_api,
output_api,
directory,

@@ -328,6 +328,69 @@ class DescriptionChecksTest(unittest.TestCase):
self.assertEqual(0, len(errors))
class ChromiumDependencyMetadataCheckTest(unittest.TestCase):
def testDefaultFileFilter(self):
"""Checks the default file filter limits the scope to Chromium dependency
metadata files.
"""
input_api = MockInputApi()
input_api.change.RepositoryRoot = lambda: ''
input_api.files = [
MockFile(os.path.normpath('foo/README.md'), ['Shipped: no?']),
MockFile(os.path.normpath('foo/main.py'), ['Shipped: yes?']),
]
results = presubmit_canned_checks.CheckChromiumDependencyMetadata(
input_api, MockOutputApi())
self.assertEqual(len(results), 0)
def testSkipDeletedFiles(self):
"""Checks validation is skipped for deleted files."""
input_api = MockInputApi()
input_api.change.RepositoryRoot = lambda: ''
input_api.files = [
MockFile(os.path.normpath('foo/README.chromium'), ['No fields'],
action='D'),
]
results = presubmit_canned_checks.CheckChromiumDependencyMetadata(
input_api, MockOutputApi())
self.assertEqual(len(results), 0)
def testFeedbackForNoMetadata(self):
"""Checks presubmit results are returned for files without any metadata."""
input_api = MockInputApi()
input_api.change.RepositoryRoot = lambda: ''
input_api.files = [
MockFile(os.path.normpath('foo/README.chromium'), ['No fields']),
]
results = presubmit_canned_checks.CheckChromiumDependencyMetadata(
input_api, MockOutputApi())
self.assertEqual(len(results), 1)
self.assertTrue("No dependency metadata" in results[0].message)
def testFeedbackForInvalidMetadata(self):
"""Checks presubmit results are returned for files with invalid metadata."""
input_api = MockInputApi()
input_api.change.RepositoryRoot = lambda: ''
test_file = MockFile(os.path.normpath('foo/README.chromium'),
['Shipped: yes?'])
input_api.files = [test_file]
results = presubmit_canned_checks.CheckChromiumDependencyMetadata(
input_api, MockOutputApi())
# There should be 10 results due to
# - missing 5 mandatory fields: Name, URL, Version, License, and
# Security Critical
# - missing 4 required fields: Date, Revision, License File, and
# License Android Compatible
# - Shipped should be only 'yes' or 'no'.
self.assertEqual(len(results), 10)
# Check each presubmit result is associated with the test file.
for result in results:
self.assertEqual(len(result.items), 1)
self.assertEqual(result.items[0], test_file)
class CheckUpdateOwnersFileReferences(unittest.TestCase):
def testShowsWarningIfDeleting(self):
input_api = MockInputApi()
