From b9d7c8558207b5cbe5bc7fa41a0f46832cc35d6a Mon Sep 17 00:00:00 2001 From: Anne Redulla Date: Tue, 29 Aug 2023 23:54:27 +0000 Subject: [PATCH] [ssci] Added CheckChromiumDependencyMetadata in presubmit_canned_checks This CL adds a new function `CheckChromiumDependencyMetadata` in `presubmit_canned_checks.py`. It can be used to check that files satisfy the format defined by `README.chromium.template` (https://chromium.googlesource.com/chromium/src/+/main/third_party/README.chromium.template). The code for metadata validation can be found in `//metadata`. Note that all metadata validation issues will be returned as warnings only for now, while the quality of metadata is being uplifted. Bug: b:277147404 Change-Id: Iacf1b3a11219ab752549f6dc6e882c93c0fbe780 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4812578 Commit-Queue: Anne Redulla Reviewed-by: Rachael Newitt Reviewed-by: Gavin Mak Reviewed-by: Bruce Dawson --- metadata/discover.py | 15 ++++ metadata/parse.py | 14 ++-- metadata/tests/parse_test.py | 7 +- metadata/tests/validate_test.py | 105 +++++++++++++++++++----- metadata/validate.py | 111 +++++++++++++++++++------- presubmit_canned_checks.py | 41 +++++++++- tests/presubmit_canned_checks_test.py | 63 +++++++++++++++ 7 files changed, 300 insertions(+), 56 deletions(-) create mode 100644 metadata/discover.py diff --git a/metadata/discover.py b/metadata/discover.py new file mode 100644 index 000000000..f9915da86 --- /dev/null +++ b/metadata/discover.py @@ -0,0 +1,15 @@ +# Copyright 2023 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import os + +# The base names that are known to be Chromium metadata files. +_METADATA_FILES = { + "README.chromium", +} + + +def is_metadata_file(path: str) -> bool: + """Filter for metadata files.""" + return os.path.basename(path) in _METADATA_FILES diff --git a/metadata/parse.py b/metadata/parse.py index 2d673501f..7399b55e9 100644 --- a/metadata/parse.py +++ b/metadata/parse.py @@ -31,23 +31,21 @@ _PATTERN_FIELD_DECLARATION = re.compile( ) -def parse_file(filepath: str) -> List[dm.DependencyMetadata]: - """Reads and parses the metadata in the given file. +def parse_content(content: str) -> List[dm.DependencyMetadata]: + """Reads and parses the metadata from the given string. Args: - filepath: path to metadata file. + content: the string to parse metadata from. Returns: - each dependency's metadata described in the file. + all the metadata, which may be for zero or more dependencies, from the + given string. """ - with open(filepath, "r", encoding="utf-8") as f: - lines = f.readlines() - dependencies = [] current_metadata = dm.DependencyMetadata() current_field_name = None current_field_value = "" - for line in lines: + for line in content.splitlines(keepends=True): # Check if a new dependency is being described. if DEPENDENCY_DIVIDER.match(line): if current_field_name: diff --git a/metadata/tests/parse_test.py b/metadata/tests/parse_test.py index 23819e154..792321532 100644 --- a/metadata/tests/parse_test.py +++ b/metadata/tests/parse_test.py @@ -14,6 +14,7 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..")) # Add the repo's root directory for clearer imports. 
sys.path.insert(0, _ROOT_DIR) +import gclient_utils import metadata.parse @@ -22,7 +23,8 @@ class ParseTest(unittest.TestCase): """Check parsing works for a single dependency's metadata.""" filepath = os.path.join(_THIS_DIR, "data", "README.chromium.test.single-valid") - all_metadata = metadata.parse.parse_file(filepath) + content = gclient_utils.FileRead(filepath) + all_metadata = metadata.parse.parse_content(content) self.assertEqual(len(all_metadata), 1) self.assertListEqual( @@ -49,7 +51,8 @@ class ParseTest(unittest.TestCase): """Check parsing works for multiple dependencies' metadata.""" filepath = os.path.join(_THIS_DIR, "data", "README.chromium.test.multi-invalid") - all_metadata = metadata.parse.parse_file(filepath) + content = gclient_utils.FileRead(filepath) + all_metadata = metadata.parse.parse_content(content) # Dependency metadata with no entries at all are ignored. self.assertEqual(len(all_metadata), 3) diff --git a/metadata/tests/validate_test.py b/metadata/tests/validate_test.py index 33a6978a5..656e9ca69 100644 --- a/metadata/tests/validate_test.py +++ b/metadata/tests/validate_test.py @@ -14,25 +14,81 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..")) # Add the repo's root directory for clearer imports. sys.path.insert(0, _ROOT_DIR) +import gclient_utils import metadata.validate +# Common paths for tests. +_SOURCE_FILE_DIR = os.path.join(_THIS_DIR, "data") +_VALID_METADATA_FILEPATH = os.path.join(_THIS_DIR, "data", + "README.chromium.test.multi-valid") +_INVALID_METADATA_FILEPATH = os.path.join(_THIS_DIR, "data", + "README.chromium.test.multi-invalid") -class ValidateTest(unittest.TestCase): - def test_validate_file(self): + +class ValidateContentTest(unittest.TestCase): + """Tests for the validate_content function.""" + def test_empty(self): + # Validate empty content (should result in a validation error). + results = metadata.validate.validate_content( + content="", + source_file_dir=_SOURCE_FILE_DIR, + repo_root_dir=_THIS_DIR, + ) + self.assertEqual(len(results), 1) + self.assertTrue(results[0].is_fatal()) + + def test_valid(self): + # Validate valid file content (no errors or warnings). + results = metadata.validate.validate_content( + content=gclient_utils.FileRead(_VALID_METADATA_FILEPATH), + source_file_dir=_SOURCE_FILE_DIR, + repo_root_dir=_THIS_DIR, + ) + self.assertEqual(len(results), 0) + + def test_invalid(self): + # Validate invalid file content (both errors and warnings). + results = metadata.validate.validate_content( + content=gclient_utils.FileRead(_INVALID_METADATA_FILEPATH), + source_file_dir=_SOURCE_FILE_DIR, + repo_root_dir=_THIS_DIR, + ) + self.assertEqual(len(results), 11) + error_count = 0 + warning_count = 0 + for result in results: + if result.is_fatal(): + error_count += 1 + else: + warning_count += 1 + self.assertEqual(error_count, 9) + self.assertEqual(warning_count, 2) + + +class ValidateFileTest(unittest.TestCase): + """Tests for the validate_file function.""" + def test_missing(self): + # Validate a file that does not exist. + results = metadata.validate.validate_file( + filepath=os.path.join(_THIS_DIR, "data", "MISSING.chromium"), + repo_root_dir=_THIS_DIR, + ) + # There should be exactly 1 error returned. + self.assertEqual(len(results), 1) + self.assertTrue(results[0].is_fatal()) + + def test_valid(self): # Validate a valid file (no errors or warnings). 
- test_filepath = os.path.join(_THIS_DIR, "data", - "README.chromium.test.multi-valid") results = metadata.validate.validate_file( - filepath=test_filepath, + filepath=_VALID_METADATA_FILEPATH, repo_root_dir=_THIS_DIR, ) self.assertEqual(len(results), 0) + def test_invalid(self): # Validate an invalid file (both errors and warnings). - test_filepath = os.path.join(_THIS_DIR, "data", - "README.chromium.test.multi-invalid") results = metadata.validate.validate_file( - filepath=test_filepath, + filepath=_INVALID_METADATA_FILEPATH, repo_root_dir=_THIS_DIR, ) self.assertEqual(len(results), 11) @@ -46,26 +102,39 @@ class ValidateTest(unittest.TestCase): self.assertEqual(error_count, 9) self.assertEqual(warning_count, 2) - def test_check_file(self): - # Check a valid file (no errors or warnings). - test_filepath = os.path.join(_THIS_DIR, "data", - "README.chromium.test.multi-valid") + +class CheckFileTest(unittest.TestCase): + """Tests for the check_file function.""" + def test_missing(self): + # Check a file that does not exist. + errors, warnings = metadata.validate.check_file( + filepath=os.path.join(_THIS_DIR, "data", "MISSING.chromium"), + repo_root_dir=_THIS_DIR, + ) + # TODO(aredulla): update this test once validation errors can be returned + # as errors. Bug: b/285453019. + # self.assertEqual(len(errors), 1) + # self.assertEqual(len(warnings), 0) + self.assertEqual(len(errors), 0) + self.assertEqual(len(warnings), 1) + + def test_valid(self): + # Check file with valid content (no errors or warnings). errors, warnings = metadata.validate.check_file( - filepath=test_filepath, + filepath=_VALID_METADATA_FILEPATH, repo_root_dir=_THIS_DIR, ) self.assertEqual(len(errors), 0) self.assertEqual(len(warnings), 0) - # Check an invalid file (both errors and warnings). - test_filepath = os.path.join(_THIS_DIR, "data", - "README.chromium.test.multi-invalid") + def test_invalid(self): + # Check file with invalid content (both errors and warnings). errors, warnings = metadata.validate.check_file( - filepath=test_filepath, + filepath=_INVALID_METADATA_FILEPATH, repo_root_dir=_THIS_DIR, ) # TODO(aredulla): update this test once validation errors can be returned - # as errors. + # as errors. Bug: b/285453019. # self.assertEqual(len(errors), 9) # self.assertEqual(len(warnings), 2) self.assertEqual(len(errors), 0) diff --git a/metadata/validate.py b/metadata/validate.py index 460660fb5..960c5e489 100644 --- a/metadata/validate.py +++ b/metadata/validate.py @@ -5,7 +5,7 @@ import os import sys -from typing import List, Tuple +from typing import Callable, List, Tuple, Union _THIS_DIR = os.path.abspath(os.path.dirname(__file__)) # The repo's root directory. @@ -14,36 +14,92 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..")) # Add the repo's root directory for clearer imports. sys.path.insert(0, _ROOT_DIR) +import gclient_utils import metadata.parse import metadata.validation_result as vr -def validate_file(filepath: str, - repo_root_dir: str) -> List[vr.ValidationResult]: - """Validate the metadata file.""" - if not os.path.exists(filepath): - result = vr.ValidationError(f"'{filepath}' not found.") - result.set_tag(tag="reason", value="file not found") - return [result] +def validate_content(content: str, source_file_dir: str, + repo_root_dir: str) -> List[vr.ValidationResult]: + """Validate the content as a metadata file. - # Get the directory the metadata file is in. - parent_dir = os.path.dirname(filepath) + Args: + content: the entire content of a file to be validated as a metadata file. 
+ source_file_dir: the directory of the metadata file that the license file + value is from; this is needed to construct file paths to + license files. + repo_root_dir: the repository's root directory; this is needed to construct + file paths to license files. + Returns: the validation results for the given content. + """ results = [] - dependencies = metadata.parse.parse_file(filepath) - for dependency in dependencies: - results.extend( - dependency.validate( - source_file_dir=parent_dir, - repo_root_dir=repo_root_dir, - )) + dependencies = metadata.parse.parse_content(content) + if not dependencies: + result = vr.ValidationError("No dependency metadata found.") + result.set_tag("reason", "no metadata") + return [result] + for dependency in dependencies: + dependency_results = dependency.validate(source_file_dir=source_file_dir, + repo_root_dir=repo_root_dir) + results.extend(dependency_results) return results -def check_file(filepath: str, - repo_root_dir: str) -> Tuple[List[str], List[str]]: - """Run metadata validation on the given file, and return all validation +def _construct_file_read_error(filepath: str, cause: str) -> vr.ValidationError: + """Helper function to create a validation error for a file reading issue.""" + result = vr.ValidationError(f"Cannot read '{filepath}' - {cause}.") + result.set_tag(tag="reason", value="read error") + return result + + +def validate_file( + filepath: str, + repo_root_dir: str, + reader: Callable[[str], Union[str, bytes]] = None, +) -> List[vr.ValidationResult]: + """Validate the item located at the given filepath is a valid dependency + metadata file. + + Args: + filepath: the path to validate, + e.g. "/chromium/src/third_party/libname/README.chromium" + repo_root_dir: the repository's root directory; this is needed to construct + file paths to license files. + reader (optional): callable function/method to read the content of the file. + + Returns: the validation results for the given filepath and its contents, if + it exists. + """ + if reader is None: + reader = gclient_utils.FileRead + + try: + content = reader(filepath) + except FileNotFoundError: + return [_construct_file_read_error(filepath, "file not found")] + except PermissionError: + return [_construct_file_read_error(filepath, "access denied")] + except Exception as e: + return [_construct_file_read_error(filepath, f"unexpected error: '{e}'")] + else: + if not isinstance(content, str): + return [_construct_file_read_error(filepath, "decoding failed")] + + # Get the directory the metadata file is in. + source_file_dir = os.path.dirname(filepath) + return validate_content(content=content, + source_file_dir=source_file_dir, + repo_root_dir=repo_root_dir) + + +def check_file( + filepath: str, + repo_root_dir: str, + reader: Callable[[str], Union[str, bytes]] = None, +) -> Tuple[List[str], List[str]]: + """Run metadata validation on the given filepath, and return all validation errors and validation warnings. Args: @@ -51,6 +107,7 @@ def check_file(filepath: str, e.g. "/chromium/src/third_party/libname/README.chromium" repo_root_dir: the repository's root directory; this is needed to construct file paths to license files. + reader (optional): callable function/method to read the content of the file. Returns: error_messages: the fatal validation issues present in the file; @@ -58,20 +115,20 @@ def check_file(filepath: str, warning_messages: the non-fatal validation issues present in the file; i.e. presubmit should still pass. 
""" + results = validate_file(filepath=filepath, + repo_root_dir=repo_root_dir, + reader=reader) + error_messages = [] warning_messages = [] - for result in validate_file(filepath, repo_root_dir): - # Construct the message. - if result.get_tag("reason") == "file not found": - message = result.get_message(postscript="", width=60) - else: - message = result.get_message(width=60) + for result in results: + message = result.get_message(width=60) # TODO(aredulla): Actually distinguish between validation errors and # warnings. The quality of metadata is currently being uplifted, but is not # yet guaranteed to pass validation. So for now, all validation results will # be returned as warnings so CLs are not blocked by invalid metadata in - # presubmits yet. + # presubmits yet. Bug: b/285453019. # if result.is_fatal(): # error_messages.append(message) # else: diff --git a/presubmit_canned_checks.py b/presubmit_canned_checks.py index 2ffcfb2ee..e0ea28304 100644 --- a/presubmit_canned_checks.py +++ b/presubmit_canned_checks.py @@ -10,6 +10,9 @@ import io as _io import os as _os import zlib +import metadata.discover +import metadata.validate + _HERE = _os.path.dirname(_os.path.abspath(__file__)) # These filters will be disabled if callers do not explicitly supply a @@ -375,6 +378,7 @@ def CheckChangeHasNoCrAndHasOnlyOneEol(input_api, output_api, items=eof_files)) return outputs + def CheckGenderNeutral(input_api, output_api, source_file_filter=None): """Checks that there are no gendered pronouns in any of the text files to be submitted. @@ -398,7 +402,6 @@ def CheckGenderNeutral(input_api, output_api, source_file_filter=None): return [] - def _ReportErrorFileAndLine(filename, line_num, dummy_line): """Default error formatter for _FindNewViolationsOfRule.""" return '%s:%s' % (filename, line_num) @@ -789,6 +792,41 @@ def CheckLicense(input_api, output_api, license_re_param=None, return results +def CheckChromiumDependencyMetadata(input_api, output_api, file_filter=None): + """Check files for Chromium third party dependency metadata have sufficient + information, and are correctly formatted. + + See the README.chromium.template at + https://chromium.googlesource.com/chromium/src/+/main/third_party/README.chromium.template + """ + # If the file filter is unspecified, filter to known Chromium metadata files. + if file_filter is None: + file_filter = lambda f: metadata.discover.is_metadata_file(f.LocalPath()) + + # The repo's root directory is required to check license files. + repo_root_dir = input_api.change.RepositoryRoot() + + outputs = [] + for f in input_api.AffectedFiles(file_filter=file_filter): + if f.Action() == 'D': + # No need to validate a deleted file. 
+ continue + + errors, warnings = metadata.validate.check_file( + filepath=f.AbsoluteLocalPath(), + repo_root_dir=repo_root_dir, + reader=input_api.ReadFile, + ) + + for warning in warnings: + outputs.append(output_api.PresubmitPromptWarning(warning, [f])) + + for error in errors: + outputs.append(output_api.PresubmitError(error, [f])) + + return outputs + + ### Other checks def CheckDoNotSubmit(input_api, output_api): @@ -843,6 +881,7 @@ def CheckTreeIsOpen(input_api, output_api, long_text=str(e))] return [] + def GetUnitTestsInDirectory(input_api, output_api, directory, diff --git a/tests/presubmit_canned_checks_test.py b/tests/presubmit_canned_checks_test.py index cee18da78..592504973 100644 --- a/tests/presubmit_canned_checks_test.py +++ b/tests/presubmit_canned_checks_test.py @@ -328,6 +328,69 @@ class DescriptionChecksTest(unittest.TestCase): self.assertEqual(0, len(errors)) +class ChromiumDependencyMetadataCheckTest(unittest.TestCase): + def testDefaultFileFilter(self): + """Checks the default file filter limits the scope to Chromium dependency + metadata files. + """ + input_api = MockInputApi() + input_api.change.RepositoryRoot = lambda: '' + input_api.files = [ + MockFile(os.path.normpath('foo/README.md'), ['Shipped: no?']), + MockFile(os.path.normpath('foo/main.py'), ['Shipped: yes?']), + ] + results = presubmit_canned_checks.CheckChromiumDependencyMetadata( + input_api, MockOutputApi()) + self.assertEqual(len(results), 0) + + def testSkipDeletedFiles(self): + """Checks validation is skipped for deleted files.""" + input_api = MockInputApi() + input_api.change.RepositoryRoot = lambda: '' + input_api.files = [ + MockFile(os.path.normpath('foo/README.chromium'), ['No fields'], + action='D'), + ] + results = presubmit_canned_checks.CheckChromiumDependencyMetadata( + input_api, MockOutputApi()) + self.assertEqual(len(results), 0) + + def testFeedbackForNoMetadata(self): + """Checks presubmit results are returned for files without any metadata.""" + input_api = MockInputApi() + input_api.change.RepositoryRoot = lambda: '' + input_api.files = [ + MockFile(os.path.normpath('foo/README.chromium'), ['No fields']), + ] + results = presubmit_canned_checks.CheckChromiumDependencyMetadata( + input_api, MockOutputApi()) + self.assertEqual(len(results), 1) + self.assertTrue("No dependency metadata" in results[0].message) + + def testFeedbackForInvalidMetadata(self): + """Checks presubmit results are returned for files with invalid metadata.""" + input_api = MockInputApi() + input_api.change.RepositoryRoot = lambda: '' + test_file = MockFile(os.path.normpath('foo/README.chromium'), + ['Shipped: yes?']) + input_api.files = [test_file] + results = presubmit_canned_checks.CheckChromiumDependencyMetadata( + input_api, MockOutputApi()) + + # There should be 10 results due to + # - missing 5 mandatory fields: Name, URL, Version, License, and + # Security Critical + # - missing 4 required fields: Date, Revision, License File, and + # License Android Compatible + # - Shipped should be only 'yes' or 'no'. + self.assertEqual(len(results), 10) + + # Check each presubmit result is associated with the test file. + for result in results: + self.assertEqual(len(result.items), 1) + self.assertEqual(result.items[0], test_file) + + class CheckUpdateOwnersFileReferences(unittest.TestCase): def testShowsWarningIfDeleting(self): input_api = MockInputApi()
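
Usage sketch: downstream repositories pick up canned checks through input_api.canned_checks in their PRESUBMIT.py, so the new check could be wired in roughly as below. The hook name and running it on upload are illustrative assumptions, not something this CL prescribes.

def CheckChangeOnUpload(input_api, output_api):
  # Validate any touched Chromium dependency metadata files. With no
  # file_filter argument, the check defaults to known metadata file names
  # (currently just README.chromium, per metadata/discover.py).
  return input_api.canned_checks.CheckChromiumDependencyMetadata(
      input_api, output_api)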
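
The reader parameter threaded through validate_file() and check_file() controls how file content is read; the presubmit check passes input_api.ReadFile, and the default falls back to gclient_utils.FileRead. A standalone sketch of calling the API directly, with hypothetical paths and assuming the depot_tools root is on sys.path:

import metadata.validate

# Hypothetical paths; substitute a real dependency directory and repo root.
errors, warnings = metadata.validate.check_file(
    filepath="/path/to/src/third_party/libfoo/README.chromium",
    repo_root_dir="/path/to/src",
)

# While metadata quality is being uplifted (b/285453019), all validation
# issues are returned as warnings, so `errors` is currently always empty.
for message in warnings:
  print(message)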
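
validate_content() is the lower-level entry point that works on an in-memory string, which is also how the new unit tests exercise validation. A sketch with hypothetical metadata content, loosely following the README.chromium.template linked above and again assuming depot_tools is importable; the authoritative field requirements are whatever the validators under //metadata enforce.

import metadata.validate

# Hypothetical dependency metadata in README.chromium form.
content = """Name: libfoo
URL: https://example.com/libfoo
Version: 1.2.3
License: MIT
License File: LICENSE
Security Critical: yes
Shipped: yes
Description:
A hypothetical third-party library used to illustrate the format.
"""

results = metadata.validate.validate_content(
    content=content,
    source_file_dir="/path/to/src/third_party/libfoo",  # hypothetical
    repo_root_dir="/path/to/src",                       # hypothetical
)

# Print any validation issues found, using the same message width as
# check_file() does for presubmit output.
for result in results:
  print(result.get_message(width=60))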