[ssci] Added parser for README validator

Bug: b:277147404
Change-Id: I7ee0fe35e1017eb477255f12045d00e855f7dfb4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4787830
Reviewed-by: Rachael Newitt <renewitt@google.com>
Auto-Submit: Anne Redulla <aredulla@google.com>
Commit-Queue: Rachael Newitt <renewitt@google.com>
2 years ago · 2b583af7e1
parent ea99f9a083
commit 2b583af7e1
6 changed files with 249 additions and 1 deletions
--- a/metadata/dependency_metadata.py
+++ b/metadata/dependency_metadata.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# Copyright 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+from typing import List, Tuple
+
+
+class DependencyMetadata:
+  """Ordered record of the (field name, field value) pairs of one dependency.
+
+  Entries are kept in the order they were added, and the same field name may
+  be added more than once.
+  """
+  def __init__(self):
+    # List of (field_name, stripped_field_value) tuples, in insertion order.
+    self._entries = []
+
+  def add_entry(self, field_name: str, field_value: str):
+    """Appends a field to this dependency's metadata.
+
+    Args:
+        field_name: the name of the field, stored exactly as given.
+        field_value: the field's raw value; leading and trailing whitespace
+                     is removed before it is stored.
+    """
+    trimmed_value = field_value.strip()
+    self._entries.append((field_name, trimmed_value))
+
+  def has_entries(self) -> bool:
+    """Returns whether at least one entry has been added."""
+    return bool(self._entries)
+
+  def get_entries(self) -> List[Tuple[str, str]]:
+    """Returns a new list of the (field name, field value) entries.
+
+    Changing the returned list does not change the stored entries.
+    """
+    return self._entries.copy()
--- a/metadata/fields/custom/cpe_prefix.py
+++ b/metadata/fields/custom/cpe_prefix.py
@@ -25,7 +25,7 @@ _PATTERN_CPE_PREFIX = re.compile(r"^cpe:/.+:.+:.+(:.+)*$")
 class CPEPrefixField(field_types.MetadataField):
  """Custom field for the package's CPE."""
  def __init__(self):
-    super().__init__(name="CPEPrefix", one_liner=False)
+    super().__init__(name="CPEPrefix", one_liner=True)

  def validate(self, value: str) -> Union[vr.ValidationResult, None]:
    """Checks the given value is either 'unknown', or a valid
--- a/metadata/fields/types.py
+++ b/metadata/fields/types.py
@@ -40,6 +40,9 @@ class MetadataField:
  def get_name(self):
    return self._name

+  def is_one_liner(self):
+    """Returns the one-liner flag stored on this field (`self._one_liner`).
+
+    NOTE(review): presumably set from the `one_liner` constructor argument;
+    the assignment is outside this hunk -- confirm.
+    """
+    return self._one_liner
+
  def validate(self, value: str) -> Union[vr.ValidationResult, None]:
    """Checks the given value is acceptable for the field.

--- a/metadata/parse.py
+++ b/metadata/parse.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+# Copyright 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import re
+import sys
+from typing import List
+
+_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+# The repo's root directory.
+_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
+
+# Add the repo's root directory for clearer imports.
+sys.path.insert(0, _ROOT_DIR)
+
+import metadata.fields.known as known_fields
+import metadata.dependency_metadata as dm
+
+# Line used to separate dependencies within the same metadata file.
+DEPENDENCY_DIVIDER = re.compile(r"^-{20} DEPENDENCY DIVIDER -{20}$")
+
+# Delimiter used to separate a field's name from its value.
+FIELD_DELIMITER = ":"
+
+# Pattern used to check if a line from a metadata file declares a new field.
+# A declaration is a known field name (any casing) at the very start of the
+# line, immediately followed by the delimiter. The names and the delimiter are
+# escaped so they are always matched literally, even if one of them ever
+# contains a regex metacharacter.
+_PATTERN_FIELD_DECLARATION = re.compile(
+    "^({}){}".format(
+        "|".join(re.escape(name) for name in known_fields.ALL_FIELD_NAMES),
+        re.escape(FIELD_DELIMITER),
+    ),
+    re.IGNORECASE,
+)
+
+
+def parse_file(filepath: str) -> List[dm.DependencyMetadata]:
+  """Reads and parses the metadata in the given file.
+
+    The file is scanned line by line:
+      * a line matching DEPENDENCY_DIVIDER closes the current dependency;
+      * a line matching _PATTERN_FIELD_DECLARATION starts a new field, whose
+        value begins with the text after the first FIELD_DELIMITER;
+      * any other line is appended to the value of the field currently
+        being read, or dropped when no field is open (for example, text
+        following a one-liner field).
+
+    Args:
+        filepath: path to metadata file; its content is decoded as UTF-8.
+
+    Returns:
+        each dependency's metadata described in the file, in file order.
+        Dependencies that end up with no entries are left out.
+
+    Raises:
+        OSError: if the file cannot be opened.
+        UnicodeDecodeError: if the file content is not valid UTF-8.
+  """
+  # Decode explicitly as UTF-8 so the result does not depend on the default
+  # locale encoding of the machine running the parser.
+  with open(filepath, "r", encoding="utf-8") as f:
+    lines = f.readlines()
+
+  dependencies = []
+  current_metadata = dm.DependencyMetadata()
+  # Name and accumulated raw value of the multi-line field being read, if any.
+  current_field_name = None
+  current_field_value = ""
+  for line in lines:
+    # Check if a new dependency is being described.
+    if DEPENDENCY_DIVIDER.match(line):
+      if current_field_name:
+        # Save the field value for the previous dependency.
+        current_metadata.add_entry(current_field_name, current_field_value)
+      if current_metadata.has_entries():
+        # Add the previous dependency to the results. Consecutive dividers
+        # produce an empty metadata object, which is skipped here.
+        dependencies.append(current_metadata)
+      # Reset for the new dependency's metadata, and reset the field state.
+      current_metadata = dm.DependencyMetadata()
+      current_field_name = None
+      current_field_value = ""
+
+    elif _PATTERN_FIELD_DECLARATION.match(line):
+      # A new field starts here, so the field being read (if any) is complete;
+      # save its value to the current dependency's metadata.
+      if current_field_name:
+        current_metadata.add_entry(current_field_name, current_field_value)
+
+      # Split only on the first delimiter so values may contain it (e.g. URLs).
+      # The field name keeps the casing used in the file.
+      current_field_name, current_field_value = line.split(FIELD_DELIMITER, 1)
+      field = known_fields.get_field(current_field_name)
+      if field and field.is_one_liner():
+        # The field should be on one line, so it can be added now.
+        current_metadata.add_entry(current_field_name, current_field_value)
+        # Reset the field state, so following lines are not attached to it.
+        current_field_name = None
+        current_field_value = ""
+
+    elif current_field_name:
+      # The field is on multiple lines, so add this line to the field value.
+      current_field_value += line
+
+  # At this point, the end of the file has been reached. Save any remaining
+  # field data and metadata.
+  if current_field_name:
+    current_metadata.add_entry(current_field_name, current_field_value)
+  if current_metadata.has_entries():
+    dependencies.append(current_metadata)
+
+  return dependencies
--- a/metadata/tests/data/README.chromium.test
+++ b/metadata/tests/data/README.chromium.test
@@ -0,0 +1,50 @@
+Name: Test-A README for Chromium metadata
+Short Name: metadata-test-valid
+URL: https://www.example.com/metadata,
+     https://www.example.com/parser
+Version: 1.0.12
+Date: 2020-12-03
+License: Apache, 2.0 and MIT
+License File: LICENSE
+Security Critical: yes
+Shipped: yes
+CPEPrefix: unknown
+This line should be ignored because CPEPrefix is a one-liner field.
+Description:
+A test metadata file, with a
+ multi-line description.
+
+Local Modifications:
+None,
+EXCEPT:
+* nothing.
+
+-------------------- DEPENDENCY DIVIDER --------------------
+
+Name: Test-B README for Chromium metadata
+SHORT NAME: metadata-test-invalid
+URL: file://home/drive/chromium/src/metadata
+Version:0
+Date: 2020-12-03
+License: MIT
+Security critical: yes
+Shipped:    Yes
+
+Description:
+
+Local Modifications:     None.
+
+-------------------- DEPENDENCY DIVIDER --------------------
+-------------------- DEPENDENCY DIVIDER --------------------
+
+Name: Test-C README for Chromium metadata
+URL: https://www.example.com/first
+URL: https://www.example.com/second
+Version: N/A
+Date: 2020-12-03
+License: Custom license
+Security Critical: yes
+
+Description:
+Test metadata with multiple entries for one field, and
+missing a mandatory field.
--- a/metadata/tests/parse_test.py
+++ b/metadata/tests/parse_test.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+# Copyright 2023 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import os
+import sys
+import unittest
+
+_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
+# The repo's root directory.
+_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
+
+# Add the repo's root directory for clearer imports.
+sys.path.insert(0, _ROOT_DIR)
+
+import metadata.parse
+
+
+class ParseTest(unittest.TestCase):
+  """Tests metadata.parse.parse_file against the checked-in test README."""
+  def test_parse(self):
+    """Parses data/README.chromium.test and checks every parsed entry.
+
+    The expected tuples below mirror the fixture file exactly (field order,
+    field-name casing and stripped values), so the fixture and this test must
+    be changed together.
+    """
+    filepath = os.path.join(_THIS_DIR, "data", "README.chromium.test")
+    all_metadata = metadata.parse.parse_file(filepath)
+
+    # Dependency metadata with no entries at all are ignored.
+    # (The fixture has two consecutive divider lines with nothing in between.)
+    self.assertEqual(len(all_metadata), 3)
+
+    # Check entries are added according to fields being one-liners.
+    # The fixture line following "CPEPrefix: unknown" must not show up here.
+    self.assertListEqual(
+        all_metadata[0].get_entries(),
+        [
+            ("Name", "Test-A README for Chromium metadata"),
+            ("Short Name", "metadata-test-valid"),
+            ("URL", "https://www.example.com/metadata,\n"
+             "     https://www.example.com/parser"),
+            ("Version", "1.0.12"),
+            ("Date", "2020-12-03"),
+            ("License", "Apache, 2.0 and MIT"),
+            ("License File", "LICENSE"),
+            ("Security Critical", "yes"),
+            ("Shipped", "yes"),
+            ("CPEPrefix", "unknown"),
+            ("Description", "A test metadata file, with a\n"
+             " multi-line description."),
+            ("Local Modifications", "None,\nEXCEPT:\n* nothing."),
+        ],
+    )
+
+    # Check the parser handles different casing for field names, and strips
+    # leading and trailing whitespace from values.
+    self.assertListEqual(
+        all_metadata[1].get_entries(),
+        [
+            ("Name", "Test-B README for Chromium metadata"),
+            ("SHORT NAME", "metadata-test-invalid"),
+            ("URL", "file://home/drive/chromium/src/metadata"),
+            ("Version", "0"),
+            ("Date", "2020-12-03"),
+            ("License", "MIT"),
+            ("Security critical", "yes"),
+            ("Shipped", "Yes"),
+            ("Description", ""),
+            ("Local Modifications", "None."),
+        ],
+    )
+
+    # Check repeated fields persist in the metadata's entries.
+    self.assertListEqual(
+        all_metadata[2].get_entries(),
+        [
+            ("Name", "Test-C README for Chromium metadata"),
+            ("URL", "https://www.example.com/first"),
+            ("URL", "https://www.example.com/second"),
+            ("Version", "N/A"),
+            ("Date", "2020-12-03"),
+            ("License", "Custom license"),
+            ("Security Critical", "yes"),
+            ("Description", "Test metadata with multiple entries for one "
+             "field, and\nmissing a mandatory field."),
+        ],
+    )
+
+
+if __name__ == "__main__":
+  unittest.main()