From 2b583af7e1b603bf2620130a9840adf4128749a0 Mon Sep 17 00:00:00 2001 From: Anne Redulla Date: Thu, 17 Aug 2023 05:05:13 +0000 Subject: [PATCH] [ssci] Added parser for README validator Bug: b:277147404 Change-Id: I7ee0fe35e1017eb477255f12045d00e855f7dfb4 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4787830 Reviewed-by: Rachael Newitt Auto-Submit: Anne Redulla Commit-Queue: Rachael Newitt --- metadata/dependency_metadata.py | 21 ++++++ metadata/fields/custom/cpe_prefix.py | 2 +- metadata/fields/types.py | 3 + metadata/parse.py | 89 ++++++++++++++++++++++++ metadata/tests/data/README.chromium.test | 50 +++++++++++++ metadata/tests/parse_test.py | 85 ++++++++++++++++++++++ 6 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 metadata/dependency_metadata.py create mode 100644 metadata/parse.py create mode 100644 metadata/tests/data/README.chromium.test create mode 100644 metadata/tests/parse_test.py diff --git a/metadata/dependency_metadata.py b/metadata/dependency_metadata.py new file mode 100644 index 0000000000..64904ab4f3 --- /dev/null +++ b/metadata/dependency_metadata.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# Copyright 2023 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
class DependencyMetadata:
    """The metadata for a single dependency.

    Entries are stored as (field name, field value) pairs in the order they
    were added. The same field name may be added more than once; every
    occurrence is kept.
    """
    def __init__(self):
        # Ordered (field_name, field_value) pairs; repeated names are kept.
        self._entries: List[Tuple[str, str]] = []

    def add_entry(self, field_name: str, field_value: str) -> None:
        """Appends an entry for the given field.

        Args:
            field_name: the field's name; stored exactly as given.
            field_value: the field's raw value; leading and trailing
                         whitespace is stripped before it is stored.
        """
        self._entries.append((field_name, field_value.strip()))

    def has_entries(self) -> bool:
        """Returns whether at least one entry has been added."""
        return bool(self._entries)

    def get_entries(self) -> List[Tuple[str, str]]:
        """Returns the entries in the order they were added.

        A new list is returned on every call, so changing it does not
        affect the entries stored in this object.
        """
        return list(self._entries)
def parse_file(filepath: str) -> List[dm.DependencyMetadata]:
    """Reads and parses the metadata in the given file.

    The file is scanned line by line:
      * a dependency divider line ends the current dependency;
      * a line starting with a known field name followed by the field
        delimiter starts a new field;
      * any other line is appended to the value of the field currently
        being read, or ignored if no field is being read (for example, a
        line that follows a one-liner field).

    Args:
        filepath: path to metadata file.

    Returns:
        each dependency's metadata described in the file. Dependencies
        with no entries at all are omitted.
    """
    dependencies = []
    current_metadata = dm.DependencyMetadata()
    current_field_name = None
    # Pieces of the current field's value; joined once when the field ends
    # rather than growing a string with repeated concatenation.
    current_value_parts = []

    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            # Check if a new dependency is being described.
            if DEPENDENCY_DIVIDER.match(line):
                if current_field_name:
                    # Save the field value for the previous dependency.
                    current_metadata.add_entry(current_field_name,
                                               "".join(current_value_parts))
                if current_metadata.has_entries():
                    # Add the previous dependency to the results.
                    dependencies.append(current_metadata)
                # Reset for the new dependency's metadata, and reset the
                # field state.
                current_metadata = dm.DependencyMetadata()
                current_field_name = None
                current_value_parts = []

            elif _PATTERN_FIELD_DECLARATION.match(line):
                # Save the field value to the current dependency's metadata.
                if current_field_name:
                    current_metadata.add_entry(current_field_name,
                                               "".join(current_value_parts))

                # Only the first delimiter separates name from value, so
                # values may themselves contain the delimiter (e.g. URLs).
                current_field_name, first_part = line.split(
                    FIELD_DELIMITER, 1)
                field = known_fields.get_field(current_field_name)
                if field and field.is_one_liner():
                    # The field should be on one line, so it can be added
                    # now.
                    current_metadata.add_entry(current_field_name,
                                               first_part)
                    # Reset the field state, so following lines are ignored
                    # until the next field declaration.
                    current_field_name = None
                    current_value_parts = []
                else:
                    current_value_parts = [first_part]

            elif current_field_name:
                # The field is on multiple lines, so add this line to the
                # field value.
                current_value_parts.append(line)

    # At this point, the end of the file has been reached. Save any
    # remaining field data and metadata.
    if current_field_name:
        current_metadata.add_entry(current_field_name,
                                   "".join(current_value_parts))
    if current_metadata.has_entries():
        dependencies.append(current_metadata)

    return dependencies
class ParseTest(unittest.TestCase):
    """Checks parse_file against the sample metadata file in data/."""
    def test_parse(self):
        readme_path = os.path.join(_THIS_DIR, "data", "README.chromium.test")
        parsed = metadata.parse.parse_file(readme_path)

        # Dependency metadata with no entries at all are ignored.
        self.assertEqual(len(parsed), 3)
        first, second, third = parsed

        # Entries are added according to fields being one-liners.
        expected_first = [
            ("Name", "Test-A README for Chromium metadata"),
            ("Short Name", "metadata-test-valid"),
            ("URL", "https://www.example.com/metadata,\n"
             " https://www.example.com/parser"),
            ("Version", "1.0.12"),
            ("Date", "2020-12-03"),
            ("License", "Apache, 2.0 and MIT"),
            ("License File", "LICENSE"),
            ("Security Critical", "yes"),
            ("Shipped", "yes"),
            ("CPEPrefix", "unknown"),
            ("Description", "A test metadata file, with a\n"
             " multi-line description."),
            ("Local Modifications", "None,\nEXCEPT:\n* nothing."),
        ]
        self.assertListEqual(first.get_entries(), expected_first)

        # Field names keep whatever casing the file used, and values have
        # their leading and trailing whitespace stripped.
        expected_second = [
            ("Name", "Test-B README for Chromium metadata"),
            ("SHORT NAME", "metadata-test-invalid"),
            ("URL", "file://home/drive/chromium/src/metadata"),
            ("Version", "0"),
            ("Date", "2020-12-03"),
            ("License", "MIT"),
            ("Security critical", "yes"),
            ("Shipped", "Yes"),
            ("Description", ""),
            ("Local Modifications", "None."),
        ]
        self.assertListEqual(second.get_entries(), expected_second)

        # Repeated fields persist in the metadata's entries.
        expected_third = [
            ("Name", "Test-C README for Chromium metadata"),
            ("URL", "https://www.example.com/first"),
            ("URL", "https://www.example.com/second"),
            ("Version", "N/A"),
            ("Date", "2020-12-03"),
            ("License", "Custom license"),
            ("Security Critical", "yes"),
            ("Description",
             "Test metadata with multiple entries for one field, and\n"
             "missing a mandatory field."),
        ]
        self.assertListEqual(third.get_entries(), expected_third)