From 68c038603f05f0b797e67b308597d67845fa2a6a Mon Sep 17 00:00:00 2001 From: Jiewei Qian Date: Tue, 30 Jul 2024 02:06:14 +0000 Subject: [PATCH] metadata: add line number reporting Adds support to report line numbers when validation fails. Change-Id: Iba94c5b3582d7e51f15d266d188909d3a82b75cb Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5740963 Reviewed-by: Jordan Brown Commit-Queue: Jiewei Qian Reviewed-by: Anne Redulla --- metadata/dependency_metadata.py | 52 +++++++++++++++++-- metadata/parse.py | 22 +++++++- ...EADME.chromium.test.validation-line-number | 13 +++++ metadata/tests/parse_test.py | 48 ++++++++++++++++- metadata/tests/validate_test.py | 37 +++++++++++++ metadata/validation_result.py | 7 +++ 6 files changed, 172 insertions(+), 7 deletions(-) create mode 100644 metadata/tests/data/README.chromium.test.validation-line-number diff --git a/metadata/dependency_metadata.py b/metadata/dependency_metadata.py index 12755c84a..6a196d190 100644 --- a/metadata/dependency_metadata.py +++ b/metadata/dependency_metadata.py @@ -6,6 +6,7 @@ from collections import defaultdict import os import sys +import itertools from typing import Dict, List, Set, Tuple, Union, Optional, Literal, Any _THIS_DIR = os.path.abspath(os.path.dirname(__file__)) @@ -64,6 +65,15 @@ class DependencyMetadata: # The current value of each field. self._metadata: Dict[field_types.MetadataField, str] = {} + # The line numbers of each metadata fields. + self._metadata_line_numbers: Dict[field_types.MetadataField, + Set[int]] = defaultdict(lambda: set()) + + # The line numbers of the first and the last line (in the text file) + # of this dependency metadata. + self._first_line = float('inf') + self._last_line = -1 + # The record of how many times a field entry was added. self._occurrences: Dict[field_types.MetadataField, int] = defaultdict(int) @@ -83,6 +93,22 @@ class DependencyMetadata: def get_entries(self) -> List[Tuple[str, str]]: return list(self._entries) + def record_line(self, line_number): + """Records `line_number` to be part of this metadata.""" + self._first_line = min(self._first_line, line_number) + self._last_line = max(self._last_line, line_number) + + def record_field_line_number(self, field: field_types.MetadataField, + line_number: int): + self._metadata_line_numbers[field].add(line_number) + + def get_first_and_last_line_number(self) -> Tuple[int, int]: + return (self._first_line, self._last_line) + + def get_field_line_numbers(self, + field: field_types.MetadataField) -> List[int]: + return sorted(self._metadata_line_numbers[field]) + def _assess_required_fields(self) -> Set[field_types.MetadataField]: """Returns the set of required fields, based on the current metadata. @@ -131,16 +157,26 @@ class DependencyMetadata: results = [] # Check for duplicate fields. - repeated_field_info = [ - f"{field.get_name()} ({count})" - for field, count in self._occurrences.items() if count > 1 + repeated_fields = [ + field for field, count in self._occurrences.items() if count > 1 ] - if repeated_field_info: - repeated = ", ".join(repeated_field_info) + if repeated_fields: + repeated = ", ".join([ + f"{field.get_name()} ({self._occurrences[field]})" + for field in repeated_fields + ]) error = vr.ValidationError(reason="There is a repeated field.", additional=[ f"Repeated fields: {repeated}", ]) + # Merge line numbers. + lines = sorted( + set( + itertools.chain.from_iterable([ + self.get_field_line_numbers(field) + for field in repeated_fields + ]))) + error.set_lines(lines) results.append(error) # Process alias fields. @@ -155,6 +191,8 @@ class DependencyMetadata: if field_result: field_result.set_tag(tag="field", value=main_field.get_name()) + field_result.set_lines( + self.get_field_line_numbers(main_field)) results.append(field_result) self._metadata[main_field] = self._metadata[alias_field] @@ -167,6 +205,8 @@ class DependencyMetadata: field_result = source_field.validate(value) if field_result: field_result.set_tag(tag="field", value=source_field.get_name()) + field_result.set_lines( + self.get_field_line_numbers(source_field)) results.append(field_result) # Check required fields are present. @@ -210,6 +250,8 @@ class DependencyMetadata: if result: result.set_tag(tag="field", value=known_fields.LICENSE_FILE.get_name()) + result.set_lines( + self.get_field_line_numbers(known_fields.LICENSE_FILE)) results.append(result) return results diff --git a/metadata/parse.py b/metadata/parse.py index 4ad4fea41..debaa34ce 100644 --- a/metadata/parse.py +++ b/metadata/parse.py @@ -53,7 +53,7 @@ def parse_content(content: str) -> List[dm.DependencyMetadata]: current_field_name = None current_field_value = "" - for line in content.splitlines(keepends=True): + for line_number, line in enumerate(content.splitlines(keepends=True), 1): # Whether the current line should be part of a structured value. if current_field_spec: expect_structured_field_value = current_field_spec.is_structured() @@ -89,15 +89,35 @@ def parse_content(content: str) -> List[dm.DependencyMetadata]: FIELD_DELIMITER, 1) current_field_spec = known_fields.get_field(current_field_name) + current_metadata.record_line(line_number) + if current_field_spec: + current_metadata.record_field_line_number( + current_field_spec, line_number) + elif current_field_name: + if line.strip(): + current_metadata.record_line(line_number) + if current_field_spec: + current_metadata.record_field_line_number( + current_field_spec, line_number) # The field is on multiple lines, so add this line to the # field value. current_field_value += line + else: + # Text that aren't part of any field (e.g. free form text). + # Record the line number if the line is non-empty. + if line.strip(): + current_metadata.record_line(line_number) + # Check if current field value indicates end of the field. if current_field_spec and current_field_spec.should_terminate_field( current_field_value): assert current_field_name + current_metadata.record_line(line_number) + if current_field_spec: + current_metadata.record_field_line_number( + current_field_spec, line_number) current_metadata.add_entry(current_field_name, current_field_value) current_field_spec = None current_field_name = None diff --git a/metadata/tests/data/README.chromium.test.validation-line-number b/metadata/tests/data/README.chromium.test.validation-line-number new file mode 100644 index 000000000..8e69ef21a --- /dev/null +++ b/metadata/tests/data/README.chromium.test.validation-line-number @@ -0,0 +1,13 @@ +Short Name: foo +URL: https://www.example.com/metadata, + https://example.com/duplicate_url, + i_am_not_a_url +NAME: Repeated Name + +Version: N/A + +License: BAD_LICENSE_VALUE +License File: DOES_NOT_EXIST + +Security Critical: yes +Shipped in Chromium: DONT_KNOW diff --git a/metadata/tests/parse_test.py b/metadata/tests/parse_test.py index 1453976fa..e32d1c83b 100644 --- a/metadata/tests/parse_test.py +++ b/metadata/tests/parse_test.py @@ -16,7 +16,7 @@ sys.path.insert(0, _ROOT_DIR) import gclient_utils import metadata.parse - +import metadata.fields.known class ParseTest(unittest.TestCase): def test_parse_single(self): @@ -52,6 +52,10 @@ class ParseTest(unittest.TestCase): ], ) + # Check line numbers are recorded correctly. + self.assertEqual((1, 23), + all_metadata[0].get_first_and_last_line_number()) + def test_parse_multiple(self): """Check parsing works for multiple dependencies' metadata.""" filepath = os.path.join(_THIS_DIR, "data", @@ -83,6 +87,8 @@ class ParseTest(unittest.TestCase): ("Local Modifications", "None,\nEXCEPT:\n* nothing."), ], ) + self.assertEqual((1, 20), + all_metadata[0].get_first_and_last_line_number()) # Check the parser handles different casing for field names, and # strips leading and trailing whitespace from values. @@ -102,6 +108,8 @@ class ParseTest(unittest.TestCase): ("Local Modifications", "None."), ], ) + self.assertEqual((24, 35), + all_metadata[1].get_first_and_last_line_number()) # Check repeated fields persist in the metadata's entries. self.assertListEqual( @@ -119,6 +127,8 @@ class ParseTest(unittest.TestCase): "field, and\nmissing a mandatory field."), ], ) + self.assertEqual((40, 50), + all_metadata[2].get_first_and_last_line_number()) def test_parse_multiple_local_modifications(self): """Check parsing works for multiple dependencies, each with different local modifications.""" @@ -137,6 +147,8 @@ class ParseTest(unittest.TestCase): "1. Modified X file\n2. Deleted Y file"), ], ) + self.assertEqual((1, 5), + all_metadata[0].get_first_and_last_line_number()) self.assertListEqual( all_metadata[1].get_entries(), @@ -145,6 +157,8 @@ class ParseTest(unittest.TestCase): ("Local Modifications", "None"), ], ) + self.assertEqual((9, 10), + all_metadata[1].get_first_and_last_line_number()) self.assertListEqual( all_metadata[2].get_entries(), @@ -153,6 +167,8 @@ class ParseTest(unittest.TestCase): ("Local Modifications", "None."), ], ) + self.assertEqual((14, 24), + all_metadata[2].get_first_and_last_line_number()) self.assertListEqual( all_metadata[3].get_entries(), @@ -161,6 +177,36 @@ class ParseTest(unittest.TestCase): ("Local Modifications", "None,\nExcept modified file X."), ], ) + self.assertEqual((28, 30), + all_metadata[3].get_first_and_last_line_number()) + + def test_parse_per_field_line_numbers(self): + """Check parsing marks the line numbers of each individual fields.""" + filepath = os.path.join(_THIS_DIR, "data", + "README.chromium.test.single-valid") + content = gclient_utils.FileRead(filepath) + all_metadata = metadata.parse.parse_content(content) + + self.assertEqual(len(all_metadata), 1) + + dm = all_metadata[0] + field_spec = metadata.fields.known + expected_line_numbers = { + field_spec.NAME: [1], + field_spec.SHORT_NAME: [2], + field_spec.URL: [3, 4], + field_spec.VERSION: [8], + field_spec.DATE: [9], + field_spec.LICENSE: [10], + field_spec.LICENSE_FILE: [11], + field_spec.SECURITY_CRITICAL: [12], + field_spec.SHIPPED: [13], + field_spec.CPE_PREFIX: [14], + field_spec.DESCRIPTION: [16, 17, 18], + field_spec.LOCAL_MODIFICATIONS: [20, 21], + } + self.assertEqual(dm.get_field_line_numbers(metadata.fields.known.NAME), + [1]) if __name__ == "__main__": unittest.main() diff --git a/metadata/tests/validate_test.py b/metadata/tests/validate_test.py index c45ebe70d..420c0a9f3 100644 --- a/metadata/tests/validate_test.py +++ b/metadata/tests/validate_test.py @@ -6,6 +6,7 @@ import os import sys import unittest +import unittest.mock _THIS_DIR = os.path.abspath(os.path.dirname(__file__)) # The repo's root directory. @@ -17,6 +18,7 @@ sys.path.insert(0, _ROOT_DIR) import gclient_utils import metadata.validate import metadata.validation_result +import metadata.fields.known # Common paths for tests. _SOURCE_FILE_DIR = os.path.join(_THIS_DIR, "data") @@ -182,5 +184,40 @@ class ValidationResultTest(unittest.TestCase): self.assertEqual(["message1", "message2"], ve.get_additional()) +class ValidationWithLineNumbers(unittest.TestCase): + + def test_reports_line_number(self): + """Checks validate reports line number if available.""" + filepath = os.path.join(_THIS_DIR, "data", + "README.chromium.test.validation-line-number") + content = gclient_utils.FileRead(filepath) + unittest.mock.patch( + 'metadata.fields.known.LICENSE_FILE.validate_on_disk', + return_value=metadata.validation_result.ValidationError( + "File doesn't exist.")) + + results = metadata.validate.validate_content(content, + "chromium/src/test_dir", + "chromium/src") + + for r in results: + if r.get_reason() == 'License File is invalid.': + self.assertEqual(r.get_lines(), [10]) + elif r.get_reason( + ) == "Required field 'License Android Compatible' is missing.": + # We can't add a line number to errors caused by missing fields. + self.assertEqual(r.get_lines(), []) + elif r.get_reason() == "Versioning fields are insufficient.": + # We can't add a line number to errors caused by missing fields. + self.assertEqual(r.get_lines(), []) + elif r.get_reason( + ) == "License has a license not in the allowlist.": + self.assertEqual(r.get_lines(), [9]) + elif r.get_reason() == "URL is invalid.": + self.assertEqual(r.get_lines(), [2, 3, 4]) + elif r.get_reason() == "Shipped in Chromium is invalid": + self.assertEqual(r.get_lines(), [13]) + + if __name__ == "__main__": unittest.main() diff --git a/metadata/validation_result.py b/metadata/validation_result.py index a4adae6aa..bd0517056 100644 --- a/metadata/validation_result.py +++ b/metadata/validation_result.py @@ -28,6 +28,7 @@ class ValidationResult: self._fatal = fatal self._additional = additional self._tags = {} + self._lines = [] def __str__(self) -> str: prefix = self.get_severity_prefix() @@ -98,6 +99,12 @@ class ValidationResult: return message + def set_lines(self, lines: List[int]): + self._lines = lines + + def get_lines(self) -> List[int]: + return self._lines + class ValidationError(ValidationResult): """Fatal validation issue. Presubmit should fail."""