metadata: add line number reporting

Adds support to report line numbers when validation fails.

Change-Id: Iba94c5b3582d7e51f15d266d188909d3a82b75cb
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5740963
Reviewed-by: Jordan Brown <rop@google.com>
Commit-Queue: Jiewei Qian <qjw@chromium.org>
Reviewed-by: Anne Redulla <aredulla@google.com>
parent 370d588239
commit 68c038603f
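
For context, a minimal sketch of how a caller might surface the new line numbers when printing validation failures. The input path and directories are hypothetical; validate_content() and get_lines() are the APIs touched by the diff below.

import gclient_utils
import metadata.validate

content = gclient_utils.FileRead("README.chromium")  # hypothetical input file
results = metadata.validate.validate_content(content, "chromium/src/some_dir",
                                             "chromium/src")
for result in results:
    # get_lines() returns the recorded 1-based line numbers, or [] when no
    # line information was captured for this result.
    lines = result.get_lines()
    suffix = f" (lines: {lines})" if lines else ""
    print(f"{result}{suffix}")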

@@ -6,6 +6,7 @@
 from collections import defaultdict
 import os
 import sys
+import itertools
 from typing import Dict, List, Set, Tuple, Union, Optional, Literal, Any
 
 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
@@ -64,6 +65,15 @@ class DependencyMetadata:
         # The current value of each field.
         self._metadata: Dict[field_types.MetadataField, str] = {}
 
+        # The line numbers of each metadata fields.
+        self._metadata_line_numbers: Dict[field_types.MetadataField,
+                                          Set[int]] = defaultdict(lambda: set())
+
+        # The line numbers of the first and the last line (in the text file)
+        # of this dependency metadata.
+        self._first_line = float('inf')
+        self._last_line = -1
+
         # The record of how many times a field entry was added.
         self._occurrences: Dict[field_types.MetadataField,
                                 int] = defaultdict(int)
@@ -83,6 +93,22 @@ class DependencyMetadata:
     def get_entries(self) -> List[Tuple[str, str]]:
         return list(self._entries)
 
+    def record_line(self, line_number):
+        """Records `line_number` to be part of this metadata."""
+        self._first_line = min(self._first_line, line_number)
+        self._last_line = max(self._last_line, line_number)
+
+    def record_field_line_number(self, field: field_types.MetadataField,
+                                 line_number: int):
+        self._metadata_line_numbers[field].add(line_number)
+
+    def get_first_and_last_line_number(self) -> Tuple[int, int]:
+        return (self._first_line, self._last_line)
+
+    def get_field_line_numbers(self,
+                               field: field_types.MetadataField) -> List[int]:
+        return sorted(self._metadata_line_numbers[field])
+
     def _assess_required_fields(self) -> Set[field_types.MetadataField]:
         """Returns the set of required fields, based on the current
         metadata.
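
The recording methods above amount to a running min/max with sentinel values plus a per-field set of line numbers. A self-contained sketch of the same idea (illustrative only, not the depot_tools class itself):

from collections import defaultdict

class LineTracker:
    def __init__(self):
        # Sentinels: no lines recorded yet.
        self._first = float('inf')
        self._last = -1
        self._per_field = defaultdict(set)

    def record(self, line_number, field=None):
        # Track the overall span, and optionally the lines of one field.
        self._first = min(self._first, line_number)
        self._last = max(self._last, line_number)
        if field is not None:
            self._per_field[field].add(line_number)

    def span(self):
        return (self._first, self._last)

    def field_lines(self, field):
        return sorted(self._per_field[field])

tracker = LineTracker()
tracker.record(3, field="URL")
tracker.record(4, field="URL")
tracker.record(10)
assert tracker.span() == (3, 10)
assert tracker.field_lines("URL") == [3, 4]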
@@ -131,16 +157,26 @@ class DependencyMetadata:
         results = []
 
         # Check for duplicate fields.
-        repeated_field_info = [
-            f"{field.get_name()} ({count})"
-            for field, count in self._occurrences.items() if count > 1
+        repeated_fields = [
+            field for field, count in self._occurrences.items() if count > 1
         ]
-        if repeated_field_info:
-            repeated = ", ".join(repeated_field_info)
+        if repeated_fields:
+            repeated = ", ".join([
+                f"{field.get_name()} ({self._occurrences[field]})"
+                for field in repeated_fields
+            ])
             error = vr.ValidationError(reason="There is a repeated field.",
                                        additional=[
                                            f"Repeated fields: {repeated}",
                                        ])
+            # Merge line numbers.
+            lines = sorted(
+                set(
+                    itertools.chain.from_iterable([
+                        self.get_field_line_numbers(field)
+                        for field in repeated_fields
+                    ])))
+            error.set_lines(lines)
             results.append(error)
 
         # Process alias fields.
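
The itertools import added earlier exists for this merge step: the per-field line numbers of every repeated field are flattened, de-duplicated, and sorted before being attached to the error. A short illustration with made-up numbers:

import itertools

field_lines = [[5, 12], [12], [2, 3]]  # example per-field line numbers
merged = sorted(set(itertools.chain.from_iterable(field_lines)))
assert merged == [2, 3, 5, 12]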
@@ -155,6 +191,8 @@ class DependencyMetadata:
                 if field_result:
                     field_result.set_tag(tag="field",
                                          value=main_field.get_name())
+                    field_result.set_lines(
+                        self.get_field_line_numbers(main_field))
                     results.append(field_result)
 
                 self._metadata[main_field] = self._metadata[alias_field]
@@ -167,6 +205,8 @@ class DependencyMetadata:
             field_result = source_field.validate(value)
             if field_result:
                 field_result.set_tag(tag="field", value=source_field.get_name())
+                field_result.set_lines(
+                    self.get_field_line_numbers(source_field))
                 results.append(field_result)
 
         # Check required fields are present.
@@ -210,6 +250,8 @@ class DependencyMetadata:
             if result:
                 result.set_tag(tag="field",
                                value=known_fields.LICENSE_FILE.get_name())
+                result.set_lines(
+                    self.get_field_line_numbers(known_fields.LICENSE_FILE))
                 results.append(result)
 
         return results

@@ -53,7 +53,7 @@ def parse_content(content: str) -> List[dm.DependencyMetadata]:
     current_field_name = None
     current_field_value = ""
 
-    for line in content.splitlines(keepends=True):
+    for line_number, line in enumerate(content.splitlines(keepends=True), 1):
         # Whether the current line should be part of a structured value.
         if current_field_spec:
             expect_structured_field_value = current_field_spec.is_structured()
@@ -89,15 +89,35 @@ def parse_content(content: str) -> List[dm.DependencyMetadata]:
                 FIELD_DELIMITER, 1)
             current_field_spec = known_fields.get_field(current_field_name)
+            current_metadata.record_line(line_number)
+            if current_field_spec:
+                current_metadata.record_field_line_number(
+                    current_field_spec, line_number)
         elif current_field_name:
+            if line.strip():
+                current_metadata.record_line(line_number)
+                if current_field_spec:
+                    current_metadata.record_field_line_number(
+                        current_field_spec, line_number)
+
             # The field is on multiple lines, so add this line to the
             # field value.
             current_field_value += line
+        else:
+            # Text that aren't part of any field (e.g. free form text).
+            # Record the line number if the line is non-empty.
+            if line.strip():
+                current_metadata.record_line(line_number)
 
         # Check if current field value indicates end of the field.
         if current_field_spec and current_field_spec.should_terminate_field(
                 current_field_value):
             assert current_field_name
+            current_metadata.record_line(line_number)
+            if current_field_spec:
+                current_metadata.record_field_line_number(
+                    current_field_spec, line_number)
+
             current_metadata.add_entry(current_field_name, current_field_value)
             current_field_spec = None
             current_field_name = None
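
The loop change keeps the existing splitlines(keepends=True) behaviour and only adds a 1-based counter via enumerate, so the recorded numbers match what an editor shows. For example:

content = "Name: foo\nURL: https://example.com\n"
for line_number, line in enumerate(content.splitlines(keepends=True), 1):
    print(line_number, repr(line))
# Prints:
# 1 'Name: foo\n'
# 2 'URL: https://example.com\n'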

@@ -0,0 +1,13 @@
+Short Name: foo
+URL: https://www.example.com/metadata,
+     https://example.com/duplicate_url,
+     i_am_not_a_url
+NAME: Repeated Name
+Version: N/A
+License: BAD_LICENSE_VALUE
+License File: DOES_NOT_EXIST
+Security Critical: yes
+Shipped in Chromium: DONT_KNOW

@@ -16,7 +16,7 @@ sys.path.insert(0, _ROOT_DIR)
 import gclient_utils
 import metadata.parse
+import metadata.fields.known
 
 
 class ParseTest(unittest.TestCase):
     def test_parse_single(self):
@@ -52,6 +52,10 @@ class ParseTest(unittest.TestCase):
             ],
         )
 
+        # Check line numbers are recorded correctly.
+        self.assertEqual((1, 23),
+                         all_metadata[0].get_first_and_last_line_number())
+
     def test_parse_multiple(self):
         """Check parsing works for multiple dependencies' metadata."""
         filepath = os.path.join(_THIS_DIR, "data",
@@ -83,6 +87,8 @@ class ParseTest(unittest.TestCase):
                 ("Local Modifications", "None,\nEXCEPT:\n* nothing."),
             ],
         )
+        self.assertEqual((1, 20),
+                         all_metadata[0].get_first_and_last_line_number())
 
         # Check the parser handles different casing for field names, and
         # strips leading and trailing whitespace from values.
@@ -102,6 +108,8 @@ class ParseTest(unittest.TestCase):
                 ("Local Modifications", "None."),
             ],
         )
+        self.assertEqual((24, 35),
+                         all_metadata[1].get_first_and_last_line_number())
 
         # Check repeated fields persist in the metadata's entries.
         self.assertListEqual(
@@ -119,6 +127,8 @@ class ParseTest(unittest.TestCase):
                  "field, and\nmissing a mandatory field."),
             ],
         )
+        self.assertEqual((40, 50),
+                         all_metadata[2].get_first_and_last_line_number())
 
     def test_parse_multiple_local_modifications(self):
         """Check parsing works for multiple dependencies, each with different local modifications."""
@@ -137,6 +147,8 @@ class ParseTest(unittest.TestCase):
                  "1. Modified X file\n2. Deleted Y file"),
             ],
         )
+        self.assertEqual((1, 5),
+                         all_metadata[0].get_first_and_last_line_number())
 
         self.assertListEqual(
             all_metadata[1].get_entries(),
@@ -145,6 +157,8 @@ class ParseTest(unittest.TestCase):
                 ("Local Modifications", "None"),
             ],
         )
+        self.assertEqual((9, 10),
+                         all_metadata[1].get_first_and_last_line_number())
 
         self.assertListEqual(
             all_metadata[2].get_entries(),
@@ -153,6 +167,8 @@ class ParseTest(unittest.TestCase):
                 ("Local Modifications", "None."),
             ],
         )
+        self.assertEqual((14, 24),
+                         all_metadata[2].get_first_and_last_line_number())
 
         self.assertListEqual(
             all_metadata[3].get_entries(),
@@ -161,6 +177,36 @@ class ParseTest(unittest.TestCase):
                 ("Local Modifications", "None,\nExcept modified file X."),
             ],
         )
+        self.assertEqual((28, 30),
+                         all_metadata[3].get_first_and_last_line_number())
+
+    def test_parse_per_field_line_numbers(self):
+        """Check parsing marks the line numbers of each individual fields."""
+        filepath = os.path.join(_THIS_DIR, "data",
+                                "README.chromium.test.single-valid")
+        content = gclient_utils.FileRead(filepath)
+        all_metadata = metadata.parse.parse_content(content)
+        self.assertEqual(len(all_metadata), 1)
+        dm = all_metadata[0]
+
+        field_spec = metadata.fields.known
+        expected_line_numbers = {
+            field_spec.NAME: [1],
+            field_spec.SHORT_NAME: [2],
+            field_spec.URL: [3, 4],
+            field_spec.VERSION: [8],
+            field_spec.DATE: [9],
+            field_spec.LICENSE: [10],
+            field_spec.LICENSE_FILE: [11],
+            field_spec.SECURITY_CRITICAL: [12],
+            field_spec.SHIPPED: [13],
+            field_spec.CPE_PREFIX: [14],
+            field_spec.DESCRIPTION: [16, 17, 18],
+            field_spec.LOCAL_MODIFICATIONS: [20, 21],
+        }
+
+        self.assertEqual(dm.get_field_line_numbers(metadata.fields.known.NAME),
+                         [1])
+
 
 if __name__ == "__main__":
     unittest.main()

@@ -6,6 +6,7 @@
 import os
 import sys
 import unittest
+import unittest.mock
 
 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
 # The repo's root directory.
@@ -17,6 +18,7 @@ sys.path.insert(0, _ROOT_DIR)
 import gclient_utils
 import metadata.validate
 import metadata.validation_result
+import metadata.fields.known
 
 # Common paths for tests.
 _SOURCE_FILE_DIR = os.path.join(_THIS_DIR, "data")
@@ -182,5 +184,40 @@ class ValidationResultTest(unittest.TestCase):
         self.assertEqual(["message1", "message2"], ve.get_additional())
 
 
+class ValidationWithLineNumbers(unittest.TestCase):
+    def test_reports_line_number(self):
+        """Checks validate reports line number if available."""
+        filepath = os.path.join(_THIS_DIR, "data",
+                                "README.chromium.test.validation-line-number")
+        content = gclient_utils.FileRead(filepath)
+
+        unittest.mock.patch(
+            'metadata.fields.known.LICENSE_FILE.validate_on_disk',
+            return_value=metadata.validation_result.ValidationError(
+                "File doesn't exist."))
+
+        results = metadata.validate.validate_content(content,
+                                                     "chromium/src/test_dir",
+                                                     "chromium/src")
+
+        for r in results:
+            if r.get_reason() == 'License File is invalid.':
+                self.assertEqual(r.get_lines(), [10])
+            elif r.get_reason(
+            ) == "Required field 'License Android Compatible' is missing.":
+                # We can't add a line number to errors caused by missing fields.
+                self.assertEqual(r.get_lines(), [])
+            elif r.get_reason() == "Versioning fields are insufficient.":
+                # We can't add a line number to errors caused by missing fields.
+                self.assertEqual(r.get_lines(), [])
+            elif r.get_reason(
+            ) == "License has a license not in the allowlist.":
+                self.assertEqual(r.get_lines(), [9])
+            elif r.get_reason() == "URL is invalid.":
+                self.assertEqual(r.get_lines(), [2, 3, 4])
+            elif r.get_reason() == "Shipped in Chromium is invalid":
+                self.assertEqual(r.get_lines(), [13])
+
 
 if __name__ == "__main__":
     unittest.main()
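
For reference, unittest.mock.patch only takes effect when applied as a decorator, used as a context manager, or started with start(). A minimal sketch of activating the same patch as a context manager, reusing the content and imports from the test above (illustrative only, not part of this change):

import unittest.mock

with unittest.mock.patch(
        'metadata.fields.known.LICENSE_FILE.validate_on_disk',
        return_value=metadata.validation_result.ValidationError(
            "File doesn't exist.")):
    results = metadata.validate.validate_content(content,
                                                 "chromium/src/test_dir",
                                                 "chromium/src")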

@@ -28,6 +28,7 @@ class ValidationResult:
         self._fatal = fatal
         self._additional = additional
         self._tags = {}
+        self._lines = []
 
     def __str__(self) -> str:
         prefix = self.get_severity_prefix()
@@ -98,6 +99,12 @@ class ValidationResult:
 
         return message
 
+    def set_lines(self, lines: List[int]):
+        self._lines = lines
+
+    def get_lines(self) -> List[int]:
+        return self._lines
+
 
 class ValidationError(ValidationResult):
     """Fatal validation issue. Presubmit should fail."""
