[ssci] Recognize other date formats for third party metadata

This CL expands the date format validation for third-party
metadata. Values that use a recognized date format other than the
preferred YYYY-MM-DD format now produce a warning instead of an
error.

Bug: b:285453019
Change-Id: I344dc863601b4e03e801cdfb3cc5912cfe13b762
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4961973
Reviewed-by: Rachael Newitt <renewitt@google.com>
Commit-Queue: Anne Redulla <aredulla@google.com>
changes/73/4961973/4
Anne Redulla 2 years ago committed by LUCI CQ
parent b49c84d812
commit 10cd8e406d

@ -3,8 +3,8 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import datetime
import os
import re
import sys
from typing import Union
@ -16,10 +16,56 @@ _ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", ".."))
sys.path.insert(0, _ROOT_DIR)
import metadata.fields.field_types as field_types
import metadata.fields.util as util
import metadata.validation_result as vr
# Matches the digit layout NNNN-NN-NN where the month starts with 0 or 1
# and the day starts with 0-3; it does not check calendar validity.
_PATTERN_DATE = re.compile(r"^\d{4}-(0|1)\d-[0-3]\d$")
# The preferred date format for the start of date values, written as
# strptime directives for YYYY-MM-DD.
_PREFERRED_PREFIX_FORMAT = "%Y-%m-%d"
# Formats for the start of date values that are recognized as
# alternative date formats. For each separator, both day-first and
# month-first orders are listed, with four-digit (%Y) and two-digit (%y)
# years.
_RECOGNIZED_PREFIX_FORMATS = (
# Hyphen-separated, year last.
"%d-%m-%Y",
"%m-%d-%Y",
"%d-%m-%y",
"%m-%d-%y",
# Slash-separated, year last.
"%d/%m/%Y",
"%m/%d/%Y",
"%d/%m/%y",
"%m/%d/%y",
# Dot-separated, year last.
"%d.%m.%Y",
"%m.%d.%Y",
"%d.%m.%y",
"%m.%d.%y",
# Year first, with a slash, a dot, or no separator.
"%Y/%m/%d",
"%Y.%m.%d",
"%Y%m%d",
)
# Formats recognized as alternative date formats (entire value must
# match).
_RECOGNIZED_DATE_FORMATS = (
# Abbreviated month name (%b).
"%d %b %Y",
"%d %b, %Y",
"%b %d %Y",
"%b %d, %Y",
"%Y %b %d",
# Full month name (%B).
"%d %B %Y",
"%d %B, %Y",
"%B %d %Y",
"%B %d, %Y",
"%Y %B %d",
# Abbreviated weekday and month, day, time and year, without and with
# a trailing UTC offset (%z).
"%a %b %d %H:%M:%S %Y",
"%a %b %d %H:%M:%S %Y %z",
)
def format_matches(value: str, date_format: str) -> bool:
    """Returns whether the given value matches the date format.

    Args:
        value: the string to check.
        date_format: a datetime.strptime format string, e.g. "%Y-%m-%d".

    Returns:
        True if datetime.strptime parses the whole value using
        date_format; False if it raises ValueError (for example, the
        layout differs, text is left over, or the date does not exist).
    """
    try:
        datetime.datetime.strptime(value, date_format)
    except ValueError:
        return False
    return True
class DateField(field_types.MetadataField):
@ -29,11 +75,38 @@ class DateField(field_types.MetadataField):
def validate(self, value: str) -> Union[vr.ValidationResult, None]:
    """Checks the given value is a YYYY-MM-DD date.

    Args:
        value: the raw field value; surrounding whitespace is ignored.

    Returns:
        None if the first whitespace-separated part of the value uses
        the preferred YYYY-MM-DD format; a ValidationWarning if the
        value uses one of the recognized alternative formats; otherwise
        a ValidationError (including when the value is empty).
    """
    value = value.strip()
    if not value:
        return vr.ValidationError(
            reason=f"{self._name} is empty.",
            additional=["Provide date in format YYYY-MM-DD."])

    # Only the first part is compared against the prefix formats so that
    # trailing timezone info (e.g. "2012-03-04 UTC") is ignored.
    # NOTE: strptime treats the leading zero of %m and %d as optional,
    # so a prefix such as "2012-3-4" also passes the preferred check.
    prefix = value.split()[0]
    if format_matches(prefix, _PREFERRED_PREFIX_FORMAT):
        return None

    # A recognized alternative is either a known prefix format or a
    # known format that the entire value matches.
    uses_recognized_format = (
        any(
            format_matches(prefix, prefix_format)
            for prefix_format in _RECOGNIZED_PREFIX_FORMATS)
        or any(
            format_matches(value, date_format)
            for date_format in _RECOGNIZED_DATE_FORMATS))
    if uses_recognized_format:
        return vr.ValidationWarning(
            reason=f"{self._name} is not in the preferred format.",
            additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])

    # Return an error as the value's format was not recognized.
    return vr.ValidationError(
        reason=f"{self._name} is invalid.",
        additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])

@ -55,8 +55,10 @@ def main() -> None:
invalid_file_count = 0
# Key is constructed from the result severity and reason;
# Value is a list of files affected by that reason at that severity.
all_reasons = defaultdict(list)
# Value is a dict for:
# * list of files affected by that reason at that severity; and
# * list of validation result strings for that reason and severity.
all_reasons = defaultdict(lambda: {"files": [], "results": set()})
for filepath in metadata_files:
file_results = metadata.validate.validate_file(filepath,
repo_root_dir=src_dir)
@ -69,21 +71,34 @@ def main() -> None:
summary_key = "{severity} - {reason}".format(
severity=result.get_severity_prefix(),
reason=result.get_reason())
all_reasons[summary_key].append(relpath)
all_reasons[summary_key]["files"].append(relpath)
all_reasons[summary_key]["results"].add(str(result))
if result.is_fatal():
invalid = True
if invalid:
invalid_file_count += 1
print("\n\nDone.\nSummary:")
for summary_key, affected_files in all_reasons.items():
print("\n\nDone.")
print("\nSummary of files:")
for summary_key, data in all_reasons.items():
affected_files = data["files"]
count = len(affected_files)
plural = "s" if count > 1 else ""
print(f"\n {count} file{plural}: {summary_key}")
for affected_file in affected_files:
print(f" {affected_file}")
print("\nSummary of results:")
for summary_key, data in all_reasons.items():
results = data["results"]
count = len(results)
plural = "s" if count > 1 else ""
print(f"\n {count} issue{plural}: {summary_key}")
for result in results:
print(f" {result}")
print(f"\n\n{invalid_file_count} / {file_count} metadata files are "
"invalid, i.e. the file has at least one fatal validation issue.")

@ -97,8 +97,19 @@ class FieldValidationTest(unittest.TestCase):
def test_date_validation(self):
    """Checks which DATE values are accepted, warned about, or rejected."""
    self._run_field_validation(
        field=known_fields.DATE,
        # Preferred YYYY-MM-DD prefix, optionally followed by timezone info.
        valid_values=[
            "2012-03-04", "2012-03-04 UTC", "2012-03-04 UTC+10:00"
        ],
        # Empty values and values in no recognized date format.
        error_values=[
            "",
            "\n",
            "N/A",
        ],
        # Recognized date formats other than the preferred one.
        warning_values=[
            "2012/03/04 UTC+10:00", "20120304", "April 3, 2012",
            "3 Apr 2012", "03-04-12", "04/03/2012",
            "Tue Apr 3 05:06:07 2012 +0800"
        ],
    )
def test_license_validation(self):

Loading…
Cancel
Save