From 422f4d59a426aaf889dbe8d7534ce3da39db4c49 Mon Sep 17 00:00:00 2001
From: Yiwei Zhang <yiwzhang@google.com>
Date: Mon, 12 Jun 2023 16:03:10 +0000
Subject: [PATCH] config: use lucicfg validate to validate the config files

The new flow is that if any affected file is in the config directory
of a config set, the script will call `lucicfg validate` to validate
the whole config directory and then filter out the validation result
that does not belong to affected file(s). This will introduce a bit
more latency in valdiation because before, only affected files are
sent to LUCI Config for validation. However, this is not latency
senstively operation and always validating the full snapshot will
faciliate us breaking down the config as it's possible in order to
validate the affected file, LUCI Config will need an unaffected file.

R=iannucci, yuanjunh

Bug: 1449933
Change-Id: Id330973f59b882569a8ece36edb4b6a8ff0ed2d0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/4591233
Reviewed-by: Robbie Iannucci <iannucci@chromium.org>
Commit-Queue: Yiwei Zhang <yiwzhang@google.com>
Reviewed-by: Josip Sokcevic <sokcevic@chromium.org>
Reviewed-by: Yuanjun Huang <yuanjunh@google.com>
---
 presubmit_canned_checks.py  | 149 +++++++++++++++-----------------
 presubmit_support.py        |   1 -
 tests/presubmit_unittest.py | 168 +++++++++++++++++++++++++++++++++---
 3 files changed, 227 insertions(+), 91 deletions(-)

diff --git a/presubmit_canned_checks.py b/presubmit_canned_checks.py
index 8dc3012fd..c28e6e8f1 100644
--- a/presubmit_canned_checks.py
+++ b/presubmit_canned_checks.py
@@ -1816,15 +1816,17 @@ def CheckVPythonSpec(input_api, output_api, file_filter=None):
   return commands
 
 
-# Use this limit to decide whether to split one request into multiple requests.
-# It preemptively prevents configs are too large even after the compression.
-# The GFE limit is 32 MiB and assume the compression ratio > 5:1.
-_CONFIG_SIZE_LIMIT_PER_REQUEST = 5 * 32 * 1024 * 1024
+def CheckChangedLUCIConfigs(input_api, output_api):
+  """Validates the changed config file against LUCI Config.
 
+  Only return the warning and/or error for files in input_api.AffectedFiles().
 
-def CheckChangedLUCIConfigs(input_api, output_api):
-  import collections
-  import base64
+  Assumes `lucicfg` binary is in PATH and the user is logged in.
+
+  Returns:
+    A list presubmit errors and/or warnings from the validation result of files
+    in input_api.AffectedFiles()
+  """
   import json
   import logging
 
@@ -1879,12 +1881,13 @@ def CheckChangedLUCIConfigs(input_api, output_api):
     return [output_api.PresubmitPromptWarning('No config_sets were returned')]
   loc_pref = '%s/+/%s/' % (remote_host_url, remote_branch)
   logging.debug('Derived location prefix: %s', loc_pref)
-  dir_to_config_set = {
-    '%s/' % cs['location'][len(loc_pref):].rstrip('/'): cs['config_set']
-    for cs in config_sets
-    if cs['location'].startswith(loc_pref) or
-    ('%s/' % cs['location']) == loc_pref
-  }
+  dir_to_config_set = {}
+  for cs in config_sets:
+    if cs['location'].startswith(loc_pref) or ('%s/' %
+                                               cs['location']) == loc_pref:
+      path = cs['location'][len(loc_pref):].rstrip('/')
+      d = input_api.os_path.join(*path.split('/')) if path else '.'
+      dir_to_config_set[d] = cs['config_set']
   if not dir_to_config_set:
     warning_long_text_lines = [
         'No config_set found for %s.' % loc_pref,
@@ -1900,75 +1903,63 @@ def CheckChangedLUCIConfigs(input_api, output_api):
     return [output_api.PresubmitPromptWarning(
         warning_long_text_lines[0],
         long_text='\n'.join(warning_long_text_lines))]
-  cs_to_files = collections.defaultdict(list)
+
+  dir_to_fileSet = {}
   for f in input_api.AffectedFiles(include_deletes=False):
-    # windows
-    file_path = f.LocalPath().replace(_os.sep, '/')
-    logging.debug('Affected file path: %s', file_path)
-    for dr, cs in dir_to_config_set.items():
-      if dr == '/' or file_path.startswith(dr):
-        cs_to_files[cs].append({
-          'path': file_path[len(dr):] if dr != '/' else file_path,
-          'content': base64.b64encode(
-              '\n'.join(f.NewContents()).encode('utf-8')).decode('utf-8')
-        })
-  outputs = []
+    for d in dir_to_config_set:
+      if d != '.' and not f.LocalPath().startswith(d):
+        continue  # file doesn't belong to this config set
+      rel_path = f.LocalPath() if d == '.' else input_api.os_path.relpath(
+          f.LocalPath(), start=d)
+      fileSet = dir_to_fileSet.setdefault(d, set())
+      fileSet.add(rel_path.replace(_os.sep, '/'))
+      dir_to_fileSet[d] = fileSet
 
-  def do_one_validation(config_set, files):
-    if not files:
-      return
-    res = request('validate-config',
-                  body={
-                      'config_set': config_set,
-                      'files': files,
-                  })
-
-    for msg in res.get('messages', []):
-      sev = msg['severity']
-      if sev == 'WARNING':
-        out_f = output_api.PresubmitPromptWarning
-      elif sev in ('ERROR', 'CRITICAL'):
-        out_f = output_api.PresubmitError
-      else:
-        out_f = output_api.PresubmitNotifyResult
-      outputs.append(
-          out_f('Config validation for %s: %s' %
-                ([str(file['path']) for file in files], msg['text'])))
-
-  for cs, files in cs_to_files.items():
-    # make the first pass to ensure none of the config standalone exceeds the
-    # size limit.
-    for f in files:
-      if len(f['content']) > _CONFIG_SIZE_LIMIT_PER_REQUEST:
-        return [
+  outputs = []
+  lucicfg = 'lucicfg' if not input_api.is_windows else 'lucicfg.bat'
+  log_level = 'debug' if input_api.verbose else 'warning'
+  for d, fileSet in dir_to_fileSet.items():
+    config_set = dir_to_config_set[d]
+    with input_api.CreateTemporaryFile() as f:
+      cmd = [
+          lucicfg, 'validate', d, '-config-set', config_set, '-log-level',
+          log_level, '-json-output', f.name
+      ]
+      # return code is not important as the validation failure will be retrieved
+      # from the output json file.
+      out, _ = input_api.subprocess.communicate(
+          cmd,
+          stderr=input_api.subprocess.PIPE,
+          shell=input_api.is_windows,  # to resolve *.bat
+          cwd=input_api.change.RepositoryRoot(),
+      )
+      logging.debug('running %s\nSTDOUT:\n%s\nSTDERR:\n%s', cmd, out[0], out[1])
+      try:
+        result = json.load(f)
+      except json.JSONDecodeError as e:
+        outputs.append(
             output_api.PresubmitError(
-                ('File %s grows too large, it is now ~%.2f MiB. '
-                 'The limit is %.2f MiB') %
-                (f['path'], len(f['content']) /
-                 (1024 * 1024), _CONFIG_SIZE_LIMIT_PER_REQUEST / (1024 * 1024)))
-        ]
-
-    # Split the request for the same config set into smaller requests so that
-    # the config content size in each request does not exceed
-    # _CONFIG_SIZE_LIMIT_PER_REQUEST.
-    startIdx = 0
-    curSize = 0
-    for idx, f in enumerate(files):
-      content = f['content']
-      if curSize + len(content) > _CONFIG_SIZE_LIMIT_PER_REQUEST:
-        try:
-          do_one_validation(cs, files[startIdx:idx])
-        except input_api.urllib_error.HTTPError as e:
-          return [
-              output_api.PresubmitError(
-                  'Validation request to luci-config failed', long_text=str(e))
-          ]
-
-        curSize = 0
-        startIdx = idx
-      curSize += len(content)
-    do_one_validation(cs, files[startIdx:])  # validate all the remaining config
-
+                'Error when parsing lucicfg validate output', long_text=str(e)))
+      else:
+        result = result.get('result', None)
+        if result:
+          for validation_result in (result.get('validation', None) or []):
+            for msg in (validation_result.get('messages', None) or []):
+              if d != '.' and msg['path'] not in fileSet:
+                # TODO(yiwzhang): This is the message from non-affected file.
+                # Output presubmit warning for those files if needed in the
+                # future.
+                continue
+              sev = msg['severity']
+              if sev == 'WARNING':
+                out_f = output_api.PresubmitPromptWarning
+              elif sev in ('ERROR', 'CRITICAL'):
+                out_f = output_api.PresubmitError
+              else:
+                out_f = output_api.PresubmitNotifyResult
+              outputs.append(
+                  out_f('Config validation for file(%s): %s' %
+                        (msg['path'], msg['text'])))
   return outputs
 
 
diff --git a/presubmit_support.py b/presubmit_support.py
index 6738cfc7a..08c06190d 100755
--- a/presubmit_support.py
+++ b/presubmit_support.py
@@ -837,7 +837,6 @@ class InputApi(object):
 
       with input_api.CreateTemporaryFile() as f:
         f.write('xyz')
-        f.close()
         input_api.subprocess.check_output(['script-that', '--reads-from',
                                            f.name])
 
diff --git a/tests/presubmit_unittest.py b/tests/presubmit_unittest.py
index ed85658c0..f0fc6bb94 100755
--- a/tests/presubmit_unittest.py
+++ b/tests/presubmit_unittest.py
@@ -64,6 +64,9 @@ class MockTemporaryFile(object):
   def __exit__(self, *args):
     pass
 
+  def close(self):
+    pass
+
 
 class PresubmitTestsBase(TestCaseUtils, unittest.TestCase):
   """Sets up and tears down the mocks but doesn't test anything as-is."""
@@ -2055,28 +2058,26 @@ class CannedChecksUnittest(PresubmitTestsBase):
 
   @mock.patch('git_cl.Changelist')
   @mock.patch('auth.Authenticator')
-  def testCannedCheckChangedLUCIConfigs(self, mockGetAuth, mockChangelist):
+  def testCannedCheckChangedLUCIConfigsRoot(self, mockGetAuth, mockCl):
     affected_file1 = mock.MagicMock(presubmit.GitAffectedFile)
     affected_file1.LocalPath.return_value = 'foo.cfg'
-    affected_file1.NewContents.return_value = ['test', 'foo']
     affected_file2 = mock.MagicMock(presubmit.GitAffectedFile)
-    affected_file2.LocalPath.return_value = 'bar.cfg'
-    affected_file2.NewContents.return_value = ['test', 'bar']
+    affected_file2.LocalPath.return_value = 'sub/bar.cfg'
 
     mockGetAuth().get_access_token().token = 123
 
     host = 'https://host.com'
     branch = 'branch'
     http_resp = {
-      'messages': [{'severity': 'ERROR', 'text': 'deadbeef'}],
-      'config_sets': [{'config_set': 'deadbeef',
-                       'location': '%s/+/%s' % (host, branch)}]
+        'config_sets': [{
+            'config_set': 'project/deadbeef',
+            'location': '%s/+/%s' % (host, branch)
+        }]
     }
     urllib_request.urlopen.return_value = http_resp
-    json.load.return_value = http_resp
 
-    mockChangelist().GetRemoteBranch.return_value = ('remote', branch)
-    mockChangelist().GetRemoteUrl.return_value = host
+    mockCl().GetRemoteBranch.return_value = ('remote', branch)
+    mockCl().GetRemoteUrl.return_value = host
 
     change1 = presubmit.Change(
       'foo', 'foo1', self.fake_root_dir, None, 0, 0, None)
@@ -2085,11 +2086,156 @@ class CannedChecksUnittest(PresubmitTestsBase):
 
     input_api.AffectedFiles = lambda **_: affected_files
 
+    proc = mock.Mock()
+    proc.communicate.return_value = ('This is STDOUT', 'This is STDERR')
+    proc.returncode = 0
+    subprocess.Popen.return_value = proc
+    input_api.CreateTemporaryFile.return_value = MockTemporaryFile('tmp_file')
+
+    validation_result = {
+        'result': {
+            'validation': [{
+                'messages': [{
+                    'path': 'foo.cfg',
+                    'severity': 'ERROR',
+                    'text': 'deadbeef',
+                }, {
+                    'path': 'sub/bar.cfg',
+                    'severity': 'WARNING',
+                    'text': 'cafecafe',
+                }]
+            }]
+        }
+    }
+    json.load.side_effect = [http_resp, validation_result]
+
+    results = presubmit_canned_checks.CheckChangedLUCIConfigs(
+        input_api, presubmit.OutputApi)
+    self.assertEqual(len(results), 2)
+    self.assertEqual(results[0].json_format()['message'],
+                     "Config validation for file(foo.cfg): deadbeef")
+    self.assertEqual(results[1].json_format()['message'],
+                     "Config validation for file(sub/bar.cfg): cafecafe")
+    subprocess.Popen.assert_called_once_with([
+        'lucicfg' + ('.bat' if input_api.is_windows else ''), 'validate', '.',
+        '-config-set', 'project/deadbeef', '-log-level',
+        'debug' if input_api.verbose else 'warning', '-json-output', 'tmp_file'
+    ],
+                                             cwd=self.fake_root_dir,
+                                             stderr=subprocess.PIPE,
+                                             shell=input_api.is_windows)
+
+  @mock.patch('git_cl.Changelist')
+  @mock.patch('auth.Authenticator')
+  def testCannedCheckChangedLUCIConfigsNoFile(self, mockGetAuth, mockCl):
+    affected_file1 = mock.MagicMock(presubmit.GitAffectedFile)
+    affected_file1.LocalPath.return_value = 'foo.cfg'
+    affected_file2 = mock.MagicMock(presubmit.GitAffectedFile)
+    affected_file2.LocalPath.return_value = 'bar.cfg'
+
+    mockGetAuth().get_access_token().token = 123
+
+    host = 'https://host.com'
+    branch = 'branch'
+    http_resp = {
+        'config_sets': [{
+            'config_set': 'project/deadbeef',
+            'location': '%s/+/%s/generated' % (host, branch)
+            # no affected file in generated folder
+        }]
+    }
+    urllib_request.urlopen.return_value = http_resp
+    json.load.return_value = http_resp
+
+    mockCl().GetRemoteBranch.return_value = ('remote', branch)
+    mockCl().GetRemoteUrl.return_value = host
+
+    change1 = presubmit.Change('foo', 'foo1', self.fake_root_dir, None, 0, 0,
+                               None)
+    input_api = self.MockInputApi(change1, False)
+    affected_files = (affected_file1, affected_file2)
+
+    input_api.AffectedFiles = lambda **_: affected_files
+
+    results = presubmit_canned_checks.CheckChangedLUCIConfigs(
+        input_api, presubmit.OutputApi)
+    self.assertEqual(len(results), 0)
+
+  @mock.patch('git_cl.Changelist')
+  @mock.patch('auth.Authenticator')
+  def testCannedCheckChangedLUCIConfigsNonRoot(self, mockGetAuth, mockCl):
+    affected_file1 = mock.MagicMock(presubmit.GitAffectedFile)
+    affected_file1.LocalPath.return_value = 'generated/foo.cfg'
+    affected_file2 = mock.MagicMock(presubmit.GitAffectedFile)
+    affected_file2.LocalPath.return_value = 'generated/bar.cfg'
+
+    mockGetAuth().get_access_token().token = 123
+
+    host = 'https://host.com'
+    branch = 'branch'
+    http_resp = {
+        'config_sets': [{
+            'config_set': 'project/deadbeef',
+            'location': '%s/+/%s/generated' % (host, branch)
+        }]
+    }
+    urllib_request.urlopen.return_value = http_resp
+
+    mockCl().GetRemoteBranch.return_value = ('remote', branch)
+    mockCl().GetRemoteUrl.return_value = host
+
+    change1 = presubmit.Change('foo', 'foo1', self.fake_root_dir, None, 0, 0,
+                               None)
+    input_api = self.MockInputApi(change1, False)
+    affected_files = (affected_file1, affected_file2)
+
+    input_api.AffectedFiles = lambda **_: affected_files
+
+    proc = mock.Mock()
+    proc.communicate.return_value = ('This is STDOUT', 'This is STDERR')
+    proc.returncode = 0
+    subprocess.Popen.return_value = proc
+    input_api.CreateTemporaryFile.return_value = MockTemporaryFile('tmp_file')
+
+    validation_result = {
+        'result': {
+            'validation': [{
+                'messages': [
+                    {
+                        'path': 'bar.cfg',
+                        'severity': 'ERROR',
+                        'text': 'deadbeef',
+                    },
+                    {
+                        'path': 'sub/baz.cfg',  # not an affected file
+                        'severity': 'ERROR',
+                        'text': 'cafecafe',
+                    }
+                ]
+            }]
+        }
+    }
+    json.load.side_effect = [http_resp, validation_result]
+
     results = presubmit_canned_checks.CheckChangedLUCIConfigs(
         input_api, presubmit.OutputApi)
     self.assertEqual(len(results), 1)
     self.assertEqual(results[0].json_format()['message'],
-                     "Config validation for ['foo.cfg', 'bar.cfg']: deadbeef")
+                     "Config validation for file(bar.cfg): deadbeef")
+    subprocess.Popen.assert_called_once_with([
+        'lucicfg' + ('.bat' if input_api.is_windows else ''),
+        'validate',
+        'generated',
+        '-config-set',
+        'project/deadbeef',
+        '-log-level',
+        'debug' if input_api.verbose else 'warning',
+        '-json-output',
+        'tmp_file',
+    ],
+                                             cwd=self.fake_root_dir,
+                                             stderr=subprocess.PIPE,
+                                             shell=input_api.is_windows)
 
   def testCannedCheckChangeHasNoTabs(self):
     self.ContentTest(presubmit_canned_checks.CheckChangeHasNoTabs,