diff --git a/git_cl.py b/git_cl.py index 39c6c5ee8f..3837d4dd7a 100755 --- a/git_cl.py +++ b/git_cl.py @@ -532,7 +532,7 @@ def fetch_try_jobs(auth_config, changelist, buildbucket_host, Returns list of buildbucket.v2.Build with the try jobs for the changelist. """ - fields = ['id', 'builder', 'status'] + fields = ['id', 'builder', 'status', 'createTime', 'tags'] request = { 'predicate': { 'gerritChanges': [changelist.GetGerritChange(patchset)], @@ -588,11 +588,11 @@ def _fetch_latest_builds( return [], 0 -def _filter_failed(builds): - """Returns a list of buckets/builders that had failed builds. +def _filter_failed_for_retry(all_builds): + """Returns a list of buckets/builders that are worth retrying. Args: - builds (list): Builds, in the format returned by fetch_try_jobs, + all_builds (list): Builds, in the format returned by fetch_try_jobs, i.e. a list of buildbucket.v2.Builds which includes status and builder info. @@ -600,14 +600,30 @@ def _filter_failed(builds): A dict of bucket to builder to tests (empty list). This is the same format accepted by _trigger_try_jobs and returned by _get_bucket_map. """ - buckets = collections.defaultdict(dict) - for build in builds: - if build['status'] in ('FAILURE', 'INFRA_FAILURE'): - project = build['builder']['project'] - bucket = build['builder']['bucket'] - builder = build['builder']['builder'] - buckets[project + '/' + bucket][builder] = [] - return buckets + + def _builder_of(build): + builder = build['builder'] + return (builder['project'], builder['bucket'], builder['builder']) + + res = collections.defaultdict(dict) + ordered = sorted(all_builds, key=lambda b: (_builder_of(b), b['createTime'])) + for (proj, buck, bldr), builds in itertools.groupby(ordered, key=_builder_of): + # If builder had several builds, retry only if the last one failed. + # This is a bit different from CQ, which would re-use *any* SUCCESS-full + # build, but in case of retrying failed jobs retrying a flaky one makes + # sense. + builds = list(builds) + if builds[-1]['status'] not in ('FAILURE', 'INFRA_FAILURE'): + continue + if any(t['key'] == 'cq_experimental' and t['value'] == 'true' + for t in builds[-1]['tags']): + # Don't retry experimental build previously triggered by CQ. + continue + if any(b['status'] in ('STARTED', 'SCHEDULED') for b in builds): + # Don't retry if any are running. + continue + res[proj + '/' + buck][bldr] = [] + return res def print_try_jobs(options, builds): @@ -4429,7 +4445,7 @@ def CMDupload(parser, args): builds, _ = _fetch_latest_builds( auth_config, cl, options.buildbucket_host, latest_patchset=patchset) - buckets = _filter_failed(builds) + buckets = _filter_failed_for_retry(builds) if len(buckets) == 0: print('No failed tryjobs, so --retry-failed has no effect.') return ret @@ -4717,7 +4733,7 @@ def CMDtry(parser, args): auth_config, cl, options.buildbucket_host) if options.verbose: print('Got %d builds in patchset #%d' % (len(builds), patchset)) - buckets = _filter_failed(builds) + buckets = _filter_failed_for_retry(builds) if not buckets: print('There are no failed jobs in the latest set of jobs ' '(patchset #%d), doing nothing.' % patchset) diff --git a/tests/git_cl_test.py b/tests/git_cl_test.py index 0da416dc05..f7de42b836 100755 --- a/tests/git_cl_test.py +++ b/tests/git_cl_test.py @@ -3037,18 +3037,17 @@ class CMDTestCaseBase(unittest.TestCase): }, } _DEFAULT_RESPONSE = { - 'builds': [ - { - 'id': str(100 + idx), - 'builder': { - 'project': 'chromium', - 'bucket': 'try', - 'builder': 'bot_' + status.lower(), - }, - 'status': status, - } - for idx, status in enumerate(_STATUSES) - ] + 'builds': [{ + 'id': str(100 + idx), + 'builder': { + 'project': 'chromium', + 'bucket': 'try', + 'builder': 'bot_' + status.lower(), + }, + 'createTime': '2019-10-09T08:00:0%d.854286Z' % (idx % 10), + 'tags': [], + 'status': status, + } for idx, status in enumerate(_STATUSES)] } def setUp(self): @@ -3073,14 +3072,15 @@ class CMDTestCaseBase(unittest.TestCase): class CMDTryResultsTestCase(CMDTestCaseBase): _DEFAULT_REQUEST = { 'predicate': { - "gerritChanges": [{ - "project": "depot_tools", - "host": "chromium-review.googlesource.com", - "patchset": 7, - "change": 123456, - }], + "gerritChanges": [{ + "project": "depot_tools", + "host": "chromium-review.googlesource.com", + "patchset": 7, + "change": 123456, + }], }, - 'fields': 'builds.*.id,builds.*.builder,builds.*.status', + 'fields': ('builds.*.id,builds.*.builder,builds.*.status' + + ',builds.*.createTime,builds.*.tags'), } def testNoJobs(self): @@ -3147,11 +3147,58 @@ class CMDTryResultsTestCase(CMDTestCaseBase): mockJsonDump.assert_called_once_with( 'file.json', self._DEFAULT_RESPONSE['builds']) - def test_filter_failed(self): - self.assertEqual({}, git_cl._filter_failed([])) + def test_filter_failed_for_one_simple(self): + self.assertEqual({}, git_cl._filter_failed_for_retry([])) self.assertEqual({ - 'chromium/try': {'bot_failure': [], 'bot_infra_failure': []}, - }, git_cl._filter_failed(self._DEFAULT_RESPONSE['builds'])) + 'chromium/try': { + 'bot_failure': [], + 'bot_infra_failure': [] + }, + }, git_cl._filter_failed_for_retry(self._DEFAULT_RESPONSE['builds'])) + + def test_filter_failed_for_retry_many_builds(self): + + def _build(name, created_sec, status, experimental=False): + assert 0 <= created_sec < 100, created_sec + b = { + 'id': 112112, + 'builder': { + 'project': 'chromium', + 'bucket': 'try', + 'builder': name, + }, + 'createTime': '2019-10-09T08:00:%02d.854286Z' % created_sec, + 'status': status, + 'tags': [], + } + if experimental: + b['tags'].append({'key': 'cq_experimental', 'value': 'true'}) + return b + + builds = [ + _build('flaky-last-green', 1, 'FAILURE'), + _build('flaky-last-green', 2, 'SUCCESS'), + _build('flaky', 1, 'SUCCESS'), + _build('flaky', 2, 'FAILURE'), + _build('running', 1, 'FAILED'), + _build('running', 2, 'SCHEDULED'), + _build('yep-still-running', 1, 'STARTED'), + _build('yep-still-running', 2, 'FAILURE'), + _build('cq-experimental', 1, 'SUCCESS', experimental=True), + _build('cq-experimental', 2, 'FAILURE', experimental=True), + + # Simulate experimental in CQ builder, which developer decided + # to retry manually which resulted in 2nd build non-experimental. + _build('sometimes-experimental', 1, 'FAILURE', experimental=True), + _build('sometimes-experimental', 2, 'FAILURE', experimental=False), + ] + builds.sort(key=lambda b: b['status']) # ~deterministic shuffle. + self.assertEqual({ + 'chromium/try': { + 'flaky': [], + 'sometimes-experimental': [] + }, + }, git_cl._filter_failed_for_retry(builds)) class CMDTryTestCase(CMDTestCaseBase): @@ -3228,11 +3275,10 @@ class CMDTryTestCase(CMDTestCaseBase): 'builder': { 'project': 'chromium', 'bucket': 'try', - 'builder': 'linux', - }, - 'status': 'FAILURE', - }], - }[kw['patchset']] + 'builder': 'linux',}, + 'createTime': '2019-10-09T08:00:01.854286Z', + 'tags': [], + 'status': 'FAILURE',}],}[kw['patchset']] mockCallBuildbucket.return_value = {} self.assertEqual(0, git_cl.main(['try', '--retry-failed'])) @@ -3318,18 +3364,17 @@ class CMDUploadTestCase(CMDTestCaseBase): # Latest patchset: No builds. [], # Patchset before latest: Some builds. - [ - { - 'id': str(100 + idx), - 'builder': { - 'project': 'chromium', - 'bucket': 'try', - 'builder': 'bot_' + status.lower(), - }, - 'status': status, - } - for idx, status in enumerate(self._STATUSES) - ], + [{ + 'id': str(100 + idx), + 'builder': { + 'project': 'chromium', + 'bucket': 'try', + 'builder': 'bot_' + status.lower(), + }, + 'createTime': '2019-10-09T08:00:0%d.854286Z' % (idx % 10), + 'tags': [], + 'status': status, + } for idx, status in enumerate(self._STATUSES)], ] self.assertEqual(0, git_cl.main(['upload', '--retry-failed']))