From 23c3bdc950643bf41b5d3e5f0c625a7c9095dacc Mon Sep 17 00:00:00 2001 From: Edward Lesmes Date: Thu, 11 Mar 2021 20:37:32 +0000 Subject: [PATCH] owners: Simplify logic to score and suggest owners. Keep adding owners in the order suggested by Gerrit until coverage is achieved instead of finding a minimal set of owners, as that takes too long Bug: 1186420 Change-Id: Id2ab172db0b2e83ab950191958813163099181be Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/2751343 Reviewed-by: Josip Sokcevic Reviewed-by: Gavin Mak Commit-Queue: Edward Lesmes Auto-Submit: Edward Lesmes --- gerrit_util.py | 4 +- owners_client.py | 91 +++++++++----------------- tests/owners_client_test.py | 125 +++++++++++++++--------------------- 3 files changed, 83 insertions(+), 137 deletions(-) diff --git a/gerrit_util.py b/gerrit_util.py index 7704e2387..2fd436632 100644 --- a/gerrit_util.py +++ b/gerrit_util.py @@ -779,13 +779,15 @@ def IsCodeOwnersEnabled(host): def GetOwnersForFile(host, project, branch, path, limit=100, - resolve_all_users=True, o_params=('DETAILS',)): + resolve_all_users=True, seed=None, o_params=('DETAILS',)): """Gets information about owners attached to a file.""" path = 'projects/%s/branches/%s/code_owners/%s' % ( urllib.parse.quote(project, ''), urllib.parse.quote(branch, ''), urllib.parse.quote(path, '')) q = ['resolve-all-users=%s' % json.dumps(resolve_all_users)] + if seed: + q.append('seed=%d' % seed) if limit: q.append('n=%d' % limit) if o_params: diff --git a/owners_client.py b/owners_client.py index 4d7dd5942..51c1763eb 100644 --- a/owners_client.py +++ b/owners_client.py @@ -13,28 +13,6 @@ import owners as owners_db import scm -def _owner_combinations(owners, num_owners): - """Iterate owners combinations by decrasing score. - - The score of an owner is its position on the owners list. - The score of a set of owners is the maximum score of all owners on the set. - - Returns all combinations of up to `num_owners` sorted by decreasing score: - _owner_combinations(['0', '1', '2', '3'], 2) == [ - # score 1 - ('1', '0'), - # score 2 - ('2', '0'), - ('2', '1'), - # score 3 - ('3', '0'), - ('3', '1'), - ('3', '2'), - ] - """ - return reversed(list(itertools.combinations(reversed(owners), num_owners))) - - class OwnersClient(object): """Interact with OWNERS files in a repository. @@ -106,55 +84,38 @@ class OwnersClient(object): def ScoreOwners(self, paths, exclude=None): """Get sorted list of owners for the given paths.""" + if not paths: + return [] exclude = exclude or [] - positions_by_owner = {} - owners_by_path = self.BatchListOwners(paths) - for owners in owners_by_path.values(): - for i, owner in enumerate(owners): - if owner in exclude: - continue - # Gerrit API lists owners of a path sorted by an internal score, so - # owners that appear first should be prefered. - # We define the score of an owner based on the pair - # (# of files owned, minimum position on all owned files) - positions_by_owner.setdefault(owner, []).append(i) - - # Sort owners by their score. Rank owners higher for more files owned and - # lower for a larger minimum position across all owned files. Randomize - # order for owners with same score to avoid bias. - return sorted( - positions_by_owner, - key=lambda o: (-len(positions_by_owner[o]), - min(positions_by_owner[o]) + random.random())) + owners = [] + queues = self.BatchListOwners(paths).values() + for i in range(max(len(q) for q in queues)): + for q in queues: + if i < len(q) and q[i] not in owners and q[i] not in exclude: + owners.append(q[i]) + return owners def SuggestOwners(self, paths, exclude=None): """Suggest a set of owners for the given paths.""" exclude = exclude or [] + paths_by_owner = {} owners_by_path = self.BatchListOwners(paths) for path, owners in owners_by_path.items(): for owner in owners: - if owner not in exclude: - paths_by_owner.setdefault(owner, set()).add(path) - - # Select the minimum number of owners that can approve all paths. - # We start at 2 to avoid sending all changes that require multiple - # reviewers to top-level owners. - owners = self.ScoreOwners(paths, exclude=exclude) - if len(owners) < 2: - return owners + paths_by_owner.setdefault(owner, set()).add(path) - # Note that we have to iterate up to len(owners) + 1. - # e.g. if there are only 2 owners, we should consider num_owners = 2. - for num_owners in range(2, len(owners) + 1): - # Iterate all combinations of `num_owners` by decreasing score, and - # select the first one that covers all paths. - for selected in _owner_combinations(owners, num_owners): - covered = set.union(*(paths_by_owner[o] for o in selected)) - if len(covered) == len(paths): - return list(selected) + selected = [] + missing = set(paths) + for owner in self.ScoreOwners(paths, exclude=exclude): + missing_len = len(missing) + missing.difference_update(paths_by_owner[owner]) + if missing_len > len(missing): + selected.append(owner) + if not missing: + break - return [] + return selected class DepotToolsClient(OwnersClient): @@ -207,16 +168,22 @@ class GerritClient(OwnersClient): self._branch = branch self._owners_cache = {} + # Seed used by Gerrit to shuffle code owners that have the same score. Can + # be used to make the sort order stable across several requests, e.g. to get + # the same set of random code owners for different file paths that have the + # same code owners. + self._seed = random.getrandbits(30) + def ListOwners(self, path): # Always use slashes as separators. path = path.replace(os.sep, '/') if path not in self._owners_cache: # GetOwnersForFile returns a list of account details sorted by order of # best reviewer for path. If owners have the same score, the order is - # random. + # random, seeded by `self._seed`. data = gerrit_util.GetOwnersForFile( self._host, self._project, self._branch, path, - resolve_all_users=False) + resolve_all_users=False, seed=self._seed) self._owners_cache[path] = [ d['account']['email'] for d in data['code_owners'] diff --git a/tests/owners_client_test.py b/tests/owners_client_test.py index 9e823436a..37c5fa389 100644 --- a/tests/owners_client_test.py +++ b/tests/owners_client_test.py @@ -102,7 +102,7 @@ class GerritClientTest(unittest.TestCase): # Always use slashes as separators. gerrit_util.GetOwnersForFile.assert_called_once_with( 'host', 'project', 'branch', 'bar/everyone/foo.txt', - resolve_all_users=False) + resolve_all_users=False, seed=mock.ANY) def testListOwnersOwnedByAll(self): mock.patch( @@ -180,21 +180,6 @@ class OwnersClientTest(unittest.TestCase): self.client.GetFilesApprovalStatus(['everyone'], [], []), {'everyone': owners_client.OwnersClient.INSUFFICIENT_REVIEWERS}) - def test_owner_combinations(self): - owners = [alice, bob, chris, dave, emily] - self.assertEqual( - list(owners_client._owner_combinations(owners, 2)), - [(bob, alice), - (chris, alice), - (chris, bob), - (dave, alice), - (dave, bob), - (dave, chris), - (emily, alice), - (emily, bob), - (emily, chris), - (emily, dave)]) - def testScoreOwners(self): self.client.owners_by_path = { 'a': [alice, bob, chris] @@ -211,7 +196,7 @@ class OwnersClientTest(unittest.TestCase): } self.assertEqual( self.client.ScoreOwners(self.client.owners_by_path.keys()), - [bob, alice, chris] + [alice, bob, chris] ) self.client.owners_by_path = { @@ -222,7 +207,7 @@ class OwnersClientTest(unittest.TestCase): self.assertEqual( self.client.ScoreOwners( self.client.owners_by_path.keys(), exclude=[chris]), - [bob, alice], + [alice, bob], ) self.client.owners_by_path = { @@ -233,68 +218,60 @@ class OwnersClientTest(unittest.TestCase): } self.assertEqual( self.client.ScoreOwners(self.client.owners_by_path.keys()), - [chris, dave, alice, bob] + [alice, chris, bob, dave] ) - def testSuggestOwners(self): - self.client.owners_by_path = {'a': [alice]} - self.assertEqual( - self.client.SuggestOwners(['a']), - [alice]) + def assertSuggestsOwners(self, owners_by_path, exclude=None): + self.client.owners_by_path = owners_by_path + suggested = self.client.SuggestOwners( + owners_by_path.keys(), exclude=exclude) - self.client.owners_by_path = {'abcd': [alice, bob, chris, dave]} - self.assertEqual( - sorted(self.client.SuggestOwners(['abcd'])), - [alice, bob]) + # Owners should appear only once + self.assertEqual(len(suggested), len(set(suggested))) - self.client.owners_by_path = {'abcd': [alice, bob, chris, dave]} - self.assertEqual( - sorted(self.client.SuggestOwners(['abcd'], exclude=[alice, bob])), - [chris, dave]) + # All paths should be covered. + suggested = set(suggested) + for owners in owners_by_path.values(): + self.assertTrue(suggested & set(owners)) - self.client.owners_by_path = { - 'ae': [alice, emily], - 'be': [bob, emily], - 'ce': [chris, emily], - 'de': [dave, emily], - } - suggested = self.client.SuggestOwners(['ae', 'be', 'ce', 'de']) - # emily should be selected along with anyone else. - self.assertIn(emily, suggested) - self.assertEqual(2, len(suggested)) + # No excluded owners should be present. + if exclude: + for owner in suggested: + self.assertNotIn(owner, exclude) - self.client.owners_by_path = { - 'ad': [alice, dave], - 'cad': [chris, alice, dave], - 'ead': [emily, alice, dave], - 'bd': [bob, dave], - } - self.assertEqual( - sorted(self.client.SuggestOwners(['ad', 'cad', 'ead', 'bd'])), - [alice, dave]) - - self.client.owners_by_path = { - 'a': [alice], - 'b': [bob], - 'c': [chris], - 'ad': [alice, dave], - } - self.assertEqual( - sorted(self.client.SuggestOwners(['a', 'b', 'c', 'ad'])), - [alice, bob, chris]) - - self.client.owners_by_path = { - 'abc': [alice, bob, chris], - 'acb': [alice, chris, bob], - 'bac': [bob, alice, chris], - 'bca': [bob, chris, alice], - 'cab': [chris, alice, bob], - 'cba': [chris, bob, alice] - } - suggested = self.client.SuggestOwners( - ['abc', 'acb', 'bac', 'bca', 'cab', 'cba']) - # Any two owners. - self.assertEqual(2, len(suggested)) + def testSuggestOwners(self): + self.assertSuggestsOwners({}) + self.assertSuggestsOwners({'a': [alice]}) + self.assertSuggestsOwners({'abcd': [alice, bob, chris, dave]}) + self.assertSuggestsOwners( + {'abcd': [alice, bob, chris, dave]}, + exclude=[alice, bob]) + self.assertSuggestsOwners( + {'ae': [alice, emily], + 'be': [bob, emily], + 'ce': [chris, emily], + 'de': [dave, emily]}) + self.assertSuggestsOwners( + {'ad': [alice, dave], + 'cad': [chris, alice, dave], + 'ead': [emily, alice, dave], + 'bd': [bob, dave]}) + self.assertSuggestsOwners( + {'a': [alice], + 'b': [bob], + 'c': [chris], + 'ad': [alice, dave]}) + self.assertSuggestsOwners( + {'abc': [alice, bob, chris], + 'acb': [alice, chris, bob], + 'bac': [bob, alice, chris], + 'bca': [bob, chris, alice], + 'cab': [chris, alice, bob], + 'cba': [chris, bob, alice]}) + + # Check that we can handle a large amount of files with unrelated owners. + self.assertSuggestsOwners( + {str(x): [str(x)] for x in range(100)}) def testBatchListOwners(self): self.client.owners_by_path = {