owners: Simplify logic to score and suggest owners.

Keep adding owners in the order suggested by Gerrit until coverage is
achieved instead of finding a minimal set of owners, as that takes too
long

Bug: 1186420
Change-Id: Id2ab172db0b2e83ab950191958813163099181be
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/2751343
Reviewed-by: Josip Sokcevic <sokcevic@google.com>
Reviewed-by: Gavin Mak <gavinmak@google.com>
Commit-Queue: Edward Lesmes <ehmaldonado@chromium.org>
Auto-Submit: Edward Lesmes <ehmaldonado@chromium.org>
changes/43/2751343/6
Edward Lesmes 4 years ago committed by LUCI CQ
parent 8d727180be
commit 23c3bdc950

@ -779,13 +779,15 @@ def IsCodeOwnersEnabled(host):
def GetOwnersForFile(host, project, branch, path, limit=100,
resolve_all_users=True, o_params=('DETAILS',)):
resolve_all_users=True, seed=None, o_params=('DETAILS',)):
"""Gets information about owners attached to a file."""
path = 'projects/%s/branches/%s/code_owners/%s' % (
urllib.parse.quote(project, ''),
urllib.parse.quote(branch, ''),
urllib.parse.quote(path, ''))
q = ['resolve-all-users=%s' % json.dumps(resolve_all_users)]
if seed:
q.append('seed=%d' % seed)
if limit:
q.append('n=%d' % limit)
if o_params:

@ -13,28 +13,6 @@ import owners as owners_db
import scm
def _owner_combinations(owners, num_owners):
"""Iterate owners combinations by decrasing score.
The score of an owner is its position on the owners list.
The score of a set of owners is the maximum score of all owners on the set.
Returns all combinations of up to `num_owners` sorted by decreasing score:
_owner_combinations(['0', '1', '2', '3'], 2) == [
# score 1
('1', '0'),
# score 2
('2', '0'),
('2', '1'),
# score 3
('3', '0'),
('3', '1'),
('3', '2'),
]
"""
return reversed(list(itertools.combinations(reversed(owners), num_owners)))
class OwnersClient(object):
"""Interact with OWNERS files in a repository.
@ -106,55 +84,38 @@ class OwnersClient(object):
def ScoreOwners(self, paths, exclude=None):
"""Get sorted list of owners for the given paths."""
if not paths:
return []
exclude = exclude or []
positions_by_owner = {}
owners_by_path = self.BatchListOwners(paths)
for owners in owners_by_path.values():
for i, owner in enumerate(owners):
if owner in exclude:
continue
# Gerrit API lists owners of a path sorted by an internal score, so
# owners that appear first should be prefered.
# We define the score of an owner based on the pair
# (# of files owned, minimum position on all owned files)
positions_by_owner.setdefault(owner, []).append(i)
# Sort owners by their score. Rank owners higher for more files owned and
# lower for a larger minimum position across all owned files. Randomize
# order for owners with same score to avoid bias.
return sorted(
positions_by_owner,
key=lambda o: (-len(positions_by_owner[o]),
min(positions_by_owner[o]) + random.random()))
owners = []
queues = self.BatchListOwners(paths).values()
for i in range(max(len(q) for q in queues)):
for q in queues:
if i < len(q) and q[i] not in owners and q[i] not in exclude:
owners.append(q[i])
return owners
def SuggestOwners(self, paths, exclude=None):
"""Suggest a set of owners for the given paths."""
exclude = exclude or []
paths_by_owner = {}
owners_by_path = self.BatchListOwners(paths)
for path, owners in owners_by_path.items():
for owner in owners:
if owner not in exclude:
paths_by_owner.setdefault(owner, set()).add(path)
# Select the minimum number of owners that can approve all paths.
# We start at 2 to avoid sending all changes that require multiple
# reviewers to top-level owners.
owners = self.ScoreOwners(paths, exclude=exclude)
if len(owners) < 2:
return owners
paths_by_owner.setdefault(owner, set()).add(path)
# Note that we have to iterate up to len(owners) + 1.
# e.g. if there are only 2 owners, we should consider num_owners = 2.
for num_owners in range(2, len(owners) + 1):
# Iterate all combinations of `num_owners` by decreasing score, and
# select the first one that covers all paths.
for selected in _owner_combinations(owners, num_owners):
covered = set.union(*(paths_by_owner[o] for o in selected))
if len(covered) == len(paths):
return list(selected)
selected = []
missing = set(paths)
for owner in self.ScoreOwners(paths, exclude=exclude):
missing_len = len(missing)
missing.difference_update(paths_by_owner[owner])
if missing_len > len(missing):
selected.append(owner)
if not missing:
break
return []
return selected
class DepotToolsClient(OwnersClient):
@ -207,16 +168,22 @@ class GerritClient(OwnersClient):
self._branch = branch
self._owners_cache = {}
# Seed used by Gerrit to shuffle code owners that have the same score. Can
# be used to make the sort order stable across several requests, e.g. to get
# the same set of random code owners for different file paths that have the
# same code owners.
self._seed = random.getrandbits(30)
def ListOwners(self, path):
# Always use slashes as separators.
path = path.replace(os.sep, '/')
if path not in self._owners_cache:
# GetOwnersForFile returns a list of account details sorted by order of
# best reviewer for path. If owners have the same score, the order is
# random.
# random, seeded by `self._seed`.
data = gerrit_util.GetOwnersForFile(
self._host, self._project, self._branch, path,
resolve_all_users=False)
resolve_all_users=False, seed=self._seed)
self._owners_cache[path] = [
d['account']['email']
for d in data['code_owners']

@ -102,7 +102,7 @@ class GerritClientTest(unittest.TestCase):
# Always use slashes as separators.
gerrit_util.GetOwnersForFile.assert_called_once_with(
'host', 'project', 'branch', 'bar/everyone/foo.txt',
resolve_all_users=False)
resolve_all_users=False, seed=mock.ANY)
def testListOwnersOwnedByAll(self):
mock.patch(
@ -180,21 +180,6 @@ class OwnersClientTest(unittest.TestCase):
self.client.GetFilesApprovalStatus(['everyone'], [], []),
{'everyone': owners_client.OwnersClient.INSUFFICIENT_REVIEWERS})
def test_owner_combinations(self):
owners = [alice, bob, chris, dave, emily]
self.assertEqual(
list(owners_client._owner_combinations(owners, 2)),
[(bob, alice),
(chris, alice),
(chris, bob),
(dave, alice),
(dave, bob),
(dave, chris),
(emily, alice),
(emily, bob),
(emily, chris),
(emily, dave)])
def testScoreOwners(self):
self.client.owners_by_path = {
'a': [alice, bob, chris]
@ -211,7 +196,7 @@ class OwnersClientTest(unittest.TestCase):
}
self.assertEqual(
self.client.ScoreOwners(self.client.owners_by_path.keys()),
[bob, alice, chris]
[alice, bob, chris]
)
self.client.owners_by_path = {
@ -222,7 +207,7 @@ class OwnersClientTest(unittest.TestCase):
self.assertEqual(
self.client.ScoreOwners(
self.client.owners_by_path.keys(), exclude=[chris]),
[bob, alice],
[alice, bob],
)
self.client.owners_by_path = {
@ -233,68 +218,60 @@ class OwnersClientTest(unittest.TestCase):
}
self.assertEqual(
self.client.ScoreOwners(self.client.owners_by_path.keys()),
[chris, dave, alice, bob]
[alice, chris, bob, dave]
)
def testSuggestOwners(self):
self.client.owners_by_path = {'a': [alice]}
self.assertEqual(
self.client.SuggestOwners(['a']),
[alice])
def assertSuggestsOwners(self, owners_by_path, exclude=None):
self.client.owners_by_path = owners_by_path
suggested = self.client.SuggestOwners(
owners_by_path.keys(), exclude=exclude)
self.client.owners_by_path = {'abcd': [alice, bob, chris, dave]}
self.assertEqual(
sorted(self.client.SuggestOwners(['abcd'])),
[alice, bob])
# Owners should appear only once
self.assertEqual(len(suggested), len(set(suggested)))
self.client.owners_by_path = {'abcd': [alice, bob, chris, dave]}
self.assertEqual(
sorted(self.client.SuggestOwners(['abcd'], exclude=[alice, bob])),
[chris, dave])
# All paths should be covered.
suggested = set(suggested)
for owners in owners_by_path.values():
self.assertTrue(suggested & set(owners))
self.client.owners_by_path = {
'ae': [alice, emily],
'be': [bob, emily],
'ce': [chris, emily],
'de': [dave, emily],
}
suggested = self.client.SuggestOwners(['ae', 'be', 'ce', 'de'])
# emily should be selected along with anyone else.
self.assertIn(emily, suggested)
self.assertEqual(2, len(suggested))
# No excluded owners should be present.
if exclude:
for owner in suggested:
self.assertNotIn(owner, exclude)
self.client.owners_by_path = {
'ad': [alice, dave],
'cad': [chris, alice, dave],
'ead': [emily, alice, dave],
'bd': [bob, dave],
}
self.assertEqual(
sorted(self.client.SuggestOwners(['ad', 'cad', 'ead', 'bd'])),
[alice, dave])
self.client.owners_by_path = {
'a': [alice],
'b': [bob],
'c': [chris],
'ad': [alice, dave],
}
self.assertEqual(
sorted(self.client.SuggestOwners(['a', 'b', 'c', 'ad'])),
[alice, bob, chris])
self.client.owners_by_path = {
'abc': [alice, bob, chris],
'acb': [alice, chris, bob],
'bac': [bob, alice, chris],
'bca': [bob, chris, alice],
'cab': [chris, alice, bob],
'cba': [chris, bob, alice]
}
suggested = self.client.SuggestOwners(
['abc', 'acb', 'bac', 'bca', 'cab', 'cba'])
# Any two owners.
self.assertEqual(2, len(suggested))
def testSuggestOwners(self):
self.assertSuggestsOwners({})
self.assertSuggestsOwners({'a': [alice]})
self.assertSuggestsOwners({'abcd': [alice, bob, chris, dave]})
self.assertSuggestsOwners(
{'abcd': [alice, bob, chris, dave]},
exclude=[alice, bob])
self.assertSuggestsOwners(
{'ae': [alice, emily],
'be': [bob, emily],
'ce': [chris, emily],
'de': [dave, emily]})
self.assertSuggestsOwners(
{'ad': [alice, dave],
'cad': [chris, alice, dave],
'ead': [emily, alice, dave],
'bd': [bob, dave]})
self.assertSuggestsOwners(
{'a': [alice],
'b': [bob],
'c': [chris],
'ad': [alice, dave]})
self.assertSuggestsOwners(
{'abc': [alice, bob, chris],
'acb': [alice, chris, bob],
'bac': [bob, alice, chris],
'bca': [bob, chris, alice],
'cab': [chris, alice, bob],
'cba': [chris, bob, alice]})
# Check that we can handle a large amount of files with unrelated owners.
self.assertSuggestsOwners(
{str(x): [str(x)] for x in range(100)})
def testBatchListOwners(self):
self.client.owners_by_path = {

Loading…
Cancel
Save