Try #3 to improve the owners suggesting algorithm.

This version handles the case where we need to suggest two reviewers
who have overlapping sets of directories they can review.

R=maruel@chromium.org
BUG=76727


Review URL: https://chromiumcodereview.appspot.com/11639028

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@174216 0039d316-1c4b-4281-b951-d872f2087c98
experimental/szager/collated-output
dpranke@chromium.org 13 years ago
parent 18bc90d2d2
commit c591a700cc

@ -49,6 +49,7 @@ Examples for all of these combinations can be found in tests/owners_unittest.py.
""" """
import collections import collections
import random
import re import re
@ -252,60 +253,60 @@ class Database(object):
('%s is not a "set" directive, "*", ' ('%s is not a "set" directive, "*", '
'or an email address: "%s"' % (line_type, directive))) 'or an email address: "%s"' % (line_type, directive)))
def _covering_set_of_owners_for(self, files): def _covering_set_of_owners_for(self, files):
# Get the set of directories from the files. dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files)
dirs = set() all_possible_owners = self._all_possible_owners(dirs_remaining)
for f in files: suggested_owners = set()
dirs.add(self._enclosing_dir_with_owners(f)) while dirs_remaining:
owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining)
suggested_owners.add(owner)
owned_dirs = {} dirs_to_remove = set(el[0] for el in all_possible_owners[owner])
dir_owners = {} dirs_remaining -= dirs_to_remove
return suggested_owners
def _all_possible_owners(self, dirs):
"""Returns a list of (potential owner, distance-from-dir) tuples; a
distance of 1 is the lowest/closest possible distance (which makes the
subsequent math easier)."""
all_possible_owners = {}
for current_dir in dirs: for current_dir in dirs:
# Get the list of owners for each directory.
current_owners = set()
dirname = current_dir dirname = current_dir
while dirname in self.owners_for: distance = 1
current_owners |= self.owners_for[dirname] while True:
for owner in self.owners_for.get(dirname, []):
all_possible_owners.setdefault(owner, [])
# If the same person is in multiple OWNERS files above a given
# directory, only count the closest one.
if not any(current_dir == el[0] for el in all_possible_owners[owner]):
all_possible_owners[owner].append((current_dir, distance))
if self._stop_looking(dirname): if self._stop_looking(dirname):
break break
prev_parent = dirname
dirname = self.os_path.dirname(dirname) dirname = self.os_path.dirname(dirname)
if prev_parent == dirname: distance += 1
break return all_possible_owners
# Map each directory to a list of its owners. @staticmethod
dir_owners[current_dir] = current_owners def lowest_cost_owner(all_possible_owners, dirs):
# We want to minimize both the number of reviewers and the distance
# Add the directory to the list of each owner. # from the files/dirs needing reviews. The "pow(X, 1.75)" below is
for owner in current_owners: # an arbitrarily-selected scaling factor that seems to work well - it
owned_dirs.setdefault(owner, set()).add(current_dir) # will select one reviewer in the parent directory over three reviewers
# in subdirs, but not one reviewer over just two.
final_owners = set() total_costs_by_owner = {}
while dirs: for owner in all_possible_owners:
# Find the owner that has the most directories. total_distance = 0
max_count = 0 num_directories_owned = 0
max_owner = None for dirname, distance in all_possible_owners[owner]:
owner_count = {} if dirname in dirs:
for dirname in dirs: total_distance += distance
for owner in dir_owners[dirname]: num_directories_owned += 1
count = owner_count.get(owner, 0) + 1 if num_directories_owned:
owner_count[owner] = count total_costs_by_owner[owner] = (total_distance /
if count >= max_count: pow(num_directories_owned, 1.75))
max_owner = owner
max_count = count # Return the lowest cost owner. In the case of a tie, pick one randomly.
lowest_cost = min(total_costs_by_owner.itervalues())
# If no more directories have OWNERS, we're done. lowest_cost_owners = filter(
if not max_owner: lambda owner: total_costs_by_owner[owner] == lowest_cost,
break total_costs_by_owner)
return random.Random().choice(lowest_cost_owners)
final_owners.add(max_owner)
# Remove all directories owned by the current owner from the remaining
# list.
for dirname in owned_dirs[max_owner]:
dirs.discard(dirname)
return final_owners

@ -40,6 +40,8 @@ def test_repo():
'/OWNERS': owners_file(owners.EVERYONE), '/OWNERS': owners_file(owners.EVERYONE),
'/base/vlog.h': '', '/base/vlog.h': '',
'/chrome/OWNERS': owners_file(ben, brett), '/chrome/OWNERS': owners_file(ben, brett),
'/chrome/browser/OWNERS': owners_file(brett),
'/chrome/browser/defaults.h': '',
'/chrome/gpu/OWNERS': owners_file(ken), '/chrome/gpu/OWNERS': owners_file(ken),
'/chrome/gpu/gpu_channel.h': '', '/chrome/gpu/gpu_channel.h': '',
'/chrome/renderer/OWNERS': owners_file(peter), '/chrome/renderer/OWNERS': owners_file(peter),
@ -58,7 +60,7 @@ def test_repo():
}) })
class OwnersDatabaseTest(unittest.TestCase): class _BaseTestCase(unittest.TestCase):
def setUp(self): def setUp(self):
self.repo = test_repo() self.repo = test_repo()
self.files = self.repo.files self.files = self.repo.files
@ -73,6 +75,8 @@ class OwnersDatabaseTest(unittest.TestCase):
glob = glob or self.glob glob = glob or self.glob
return owners.Database(root, fopen, os_path, glob) return owners.Database(root, fopen, os_path, glob)
class OwnersDatabaseTest(_BaseTestCase):
def test_constructor(self): def test_constructor(self):
self.assertNotEquals(self.db(), None) self.assertNotEquals(self.db(), None)
@ -205,16 +209,41 @@ class OwnersDatabaseTest(unittest.TestCase):
self.assertRaises(owners.SyntaxErrorInOwnersFile, self.assertRaises(owners.SyntaxErrorInOwnersFile,
self.db().directories_not_covered_by, ['DEPS'], [brett]) self.db().directories_not_covered_by, ['DEPS'], [brett])
def assert_reviewers_for(self, files, expected_reviewers): def assert_syntax_error(self, owners_file_contents):
db = self.db()
self.files['/foo/OWNERS'] = owners_file_contents
self.files['/foo/DEPS'] = ''
try:
db.reviewers_for(['foo/DEPS'])
self.fail() # pragma: no cover
except owners.SyntaxErrorInOwnersFile, e:
self.assertTrue(str(e).startswith('/foo/OWNERS:1'))
def test_syntax_error__unknown_token(self):
self.assert_syntax_error('{}\n')
def test_syntax_error__unknown_set(self):
self.assert_syntax_error('set myfatherisbillgates\n')
def test_syntax_error__bad_email(self):
self.assert_syntax_error('ben\n')
class ReviewersForTest(_BaseTestCase):
def assert_reviewers_for(self, files, *potential_suggested_reviewers):
db = self.db() db = self.db()
self.assertEquals(db.reviewers_for(set(files)), set(expected_reviewers)) suggested_reviewers = db.reviewers_for(set(files))
self.assertTrue(suggested_reviewers in
[set(suggestion) for suggestion in potential_suggested_reviewers])
def test_reviewers_for__basic_functionality(self): def test_reviewers_for__basic_functionality(self):
self.assert_reviewers_for(['chrome/gpu/gpu_channel.h'], self.assert_reviewers_for(['chrome/gpu/gpu_channel.h'],
[brett]) [ken])
def test_reviewers_for__set_noparent_works(self): def test_reviewers_for__set_noparent_works(self):
self.assert_reviewers_for(['content/content.gyp'], [darin]) self.assert_reviewers_for(['content/content.gyp'],
[john],
[darin])
def test_reviewers_for__valid_inputs(self): def test_reviewers_for__valid_inputs(self):
db = self.db() db = self.db()
@ -236,15 +265,16 @@ class OwnersDatabaseTest(unittest.TestCase):
self.assert_reviewers_for([ self.assert_reviewers_for([
'chrome/gpu/gpu_channel.h', 'chrome/gpu/gpu_channel.h',
'content/baz/froboz.h', 'content/baz/froboz.h',
'chrome/renderer/gpu/gpu_channel_host.h'], [brett]) 'chrome/renderer/gpu/gpu_channel_host.h'],
[brett])
def test_reviewers_for__two_owners(self): def test_reviewers_for__two_owners(self):
self.assert_reviewers_for([ self.assert_reviewers_for([
'chrome/gpu/gpu_channel.h', 'chrome/gpu/gpu_channel.h',
'content/content.gyp', 'content/content.gyp',
'content/baz/froboz.h', 'content/baz/froboz.h',
'content/views/pie.h' 'content/views/pie.h'],
], [john, brett]) [ken, john])
def test_reviewers_for__all_files(self): def test_reviewers_for__all_files(self):
self.assert_reviewers_for([ self.assert_reviewers_for([
@ -254,32 +284,111 @@ class OwnersDatabaseTest(unittest.TestCase):
'content/content.gyp', 'content/content.gyp',
'content/bar/foo.cc', 'content/bar/foo.cc',
'content/baz/froboz.h', 'content/baz/froboz.h',
'content/views/pie.h'], [john, brett]) 'content/views/pie.h'],
[peter, ken, john])
def test_reviewers_for__per_file_owners_file(self): def test_reviewers_for__per_file_owners_file(self):
self.files['/content/baz/OWNERS'] = owners_file(lines=[ self.files['/content/baz/OWNERS'] = owners_file(lines=[
'per-file ugly.*=tom@example.com']) 'per-file ugly.*=tom@example.com'])
self.assert_reviewers_for(['content/baz/OWNERS'], [darin]) self.assert_reviewers_for(['content/baz/OWNERS'],
[john],
def assert_syntax_error(self, owners_file_contents): [darin])
db = self.db()
self.files['/foo/OWNERS'] = owners_file_contents
self.files['/foo/DEPS'] = ''
try:
db.reviewers_for(['foo/DEPS'])
self.fail() # pragma: no cover
except owners.SyntaxErrorInOwnersFile, e:
self.assertTrue(str(e).startswith('/foo/OWNERS:1'))
def test_syntax_error__unknown_token(self):
self.assert_syntax_error('{}\n')
def test_syntax_error__unknown_set(self):
self.assert_syntax_error('set myfatherisbillgates\n')
def test_syntax_error__bad_email(self):
self.assert_syntax_error('ben\n')
def test_reviewers_for__per_file(self):
self.files['/content/baz/OWNERS'] = owners_file(lines=[
'per-file ugly.*=tom@example.com'])
self.assert_reviewers_for(['content/baz/ugly.cc'],
[tom])
def test_reviewers_for__two_nested_dirs(self):
# The same owner is listed in two directories (one above the other)
self.assert_reviewers_for(['chrome/browser/defaults.h'],
[brett])
# Here, although either ben or brett could review both files,
# someone closer to the gpu_channel_host.h should also be suggested.
# This also tests that we can handle two suggested reviewers
# with overlapping sets of directories properly.
self.files['/chrome/renderer/gpu/OWNERS'] = owners_file(ken)
self.assert_reviewers_for(['chrome/OWNERS',
'chrome/renderer/gpu/gpu_channel_host.h'],
[ben, ken],
[brett, ken])
class LowestCostOwnersTest(_BaseTestCase):
# Keep the data in the test_lowest_cost_owner* methods as consistent with
# test_repo() where possible to minimize confusion.
def check(self, possible_owners, dirs, *possible_lowest_cost_owners):
suggested_owner = owners.Database.lowest_cost_owner(possible_owners, dirs)
self.assertTrue(suggested_owner in possible_lowest_cost_owners)
def test_one_dir_with_owner(self):
# brett is the only immediate owner for stuff in baz; john is also
# an owner, but further removed. We should always get brett.
self.check({brett: [('content/baz', 1)],
john: [('content/baz', 2)]},
['content/baz'],
brett)
# john and darin are owners for content; the suggestion could be either.
def test_one_dir_with_two_owners(self):
self.check({john: [('content', 1)],
darin: [('content', 1)]},
['content'],
john, darin)
def test_one_dir_with_two_owners_in_parent(self):
# As long as the distance is the same, it shouldn't matter (brett isn't
# listed in this case).
self.check({john: [('content/baz', 2)],
darin: [('content/baz', 2)]},
['content/baz'],
john, darin)
def test_two_dirs_two_owners(self):
# If they both match both dirs, they should be treated equally.
self.check({john: [('content/baz', 2), ('content/bar', 2)],
darin: [('content/baz', 2), ('content/bar', 2)]},
['content/baz', 'content/bar'],
john, darin)
# Here brett is better since he's closer for one of the two dirs.
self.check({brett: [('content/baz', 1), ('content/views', 1)],
darin: [('content/baz', 2), ('content/views', 1)]},
['content/baz', 'content/views'],
brett)
def test_hierarchy(self):
# the choices in these tests are more arbitrary value judgements;
# also, here we drift away from test_repo() to cover more cases.
# Here ben isn't picked, even though he can review both; we prefer
# closer reviewers.
self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)]},
['chrome/gpu', 'chrome/renderer'],
ken, peter)
# Here we always pick ben since he can review either dir as well as
# the others but can review both (giving us fewer total reviewers).
self.check({ben: [('chrome/gpu', 1), ('chrome/renderer', 1)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)]},
['chrome/gpu', 'chrome/renderer'],
ben)
# However, three reviewers is too many, so ben gets this one.
self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2),
('chrome/browser', 2)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)],
brett: [('chrome/browser', 1)]},
['chrome/gpu', 'chrome/renderer',
'chrome/browser'],
ben)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

Loading…
Cancel
Save