From 470b543fcff84348178977c8f0b5633cfe7d280f Mon Sep 17 00:00:00 2001 From: "maruel@chromium.org" Date: Tue, 11 Oct 2011 18:18:19 +0000 Subject: [PATCH] Stop modifiying requirements out of thread and generate it instead. This fixes GClientSmokeBoth.testMultiSolutionsJobs flakiness by having consistent ordering. Now no out of thread modification is ever done, which result in much saner code. R=dpranke@chromium.org BUG=60725 TEST=tested manually with gclient sync --jobs 100 on chrome and with gclient_smotetest.py Review URL: http://codereview.chromium.org/8174014 git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@104920 0039d316-1c4b-4281-b951-d872f2087c98 --- gclient.py | 110 ++++++++++++++++++++----------------- gclient_utils.py | 12 ---- tests/gclient_smoketest.py | 60 ++++++++++---------- tests/gclient_test.py | 94 +++++++++++++++---------------- 4 files changed, 138 insertions(+), 138 deletions(-) diff --git a/gclient.py b/gclient.py index 22e5f990a..cbba142e6 100644 --- a/gclient.py +++ b/gclient.py @@ -261,33 +261,14 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): if not self.name and self.parent: raise gclient_utils.Error('Dependency without name') - def setup_requirements(self): - """Setup self.requirements and find any other dependency who would have self - as a requirement. - - Returns True if this entry should be added, False if it is a duplicate of - another entry. - """ - if self.name in [s.name for s in self.parent.dependencies]: - raise gclient_utils.Error( - 'The same name "%s" appears multiple times in the deps section' % - self.name) - if self.should_process: - siblings = [d for d in self.root.subtree(False) if d.name == self.name] - for sibling in siblings: - if self.url != sibling.url: - raise gclient_utils.Error( - 'Dependency %s specified more than once:\n %s\nvs\n %s' % - (self.name, sibling.hierarchy(), self.hierarchy())) - # In theory we could keep it as a shadow of the other one. In - # practice, simply ignore it. - logging.warn('Won\'t process duplicate dependency %s' % sibling) - return False - + @property + def requirements(self): + """Calculate the list of requirements.""" + requirements = set() # self.parent is implicitly a requirement. This will be recursive by # definition. if self.parent and self.parent.name: - self.add_requirement(self.parent.name) + requirements.add(self.parent.name) # For a tree with at least 2 levels*, the leaf node needs to depend # on the level higher up in an orderly way. @@ -300,27 +281,48 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): # Interestingly enough, the following condition only works in the case we # want: self is a 2nd level node. 3nd level node wouldn't need this since # they already have their parent as a requirement. - root_deps = self.root.dependencies - if self.parent in root_deps: - for i in root_deps: - if i is self.parent: - break - if i.name: - self.add_requirement(i.name) + if self.parent and self.parent.parent and not self.parent.parent.parent: + requirements |= set(i.name for i in self.root.dependencies if i.name) if isinstance(self.url, self.FromImpl): - self.add_requirement(self.url.module_name) + requirements.add(self.url.module_name) - if self.name and self.should_process: - for obj in self.root.depth_first_tree(): - if obj is self or not obj.name: - continue - # Step 1: Find any requirements self may need. - if self.name.startswith(posixpath.join(obj.name, '')): - self.add_requirement(obj.name) - # Step 2: Find any requirements self may impose. - if obj.name.startswith(posixpath.join(self.name, '')): - obj.add_requirement(self.name) + if self.name: + requirements |= set( + obj.name for obj in self.root.subtree(False) + if (obj is not self + and obj.name and + self.name.startswith(posixpath.join(obj.name, '')))) + requirements = tuple(sorted(requirements)) + logging.info('Dependency(%s).requirements = %s' % (self.name, requirements)) + return requirements + + def verify_validity(self): + """Verifies that this Dependency is fine to add as a child of another one. + + Returns True if this entry should be added, False if it is a duplicate of + another entry. + """ + logging.info('Dependency(%s).verify_validity()' % self.name) + if self.name in [s.name for s in self.parent.dependencies]: + raise gclient_utils.Error( + 'The same name "%s" appears multiple times in the deps section' % + self.name) + if not self.should_process: + # Return early, no need to set requirements. + return True + + # This require a full tree traversal with locks. + siblings = [d for d in self.root.subtree(False) if d.name == self.name] + for sibling in siblings: + if self.url != sibling.url: + raise gclient_utils.Error( + 'Dependency %s specified more than once:\n %s\nvs\n %s' % + (self.name, sibling.hierarchy(), self.hierarchy())) + # In theory we could keep it as a shadow of the other one. In + # practice, simply ignore it. + logging.warn('Won\'t process duplicate dependency %s' % sibling) + return False return True def LateOverride(self, url): @@ -331,10 +333,13 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): assert self.parsed_url == None or not self.should_process, self.parsed_url parsed_url = self.get_custom_deps(self.name, url) if parsed_url != url: - logging.info('LateOverride(%s, %s) -> %s' % (self.name, url, parsed_url)) + logging.info( + 'Dependency(%s).LateOverride(%s) -> %s' % + (self.name, url, parsed_url)) return parsed_url if isinstance(url, self.FromImpl): + # Requires tree traversal. ref = [ dep for dep in self.root.subtree(True) if url.module_name == dep.name ] @@ -355,7 +360,8 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): found_dep = found_deps[0] parsed_url = found_dep.LateOverride(found_dep.url) logging.info( - 'LateOverride(%s, %s) -> %s (From)' % (self.name, url, parsed_url)) + 'Dependency(%s).LateOverride(%s) -> %s (From)' % + (self.name, url, parsed_url)) return parsed_url if isinstance(url, basestring): @@ -374,15 +380,20 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): parsed_url = scm.FullUrlForRelativeUrl(url) else: parsed_url = url - logging.info('LateOverride(%s, %s) -> %s' % (self.name, url, parsed_url)) + logging.info( + 'Dependency(%s).LateOverride(%s) -> %s' % + (self.name, url, parsed_url)) return parsed_url if isinstance(url, self.FileImpl): - logging.info('LateOverride(%s, %s) -> %s (File)' % (self.name, url, url)) + logging.info( + 'Dependency(%s).LateOverride(%s) -> %s (File)' % + (self.name, url, url)) return url if url is None: - logging.info('LateOverride(%s, %s) -> %s' % (self.name, url, url)) + logging.info( + 'Dependency(%s).LateOverride(%s) -> %s' % (self.name, url, url)) return url raise gclient_utils.Error('Unknown url type') @@ -459,8 +470,8 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): def add_dependencies_and_close(self, deps_to_add, hooks): """Adds the dependencies, hooks and mark the parsing as done.""" - for dep in deps_to_add: - if dep.setup_requirements(): + for dep in sorted(deps_to_add, key=lambda x: x.name): + if dep.verify_validity(): self.add_dependency(dep) self._mark_as_parsed(hooks) @@ -505,6 +516,7 @@ class Dependency(gclient_utils.WorkItem, DependencySettings): # pylint: disable=W0221 def run(self, revision_overrides, command, args, work_queue, options): """Runs |command| then parse the DEPS file.""" + logging.info('Dependency(%s).run()' % self.name) assert self._file_list == [] if not self.should_process: return diff --git a/gclient_utils.py b/gclient_utils.py index 958034dfc..235d41458 100644 --- a/gclient_utils.py +++ b/gclient_utils.py @@ -479,13 +479,10 @@ def lockedmethod(method): class WorkItem(object): """One work item.""" def __init__(self, name): - # A list of string, each being a WorkItem name. - self._requirements = set() # A unique string representing this work item. self._name = name self.lock = threading.RLock() - @lockedmethod def run(self, work_queue): """work_queue is passed as keyword argument so it should be the last parameters of the function when you override it.""" @@ -495,15 +492,6 @@ class WorkItem(object): def name(self): return self._name - @property - @lockedmethod - def requirements(self): - return tuple(self._requirements) - - @lockedmethod - def add_requirement(self, new): - self._requirements.add(new) - class ExecutionQueue(object): """Runs a set of WorkItem that have interdependencies and were WorkItem are diff --git a/tests/gclient_smoketest.py b/tests/gclient_smoketest.py index b8673236b..b88209379 100755 --- a/tests/gclient_smoketest.py +++ b/tests/gclient_smoketest.py @@ -53,25 +53,28 @@ class GClientSmokeBase(FakeReposTestBase): return (stdout.replace('\r\n', '\n'), stderr.replace('\r\n', '\n'), process.returncode) + def untangle(self, stdout): + tasks = {} + remaining = [] + for line in stdout.splitlines(False): + m = re.match(r'^(\d)+>(.*)$', line) + if not m: + remaining.append(line) + else: + self.assertEquals([], remaining) + tasks.setdefault(int(m.group(1)), []).append(m.group(2)) + out = [] + for key in sorted(tasks.iterkeys()): + out.extend(tasks[key]) + out.extend(remaining) + return '\n'.join(out) + def parseGclient(self, cmd, items, expected_stderr='', untangle=False): """Parse gclient's output to make it easier to test. If untangle is True, tries to sort out the output from parallel checkout.""" (stdout, stderr, returncode) = self.gclient(cmd) if untangle: - tasks = {} - remaining = [] - for line in stdout.splitlines(False): - m = re.match(r'^(\d)+>(.*)$', line) - if not m: - remaining.append(line) - else: - self.assertEquals([], remaining) - tasks.setdefault(int(m.group(1)), []).append(m.group(2)) - out = [] - for key in sorted(tasks.iterkeys()): - out.extend(tasks[key]) - out.extend(remaining) - stdout = '\n'.join(out) + stdout = self.untangle(stdout) self.checkString(expected_stderr, stderr) self.assertEquals(0, returncode) return self.checkBlock(stdout, items) @@ -610,17 +613,17 @@ class GClientSmokeSVN(GClientSmokeBase): out = self.parseGclient( ['status', '--deps', 'mac', '--verbose', '--jobs', '1'], [['running', join(self.root_dir, 'src')], - ['running', join(self.root_dir, 'src', 'third_party', 'fpp')], ['running', join(self.root_dir, 'src', 'other')], + ['running', join(self.root_dir, 'src', 'third_party', 'fpp')], ['running', join(self.root_dir, 'src', 'third_party', 'prout')]]) out = self.svnBlockCleanup(out) self.checkString('other', out[0][1]) self.checkString(join('third_party', 'fpp'), out[0][2]) self.checkString(join('third_party', 'prout'), out[0][3]) - self.checkString('hi', out[2][1]) + self.checkString('hi', out[1][1]) self.assertEquals(4, len(out[0])) - self.assertEquals(1, len(out[1])) - self.assertEquals(2, len(out[2])) + self.assertEquals(2, len(out[1])) + self.assertEquals(1, len(out[2])) self.assertEquals(1, len(out[3])) self.assertEquals(4, len(out)) @@ -1048,11 +1051,6 @@ class GClientSmokeBoth(GClientSmokeBase): self.assertTree(tree) def testMultiSolutionsJobs(self): - print >> sys.stderr, ( - 'Warning: testMultiSolutionsJobs is temporarily disabled') - return - # unreachable code - # pylint: disable=W0101 if not self.enabled: return self.gclient(['config', '--spec', @@ -1061,14 +1059,14 @@ class GClientSmokeBoth(GClientSmokeBase): ' "url": "' + self.svn_base + 'trunk/src/"},' '{"name": "src-git",' '"url": "' + self.git_base + 'repo_1"}]']) - self.parseGclient(['sync', '--deps', 'mac', '--jobs', '8'], - ['running', 'running', 'running', - # This is due to the way svn update is called for a single - # file when File() is used in a DEPS file. - ('running', self.root_dir + '/src/file/other'), - 'running', 'running', 'running', 'running', 'running', 'running', - 'running', 'running'], - untangle=True) + # There is no guarantee that the ordering will be consistent. + (stdout, stderr, returncode) = self.gclient( + ['sync', '--deps', 'mac', '--jobs', '8']) + stdout = self.untangle(stdout) + self.checkString('', stderr) + self.assertEquals(0, returncode) + results = self.splitBlock(stdout) + self.assertEquals(12, len(results)) tree = self.mangle_git_tree(('repo_1@2', 'src-git'), ('repo_2@1', 'src/repo2'), ('repo_3@2', 'src/repo2/repo_renamed')) diff --git a/tests/gclient_test.py b/tests/gclient_test.py index 764970a4a..e547c8a6e 100755 --- a/tests/gclient_test.py +++ b/tests/gclient_test.py @@ -72,29 +72,22 @@ class GclientTest(trial_dir.TestCase): return SCMMock(self, parsed_url) def testDependencies(self): - self._dependencies('1', False) - - def testDependenciesReverse(self): - self._dependencies('1', True) + self._dependencies('1') def testDependenciesJobs(self): - # TODO(maruel): Reenable once parallel processing works. - #self._dependencies('1000', False) - pass + self._dependencies('1000') + + def _dependencies(self, jobs): + """Verifies that dependencies are processed in the right order. - def testDependenciesJobsReverse(self): - # TODO(maruel): Reenable once parallel processing works. - #self._dependencies('1000', True) - pass + e.g. if there is a dependency 'src' and another 'src/third_party/bar', that + bar isn't fetched until 'src' is done. + Also test that a From() dependency should not be processed when it is listed + as a requirement. - def _dependencies(self, jobs, reverse): - # Verify that dependencies are processed in the right order, e.g. if there - # is a dependency 'src' and another 'src/third_party/bar', that bar isn't - # fetched until 'src' is done. - # jobs is the number of parallel jobs simulated. reverse is to reshuffle the - # list to see if it is still processed in order correctly. - # Also test that a From() dependency that should not be processed is listed - # as a requirement. + Args: + |jobs| is the number of parallel jobs simulated. + """ parser = gclient.Parser() options, args = parser.parse_args(['--jobs', jobs]) write( @@ -117,6 +110,7 @@ class GclientTest(trial_dir.TestCase): write( os.path.join('bar', 'DEPS'), 'deps = {\n' + # There is two foo/dir1/dir2. This one is fetched as bar/dir1/dir2. ' "foo/dir1/dir2": "/dir1/dir2",\n' '}') write( @@ -129,6 +123,7 @@ class GclientTest(trial_dir.TestCase): 'deps = {\n' # This one should not be fetched or set as a requirement. ' "foo/dir1/dir2/dir5": "svn://example.com/x",\n' + # This foo/dir1/dir2 points to a different url than the one in bar. ' "foo/dir1/dir2": "/dir1/another",\n' '}') @@ -136,48 +131,55 @@ class GclientTest(trial_dir.TestCase): self._check_requirements(obj.dependencies[0], {}) self._check_requirements(obj.dependencies[1], {}) obj.RunOnDeps('None', args) - # The trick here is to manually process the list to make sure it's out of - # order. - for i in obj.dependencies: - # pylint: disable=W0212 - i._dependencies.sort(key=lambda x: x.name, reverse=reverse) actual = self._get_processed() - # We don't care of the ordering of these items: - self.assertEquals( - ['svn://example.com/bar', 'svn://example.com/foo'], sorted(actual[0:2])) - actual = actual[2:] - # Ordering may not be exact in case of parallel jobs. - self.assertTrue( - actual.index('svn://example.com/bar/dir1/dir2') > - actual.index('svn://example.com/foo/dir1')) - actual.remove('svn://example.com/bar/dir1/dir2') - - # Ordering may not be exact in case of parallel jobs. - actual.remove('svn://example.com/bar_empty') + first_3 = [ + 'svn://example.com/bar', + 'svn://example.com/bar_empty', + 'svn://example.com/foo', + ] + if jobs != 1: + # We don't care of the ordering of these items except that bar must be + # before bar/empty. + self.assertTrue( + actual.index('svn://example.com/bar') < + actual.index('svn://example.com/bar_empty')) + self.assertEquals(first_3, sorted(actual[0:3])) + else: + self.assertEquals(first_3, actual[0:3]) self.assertEquals( [ 'svn://example.com/foo/dir1', + 'svn://example.com/bar/dir1/dir2', 'svn://example.com/foo/dir1/dir2/dir3', 'svn://example.com/foo/dir1/dir2/dir3/dir4', - # TODO(maruel): This is probably wrong. 'svn://example.com/foo/dir1/dir2/dir3/dir4/dir1/another', ], - actual) + actual[3:]) + self.assertEquals(3, len(obj.dependencies)) + self.assertEquals('bar', obj.dependencies[0].name) + self.assertEquals('bar/empty', obj.dependencies[1].name) + self.assertEquals('foo', obj.dependencies[2].name) self._check_requirements( obj.dependencies[0], { - 'foo/dir1': ['foo'], - 'foo/dir1/dir2/dir3': ['foo', 'foo/dir1', 'foo/dir1/dir2'], - 'foo/dir1/dir2/dir3/dir4': - ['foo', 'foo/dir1', 'foo/dir1/dir2', 'foo/dir1/dir2/dir3'], - 'foo/dir1/dir2/dir5/dir6': - ['foo', 'foo/dir1', 'foo/dir1/dir2', 'foo/dir1/dir2/dir3/dir4'], + 'foo/dir1/dir2': ['bar', 'bar/empty', 'foo', 'foo/dir1'], }) self._check_requirements( obj.dependencies[1], + {}) + self._check_requirements( + obj.dependencies[2], { - 'foo/dir1/dir2': ['bar', 'foo', 'foo/dir1'], + 'foo/dir1': ['bar', 'bar/empty', 'foo'], + 'foo/dir1/dir2/dir3': + ['bar', 'bar/empty', 'foo', 'foo/dir1', 'foo/dir1/dir2'], + 'foo/dir1/dir2/dir3/dir4': + [ 'bar', 'bar/empty', 'foo', 'foo/dir1', 'foo/dir1/dir2', + 'foo/dir1/dir2/dir3'], + 'foo/dir1/dir2/dir5/dir6': + [ 'bar', 'bar/empty', 'foo', 'foo/dir1', 'foo/dir1/dir2', + 'foo/dir1/dir2/dir3/dir4'], }) self._check_requirements( obj, @@ -243,7 +245,7 @@ class GclientTest(trial_dir.TestCase): # pylint: disable=W0212 obj.dependencies[0]._file_list.append('foo') str_obj = str(obj) - self.assertEquals(472, len(str_obj), '%d\n%s' % (len(str_obj), str_obj)) + self.assertEquals(471, len(str_obj), '%d\n%s' % (len(str_obj), str_obj)) if __name__ == '__main__':