From ae97943bbcac71a0d99efcc340bf85ace0cc9d8e Mon Sep 17 00:00:00 2001 From: Raphael Kubo da Costa Date: Thu, 2 Nov 2017 19:49:57 +0100 Subject: [PATCH] watchlists: Precompile filepath regular expressions before using them. Instead of calling re.search() for every entry in WATCHLIST_DEFINITIONS for every file being processed, create the regular expression objects beforehand when parsing the watchlist file. Processing a Chromium commit with 17k files went down from about 25 minutes to 10 seconds with this change. Bug: 780055 Change-Id: I6493971b67a7466ce8e1e3b28537018a724bbf47 Reviewed-on: https://chromium-review.googlesource.com/751463 Reviewed-by: Aaron Gable Commit-Queue: Raphael Kubo da Costa (rakuco) --- watchlists.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/watchlists.py b/watchlists.py index a6b5582aa..5c23d2c63 100755 --- a/watchlists.py +++ b/watchlists.py @@ -38,6 +38,7 @@ class Watchlists(object): _RULES_FILENAME = _RULES _repo_root = None _defns = {} # Definitions + _path_regexps = {} # Name -> Regular expression mapping _watchlists = {} # name to email mapping def __init__(self, repo_root): @@ -88,6 +89,15 @@ class Watchlists(object): self._defns = defns self._watchlists = watchlists + # Compile the regular expressions ahead of time to avoid creating them + # on-the-fly multiple times per file. 
+ self._path_regexps = {} + for name, rule in defns.iteritems(): + filepath = rule.get('filepath') + if not filepath: + continue + self._path_regexps[name] = re.compile(filepath) + # Verify that all watchlist names are defined for name in watchlists: if name not in defns: @@ -105,13 +115,10 @@ class Watchlists(object): watchers = set() # A set, to avoid duplicates for path in paths: path = path.replace(os.sep, '/') - for name, rule in self._defns.iteritems(): + for name, rule in self._path_regexps.iteritems(): if name not in self._watchlists: continue - rex_str = rule.get('filepath') - if not rex_str: - continue - if re.search(rex_str, path): + if rule.search(path): map(watchers.add, self._watchlists[name]) return list(watchers)