diff --git a/metrics_utils.py b/metrics_utils.py index 9bdae9e68..7cff58810 100644 --- a/metrics_utils.py +++ b/metrics_utils.py @@ -5,9 +5,11 @@ from __future__ import print_function +import re import scm import subprocess2 import sys +import urlparse from third_party import colorama @@ -68,6 +70,64 @@ KNOWN_PROJECT_URLS = { 'https://webrtc.googlesource.com/src', } +KNOWN_HTTP_HOSTS = { + 'chrome-internal-review.googlesource.com', + 'chromium-review.googlesource.com', + 'dart-review.googlesource.com', + 'eu1-mirror-chromium-review.googlesource.com', + 'pdfium-review.googlesource.com', + 'skia-review.googlesource.com', + 'us1-mirror-chromium-review.googlesource.com', + 'us2-mirror-chromium-review.googlesource.com', + 'us3-mirror-chromium-review.googlesource.com', + 'webrtc-review.googlesource.com', +} + +KNOWN_HTTP_METHODS = { + 'DELETE', + 'GET', + 'PATCH', + 'POST', + 'PUT', +} + +KNOWN_HTTP_PATHS = { + 'accounts': + re.compile(r'(/a)?/accounts/.*'), + 'changes': + re.compile(r'(/a)?/changes/([^/]+)?$'), + 'changes/abandon': + re.compile(r'(/a)?/changes/.*/abandon'), + 'changes/comments': + re.compile(r'(/a)?/changes/.*/comments'), + 'changes/detail': + re.compile(r'(/a)?/changes/.*/detail'), + 'changes/edit': + re.compile(r'(/a)?/changes/.*/edit'), + 'changes/message': + re.compile(r'(/a)?/changes/.*/message'), + 'changes/restore': + re.compile(r'(/a)?/changes/.*/restore'), + 'changes/reviewers': + re.compile(r'(/a)?/changes/.*/reviewers/.*'), + 'changes/revisions/commit': + re.compile(r'(/a)?/changes/.*/revisions/.*/commit'), + 'changes/revisions/review': + re.compile(r'(/a)?/changes/.*/revisions/.*/review'), + 'changes/submit': + re.compile(r'(/a)?/changes/.*/submit'), + 'projects/branches': + re.compile(r'(/a)?/projects/.*/branches/.*'), +} + +KNOWN_HTTP_ARGS = { + 'ALL_REVISIONS', + 'CURRENT_COMMIT', + 'CURRENT_REVISION', + 'DETAILED_ACCOUNTS', + 'LABELS', +} + def get_python_version(): """Return the python version in the major.minor.micro format.""" @@ -92,6 +152,54 @@ def seconds_to_weeks(duration): return int(duration) >> 19 +def extract_http_metrics(request_uri, method, status, response_time): + """Extract metrics from the request URI. + + Extracts the host, path, and arguments from the request URI, and returns them + along with the method, status and response time. + + The host, method, path and arguments must be in the KNOWN_HTTP_* constants + defined above. + + Arguments are the values of the o= url parameter. In Gerrit, additional fields + can be obtained by adding o parameters, each option requires more database + lookups and slows down the query response time to the client, so we make an + effort to collect them. + + The regex defined in KNOWN_HTTP_PATH_RES are checked against the path, and + those that match will be returned. + """ + http_metrics = { + 'status': status, + 'response_time': response_time, + } + + if method in KNOWN_HTTP_METHODS: + http_metrics['method'] = method + + parsed_url = urlparse.urlparse(request_uri) + + if parsed_url.netloc in KNOWN_HTTP_HOSTS: + http_metrics['host'] = parsed_url.netloc + + for name, path_re in KNOWN_HTTP_PATHS.iteritems(): + if path_re.match(parsed_url.path): + http_metrics['path'] = name + break + + parsed_query = urlparse.parse_qs(parsed_url.query) + + # Collect o-parameters from the request. + args = [ + arg for arg in parsed_query.get('o', []) + if arg in KNOWN_HTTP_ARGS + ] + if args: + http_metrics['arguments'] = args + + return http_metrics + + def get_repo_timestamp(path_to_repo): """Get an approximate timestamp for the upstream of |path_to_repo|. diff --git a/tests/metrics_test.py b/tests/metrics_test.py index d80f5aca7..3c33d1e2d 100644 --- a/tests/metrics_test.py +++ b/tests/metrics_test.py @@ -12,6 +12,7 @@ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, ROOT_DIR) import metrics +import metrics_utils import cStringIO from third_party import mock @@ -592,5 +593,108 @@ class MetricsCollectorTest(unittest.TestCase): self.assert_collects_metrics({'fun': [1, 2, 5]}) +class MetricsUtilsTest(unittest.TestCase): + + def test_extracts_host(self): + """Test that we extract the host from the requested URI.""" + # Regular case + http_metrics = metrics_utils.extract_http_metrics( + 'https://chromium-review.googlesource.com/foo/bar?q=baz', '', 0, 0) + self.assertEqual('chromium-review.googlesource.com', http_metrics['host']) + + # Unexpected host + http_metrics = metrics_utils.extract_http_metrics( + 'https://foo-review.googlesource.com/', '', 0, 0) + self.assertNotIn('host', http_metrics) + + def test_extracts_path(self): + """Test that we extract the matching path from the requested URI.""" + # Regular case + http_metrics = metrics_utils.extract_http_metrics( + 'https://review.example.com/changes/1234/revisions/deadbeef/commit', + '', 0, 0) + self.assertEqual('changes/revisions/commit', http_metrics['path']) + + # No matching paths + http_metrics = metrics_utils.extract_http_metrics( + 'https://review.example.com/changes/1234/unexpected/path', '', 0, 0) + self.assertNotIn('path', http_metrics) + + def test_extracts_path_changes(self): + """Tests that we extract paths for /changes/.""" + # /changes/ + http_metrics = metrics_utils.extract_http_metrics( + 'https://review.example.com/changes/proj%2Fsrc%7Emaster%7EI1234abcd', + '', 0, 0) + self.assertEqual('changes', http_metrics['path']) + + # /changes/?q= + http_metrics = metrics_utils.extract_http_metrics( + 'https://review.example.com/changes/?q=owner:me+OR+cc:me', + '', 0, 0) + self.assertEqual('changes', http_metrics['path']) + + # /changes/# + http_metrics = metrics_utils.extract_http_metrics( + 'https://review.example.com/changes/#something', + '', 0, 0) + self.assertEqual('changes', http_metrics['path']) + + # /changes// does not map to changes. + http_metrics = metrics_utils.extract_http_metrics( + 'https://review.example.com/changes/12345678/message', + '', 0, 0) + self.assertNotEqual('changes', http_metrics['path']) + + def test_extracts_arguments(self): + """Test that we can extract arguments from the requested URI.""" + # Regular case + http_metrics = metrics_utils.extract_http_metrics( + 'https://review.example.com/?q=123&foo=bar&o=ALL_REVISIONS', '', 0, 0) + self.assertEqual(['ALL_REVISIONS'], http_metrics['arguments']) + + # Some unexpected arguments are filtered out. + http_metrics = metrics_utils.extract_http_metrics( + 'https://review.example.com/?o=ALL_REVISIONS&o=LABELS&o=UNEXPECTED', + '', 0, 0) + self.assertEqual(['ALL_REVISIONS', 'LABELS'], http_metrics['arguments']) + + # No valid arguments, so arguments is not present + http_metrics = metrics_utils.extract_http_metrics( + 'https://review.example.com/?o=bar&baz=1', '', 0, 0) + self.assertNotIn('arguments', http_metrics) + + # No valid arguments, so arguments is not present + http_metrics = metrics_utils.extract_http_metrics( + 'https://review.example.com/?foo=bar&baz=1', '', 0, 0) + self.assertNotIn('arguments', http_metrics) + + def test_validates_method(self): + """Test that we validate the HTTP method used.""" + # Regular case + http_metrics = metrics_utils.extract_http_metrics('', 'POST', 0, 0) + self.assertEqual('POST', http_metrics['method']) + + # Unexpected method is not reported + http_metrics = metrics_utils.extract_http_metrics('', 'DEMAND', 0, 0) + self.assertNotIn('method', http_metrics) + + def test_status(self): + """Tests that the response status we passed is returned.""" + http_metrics = metrics_utils.extract_http_metrics('', '', 123, 0) + self.assertEqual(123, http_metrics['status']) + + http_metrics = metrics_utils.extract_http_metrics('', '', 404, 0) + self.assertEqual(404, http_metrics['status']) + + def test_response_time(self): + """Tests that the response time we passed is returned.""" + http_metrics = metrics_utils.extract_http_metrics('', '', 0, 0.25) + self.assertEqual(0.25, http_metrics['response_time']) + + http_metrics = metrics_utils.extract_http_metrics('', '', 0, 12345.25) + self.assertEqual(12345.25, http_metrics['response_time']) + + if __name__ == '__main__': unittest.main()