From 5a9ff4355446e8aa36dc06d5fa8fcf5596b68ce9 Mon Sep 17 00:00:00 2001 From: Edward Lemur Date: Tue, 30 Oct 2018 19:00:22 +0000 Subject: [PATCH] git-cl: Report Gerrit RPC requests information. Bug: 897394 Change-Id: I055e844299e262be81d5ac52ef24571b8fdfd47c Reviewed-on: https://chromium-review.googlesource.com/c/1292245 Commit-Queue: Edward Lesmes Reviewed-by: Andy Perelson Reviewed-by: Andrii Shyshkalov --- gerrit_util.py | 10 ++++++++++ metrics.README.md | 27 ++++++++++++++++++++++++++- metrics_utils.py | 16 +++++++++++++--- tests/metrics_test.py | 2 ++ 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/gerrit_util.py b/gerrit_util.py index 3003421edd..ef9b1022e8 100644 --- a/gerrit_util.py +++ b/gerrit_util.py @@ -30,6 +30,8 @@ from multiprocessing.pool import ThreadPool import auth import gclient_utils +import metrics +import metrics_utils import subprocess2 from third_party import httplib2 @@ -425,8 +427,16 @@ def ReadHttpResponse(conn, accept_statuses=frozenset([200])): sleep_time = 1.5 failed = False for idx in range(TRY_LIMIT): + before_response = time.time() response, contents = conn.request(**conn.req_params) + response_time = time.time() - before_response + metrics.collector.add_repeated( + 'http_requests', + metrics_utils.extract_http_metrics( + conn.req_params['uri'], conn.req_params['method'], response.status, + response_time)) + # Check if this is an authentication issue. www_authenticate = response.get('www-authenticate') if (response.status in (httplib.UNAUTHORIZED, httplib.FOUND) and diff --git a/metrics.README.md b/metrics.README.md index 04674d6598..a7aa43322d 100644 --- a/metrics.README.md +++ b/metrics.README.md @@ -28,12 +28,16 @@ First, some words about what data we are **NOT** collecting: - We won't record any information that identifies you personally. - We won't record the command line flag values. - We won't record information about the current directory or environment flags. +- We won't record arbitrary strings. We only collect a string if it is in the + list available at + https://chromium.googlesource.com/infra/infra/+/master/go/src/infra/appengine/depot_tools_metrics/metrics/metrics_schema.json#45 The metrics we're collecting are: - A timestamp, with a week resolution. - The age of your depot\_tools checkout, with a week resolution. - Your version of Python (in the format major.minor.micro). +- Your version of Git (in the format major.minor.micro). - The OS of your machine (i.e. win, linux or mac). - The arch of your machine (e.g. x64, arm, etc). - The command that you ran (e.g. `gclient sync`). @@ -45,8 +49,29 @@ The metrics we're collecting are: fetch using depot\_tools' fetch command (e.g. Chromium, WebRTC, V8, etc) - The age of your project checkout, with a week resolution. - What features are you using in your DEPS and .gclient files. For example: - - Are you setting `use\_relative\_paths=True`? + - Are you setting `use_relative_paths=True`? - Are you using `recursedeps`? +- Information about the http requests that depot_tools makes: + - What host are we making the request to? + Only collected for well known hosts like chromium-review.googlesource.com. + - What path did we access on the server? + We map the path to an enum to make sure we're not collecting PII. + i.e. we report 'changes/' instead of 'changes/12345' + - What arguments were used on the request? + We collect only known argument names, but not their values. + - What known headers were present or absent? + - How long did the execution take? + - What was the response code? + - What HTTP method was used? (i.e. GET, PUT, POST, etc.) +- Information about the commands that depot_tools runs: + - What command was executed? (i.e. git or cipd) + - How long did the command execute for? + - What argument names (but not values) were passed to the program. + (e.g. --checkout but not the branch name). + - What was the exit code? + +The list of all known strings we collect can be found at +https://chromium.googlesource.com/infra/infra/+/master/go/src/infra/appengine/depot_tools_metrics/metrics/metrics_schema.json#45 ## Why am I seeing this message *again*? diff --git a/metrics_utils.py b/metrics_utils.py index 7cff588107..920934486d 100644 --- a/metrics_utils.py +++ b/metrics_utils.py @@ -17,7 +17,7 @@ from third_party import colorama # Current version of metrics recording. # When we add new metrics, the version number will be increased, we display the # user what has changed, and ask the user to agree again. -CURRENT_VERSION = 0 +CURRENT_VERSION = 1 APP_URL = 'https://cit-cli-metrics.appspot.com' @@ -34,18 +34,28 @@ NOTICE_COLLECTION_HEADER = ( ) NOTICE_VERSION_CHANGE_HEADER = ( '*****************************************************\n' - '* WE ARE COLLECTING ADDITIONAL METRICS *' + '* WE ARE COLLECTING ADDITIONAL METRICS *\n' + '* *\n' + '* Please review the changes and opt-in again. *' ) NOTICE_FOOTER = ( '* For more information, and for how to disable this *\n' '* message, please see metrics.README.md in your *\n' - '* depot_tools checkout. *\n' + '* depot_tools checkout or visit *\n' + '* https://bit.ly/2ufRS4p. *\n' '*****************************************************\n' ) CHANGE_NOTICE = { # No changes for version 0 0: '', + 1: ('* We want to collect the Git version. *\n' + '* We want to collect information about the HTTP *\n' + '* requests that depot_tools makes, and the git and *\n' + '* cipd commands it executes. *\n' + '* *\n' + '* We only collect known strings to make sure we *\n' + '* don\'t record PII. *') } diff --git a/tests/metrics_test.py b/tests/metrics_test.py index 3c33d1e2d1..feba7a3c2c 100644 --- a/tests/metrics_test.py +++ b/tests/metrics_test.py @@ -40,6 +40,8 @@ class MetricsCollectorTest(unittest.TestCase): self.FileWrite = mock.Mock() self.FileRead = mock.Mock() + # So that we don't have to update the tests everytime we change the version. + mock.patch('metrics.metrics_utils.CURRENT_VERSION', 0).start() mock.patch('metrics.urllib2', self.urllib2).start() mock.patch('metrics.subprocess.Popen', self.Popen).start() mock.patch('metrics.gclient_utils.FileWrite', self.FileWrite).start()