From c8069af59488f83054fad89a1c44e880be53305a Mon Sep 17 00:00:00 2001 From: Takuto Ikuta Date: Wed, 9 Jan 2019 06:24:56 +0000 Subject: [PATCH] [ninjalog] fully anonymize build stats before sending We cannot send PII data. This CL tries to remove the following data: * build config that can contain build directory path * build directory path * hostname * cwd I use a per-build UUID instead of a per-user UUID. Bug: 900161 Change-Id: Id533762749806ad8616b7dc07f21b27dfe632c9a Reviewed-on: https://chromium-review.googlesource.com/c/1369473 Reviewed-by: Shinya Kawanaka Reviewed-by: Fumitoshi Ukai Commit-Queue: Takuto Ikuta --- ninjalog_uploader.py | 51 +++++++++++++++++++++++++++------ ninjalog_uploader_wrapper.py | 14 +++++---- tests/ninjalog_uploader_test.py | 31 +++++++++++++++++++- 3 files changed, 81 insertions(+), 15 deletions(-) diff --git a/ninjalog_uploader.py b/ninjalog_uploader.py index 255d6eb95..843f106a8 100755 --- a/ninjalog_uploader.py +++ b/ninjalog_uploader.py @@ -29,6 +29,14 @@ import sys from third_party import httplib2 +# These build configs affect build performance a lot. +# TODO(tikuta): Add 'blink_symbol_level', 'closure_compile' and +# 'use_jumbo_build'. 
+WHITELISTED_CONFIGS = ( + 'symbol_level', 'use_goma', 'is_debug', 'is_component_build', 'enable_nacl', + 'host_os', 'host_cpu', 'target_os', 'target_cpu' +) + def IsGoogler(server): """Check whether this script run inside corp network.""" try: @@ -42,8 +50,16 @@ def ParseGNArgs(gn_args): """Parse gn_args as json and return config dictionary.""" configs = json.loads(gn_args) build_configs = {} + for config in configs: - build_configs[config["name"]] = config["current"]["value"] + key = config["name"] + if key not in WHITELISTED_CONFIGS: + continue + if 'current' in config: + build_configs[key] = config['current']['value'] + else: + build_configs[key] = config['default']['value'] + return build_configs def GetBuildTargetFromCommandLine(cmdline): @@ -74,17 +90,34 @@ def GetBuildTargetFromCommandLine(cmdline): return targets +def GetJflag(cmdline): + """Parse cmdline to get flag value for -j""" + + for i in range(len(cmdline)): + if (cmdline[i] == '-j' and i + 1 < len(cmdline) and + cmdline[i+1].isdigit()): + return int(cmdline[i+1]) + + if (cmdline[i].startswith('-j') and + cmdline[i][len('-j'):].isdigit()): + return int(cmdline[i][len('-j'):]) + def GetMetadata(cmdline, ninjalog): - """Get metadata for uploaded ninjalog.""" + """Get metadata for uploaded ninjalog. + + Returned metadata has schema defined in + https://cs.chromium.org?q="type+Metadata+struct+%7B"+file:%5Einfra/go/src/infra/appengine/chromium_build_stats/ninjalog/ + + TODO(tikuta): Collect GOMA_* env var. + """ build_dir = os.path.dirname(ninjalog) build_configs = {} try: - args = ['gn', 'args', build_dir, '--list', '--overrides-only', - '--short', '--json'] + args = ['gn', 'args', build_dir, '--list', '--short', '--json'] if sys.platform == 'win32': # gn in PATH is bat file in windows environment (except cygwin). 
args = ['cmd', '/c'] + args @@ -101,13 +134,15 @@ def GetMetadata(cmdline, ninjalog): metadata = { 'platform': platform.system(), - 'cwd': build_dir, - 'hostname': socket.gethostname(), 'cpu_core': multiprocessing.cpu_count(), - 'cmdline': cmdline, 'build_configs': build_configs, + 'targets': GetBuildTargetFromCommandLine(cmdline), } + jflag = GetJflag(cmdline) + if jflag is not None: + metadata['jobs'] = jflag + return metadata def GetNinjalog(cmdline): @@ -165,7 +200,7 @@ def main(): g.write('# end of ninja log\n') metadata = GetMetadata(args.cmdline, ninjalog) - logging.info('send metadata: %s', metadata) + logging.info('send metadata: %s', json.dumps(metadata)) g.write(json.dumps(metadata)) h = httplib2.Http() diff --git a/ninjalog_uploader_wrapper.py b/ninjalog_uploader_wrapper.py index 9735201f3..406aa0026 100755 --- a/ninjalog_uploader_wrapper.py +++ b/ninjalog_uploader_wrapper.py @@ -15,7 +15,7 @@ import ninjalog_uploader THIS_DIR = os.path.dirname(__file__) UPLOADER = os.path.join(THIS_DIR, 'ninjalog_uploader.py') CONFIG = os.path.join(THIS_DIR, 'ninjalog.cfg') -VERSION = 1 +VERSION = 2 def LoadConfig(): @@ -40,17 +40,19 @@ def SaveConfig(config): def ShowMessage(countdown): + whitelisted = '\n'.join([' * %s' % config for config in + ninjalog_uploader.WHITELISTED_CONFIGS]) print """ Your ninjalog will be uploaded to build stats server. The uploaded log will be used to analyze user side build performance. The following information will be uploaded with ninjalog. * OS (e.g. Win, Mac or Linux) -* build directory (e.g. /home/foo/chromium/src/out/Release) -* hostname * number of cpu cores of building machine -* cmdline passed to ninja (e.g. ninja -C out/Default -j1024 chrome) -* build config (e.g. use_goma=true, is_component_build=true, etc) +* build targets (e.g. chrome, browser_tests) +* parallelism passed by -j flag +* following build configs +%s Uploading ninjalog will be started after you run autoninja another %d time. 
@@ -66,7 +68,7 @@ If you have questions about this, please send mail to infra-dev@chromium.org You can find a more detailed explanation in %s -""" % (countdown, __file__, __file__, +""" % (whitelisted, countdown, __file__, __file__, os.path.abspath(os.path.join(THIS_DIR, "ninjalog.README.md"))) diff --git a/tests/ninjalog_uploader_test.py b/tests/ninjalog_uploader_test.py index a80ae08c6..7716794d8 100755 --- a/tests/ninjalog_uploader_test.py +++ b/tests/ninjalog_uploader_test.py @@ -25,7 +25,14 @@ class NinjalogUploaderTest(unittest.TestCase): 'default': {'value': 'false'}, 'name': 'is_component_build' }, - ])), {'is_component_build': 'true'}) + { + 'default': {'value': '"x64"'}, + 'name': 'host_cpu' + }, + ])), { + 'is_component_build': 'true', + 'host_cpu': '"x64"', + }) self.assertEqual(ninjalog_uploader.ParseGNArgs(json.dumps([ { @@ -85,6 +92,28 @@ class NinjalogUploaderTest(unittest.TestCase): self.assertEqual(ninjalog_uploader.GetBuildTargetFromCommandLine( ['ninja', '-C', 'out/Release', 'chrome', 'all']), ['chrome', 'all']) + def test_get_j_flag(self): + self.assertEqual(ninjalog_uploader.GetJflag( + ['ninja']), None) + + self.assertEqual(ninjalog_uploader.GetJflag( + ['ninja','-j', '1000']), 1000) + + self.assertEqual(ninjalog_uploader.GetJflag( + ['ninja','-j', '1000a']), None) + + self.assertEqual(ninjalog_uploader.GetJflag( + ['ninja','-j', 'a']), None) + + self.assertEqual(ninjalog_uploader.GetJflag( + ['ninja','-j1000']), 1000) + + self.assertEqual(ninjalog_uploader.GetJflag( + ['ninja','-ja']), None) + + self.assertEqual(ninjalog_uploader.GetJflag( + ['ninja','-j']), None) + if __name__ == '__main__': unittest.main()