From 7872beb4896330a06038248cbac312da29304bcc Mon Sep 17 00:00:00 2001 From: Bruce Dawson Date: Tue, 25 Aug 2020 21:10:31 +0000 Subject: [PATCH] Shorten hash to avoid MAX_PATH limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Windows SDK contains some very long paths. When those are added to the 40-character hash directory and the other path components it is easy to hit the Windows 260-character MAX_PATH limit. In addition, the 40-character hash makes paths unwieldy in VsChromium search results and elsewhere. A ten-character hash should be more than enough to avoid collisions - if we upload a million toolchain packages then there will be about a 50% chance of a name collision - we won't upload more than a thousand. This was tested by copying the current toolchain file on Google Storage to a truncated name and then changing to that hash in vs_toolchain.py. It was also necessary to copy the updates from depot_tools to third_party\depot_tools. This means that we can't actually start using short hashes until depot_tools has rolled. We probably won't use a shorter hash until we next roll the toolchain, just because changing the toolchain hash is mildly disruptive. Bug: 1120785 Change-Id: I878b058857cbe9cb72a72b535864404eede33f3f Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/2376030 Reviewed-by: Sébastien Marchand Commit-Queue: Bruce Dawson --- win_toolchain/get_toolchain_if_necessary.py | 6 +++++- win_toolchain/package_from_installed.py | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/win_toolchain/get_toolchain_if_necessary.py b/win_toolchain/get_toolchain_if_necessary.py index fb8a87adb..a5956055e 100755 --- a/win_toolchain/get_toolchain_if_necessary.py +++ b/win_toolchain/get_toolchain_if_necessary.py @@ -183,8 +183,12 @@ def CalculateHash(root, expected_hash): digest.update(f.read()) # Save the timestamp file if the calculated hash is the expected one. 
- if digest.hexdigest() == expected_hash: + # The expected hash may be shorter, to reduce path lengths, in which case just + # compare that many characters. + if expected_hash and digest.hexdigest()[:len(expected_hash)] == expected_hash: SaveTimestampsAndHash(root, digest.hexdigest()) + # Return the (potentially truncated) expected_hash. + return expected_hash return digest.hexdigest() diff --git a/win_toolchain/package_from_installed.py b/win_toolchain/package_from_installed.py index 6794f955b..d3c3f23f4 100644 --- a/win_toolchain/package_from_installed.py +++ b/win_toolchain/package_from_installed.py @@ -424,6 +424,10 @@ def RenameToSha1(output): zf.extractall(rel_dir) print('Hashing...') sha1 = get_toolchain_if_necessary.CalculateHash(rel_dir, None) + # Shorten from forty characters to ten. This is still enough to avoid + # collisions, while being less unwieldy and reducing the risk of MAX_PATH + # failures. + sha1 = sha1[:10] os.chdir(old_dir) shutil.rmtree(tempdir) final_name = sha1 + '.zip'