Fix git diff calls when a lot of files modified in a branch

Running a command line longer that 32k on Windows lead to issues (as
usual). We run git diff in multiple scenarios on a list of changed
files. This list is not partitioned and may lead to a command line
much longer than OS can allow.

This change replaces BuildGitDiffCmd with RunGitDiffCmd that splits
the file list internally and runs git diff multiple times if required.

Change-Id: Icfd8efc7520105b8d98125e90bb96c2bb9bd3a0a
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5613673
Reviewed-by: Bruce Dawson <brucedawson@chromium.org>
Commit-Queue: Aleksei Khoroshilov <akhoroshilov@brave.com>
Reviewed-by: Gavin Mak <gavinmak@google.com>
changes/73/5613673/10
Aleksey Khoroshilov 12 months ago committed by LUCI CQ
parent e757e08663
commit c0883c509e

@ -644,11 +644,10 @@ def _ComputeFormatDiffLineRanges(files, upstream_commit, expand=0):
return {}
# Take the git diff and find the line ranges where there are changes.
diff_cmd = BuildGitDiffCmd(['-U0'],
upstream_commit,
files,
allow_prefix=True)
diff_output = RunGit(diff_cmd)
diff_output = RunGitDiffCmd(['-U0'],
upstream_commit,
files,
allow_prefix=True)
pattern = r'(?:^diff --git a/(?:.*) b/(.*))|(?:^@@.*\+(.*) @@)'
# 2 capture groups
@ -6146,8 +6145,34 @@ def CMDowners(parser, args):
ignore_author=options.ignore_self).run()
def BuildGitDiffCmd(diff_type, upstream_commit, args, allow_prefix=False):
"""Generates a diff command."""
def _SplitArgsByCmdLineLimit(args):
"""Splits a list of arguments into shorter lists that fit within the command
line limit."""
# The maximum command line length is 32768 characters on Windows and 2097152
# characters on other platforms. Use a lower limit to be safe.
command_line_limit = 30000 if sys.platform.startswith('win32') else 2000000
batch_args = []
batch_length = 0
for arg in args:
# Add 1 to account for the space between arguments.
arg_length = len(arg) + 1
# If the current argument is too long to fit in a single command line,
# split it into smaller parts.
if batch_length + arg_length > command_line_limit and batch_args:
yield batch_args
batch_args = []
batch_length = 0
batch_args.append(arg)
batch_length += arg_length
if batch_args:
yield batch_args
def RunGitDiffCmd(diff_type, upstream_commit, files, allow_prefix=False):
"""Generates and runs diff command."""
# Generate diff for the current branch's changes.
diff_cmd = ['-c', 'core.quotePath=false', 'diff', '--no-ext-diff']
@ -6161,14 +6186,18 @@ def BuildGitDiffCmd(diff_type, upstream_commit, args, allow_prefix=False):
diff_cmd += diff_type
diff_cmd += [upstream_commit, '--']
if args:
for arg in args:
if arg == '-' or os.path.isdir(arg) or os.path.isfile(arg):
diff_cmd.append(arg)
else:
DieWithError('Argument "%s" is not a file or a directory' % arg)
if not files:
return RunGit(diff_cmd)
for file in files:
if file != '-' and not os.path.isdir(file) and not os.path.isfile(file):
DieWithError('Argument "%s" is not a file or a directory' % file)
output = ''
for files_batch in _SplitArgsByCmdLineLimit(files):
output += RunGit(diff_cmd + files_batch)
return diff_cmd
return output
def _RunClangFormatDiff(opts, clang_diff_files, top_dir, upstream_commit):
@ -6212,8 +6241,7 @@ def _RunClangFormatDiff(opts, clang_diff_files, top_dir, upstream_commit):
if not opts.dry_run and not opts.diff:
cmd.append('-i')
diff_cmd = BuildGitDiffCmd(['-U0'], upstream_commit, clang_diff_files)
diff_output = RunGit(diff_cmd).encode('utf-8')
diff_output = RunGitDiffCmd(['-U0'], upstream_commit, clang_diff_files)
env = os.environ.copy()
env['PATH'] = (str(os.path.dirname(clang_format_tool)) + os.pathsep +
@ -6223,7 +6251,7 @@ def _RunClangFormatDiff(opts, clang_diff_files, top_dir, upstream_commit):
# to die with an error if `error_ok` is not set.
stdout = RunCommand(cmd,
error_ok=True,
stdin=diff_output,
stdin=diff_output.encode(),
cwd=top_dir,
env=env,
shell=sys.platform.startswith('win32'))
@ -6276,8 +6304,7 @@ def _RunGoogleJavaFormat(opts, paths, top_dir, upstream_commit):
# If --diff is passed, google-java-format will output formatted content.
# Diff it with the existing file in the checkout and output the result.
if opts.diff:
diff_cmd = BuildGitDiffCmd(['-U3'], '--no-index', [path, '-'])
stdout = RunGit(diff_cmd, stdin=stdout.encode(), **kwds)
stdout = RunGitDiffCmd(['-U3'], '--no-index', [path, '-'])
return stdout
results = []
@ -6602,11 +6629,11 @@ def CMDformat(parser, args):
action='store_true',
help='Disable auto-formatting of .java')
opts, args = parser.parse_args(args)
opts, files = parser.parse_args(args)
# Normalize any remaining args against the current path, so paths relative
# to the current directory are still resolved as expected.
args = [os.path.join(os.getcwd(), arg) for arg in args]
# Normalize files against the current path, so paths relative to the
# current directory are still resolved as expected.
files = [os.path.join(os.getcwd(), file) for file in files]
# git diff generates paths against the root of the repository. Change
# to that directory so clang-format can find files even within subdirs.
@ -6632,9 +6659,8 @@ def CMDformat(parser, args):
'Are you in detached state?')
# Filter out copied/renamed/deleted files
changed_files_cmd = BuildGitDiffCmd(['--name-only', '--diff-filter=crd'],
upstream_commit, args)
diff_output = RunGit(changed_files_cmd)
diff_output = RunGitDiffCmd(['--name-only', '--diff-filter=crd'],
upstream_commit, files)
diff_files = diff_output.splitlines()
if opts.js:

Loading…
Cancel
Save