You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
depot_tools/subprocess2.py

539 lines
16 KiB
Python

# coding=utf8
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Collection of subprocess wrapper functions.
In theory you shouldn't need anything else in subprocess, or this module failed.
"""
from __future__ import with_statement
import cStringIO
import errno
import logging
import os
import Queue
import select
import subprocess
import sys
import time
import threading
if sys.platform != 'win32':
import fcntl
# Constants forwarded from subprocess.
PIPE = subprocess.PIPE
STDOUT = subprocess.STDOUT
# Sends stdout or stderr to os.devnull.
VOID = object()
# Error code when a process was killed because it timed out.
TIMED_OUT = -2001
# Globals.
# Set to True if you somehow need to disable this hack.
SUBPROCESS_CLEANUP_HACKED = False
class CalledProcessError(subprocess.CalledProcessError):
"""Augment the standard exception with more data."""
def __init__(self, returncode, cmd, cwd, stdout, stderr):
super(CalledProcessError, self).__init__(returncode, cmd)
self.stdout = stdout
self.stderr = stderr
self.cwd = cwd
def __str__(self):
out = 'Command %s returned non-zero exit status %s' % (
' '.join(self.cmd), self.returncode)
if self.cwd:
out += ' in ' + self.cwd
return '\n'.join(filter(None, (out, self.stdout, self.stderr)))
class CygwinRebaseError(CalledProcessError):
"""Occurs when cygwin's fork() emulation fails due to rebased dll."""
## Utility functions
def kill_pid(pid):
"""Kills a process by its process id."""
try:
# Unable to import 'module'
# pylint: disable=E1101,F0401
import signal
return os.kill(pid, signal.SIGKILL)
except ImportError:
pass
def kill_win(process):
"""Kills a process with its windows handle.
Has no effect on other platforms.
"""
try:
# Unable to import 'module'
# pylint: disable=F0401
import win32process
# Access to a protected member _handle of a client class
# pylint: disable=W0212
return win32process.TerminateProcess(process._handle, -1)
except ImportError:
pass
def add_kill():
"""Adds kill() method to subprocess.Popen for python <2.6"""
if hasattr(subprocess.Popen, 'kill'):
return
if sys.platform == 'win32':
subprocess.Popen.kill = kill_win
else:
subprocess.Popen.kill = lambda process: kill_pid(process.pid)
def hack_subprocess():
"""subprocess functions may throw exceptions when used in multiple threads.
See http://bugs.python.org/issue1731717 for more information.
"""
global SUBPROCESS_CLEANUP_HACKED
if not SUBPROCESS_CLEANUP_HACKED and threading.activeCount() != 1:
# Only hack if there is ever multiple threads.
# There is no point to leak with only one thread.
subprocess._cleanup = lambda: None
SUBPROCESS_CLEANUP_HACKED = True
def get_english_env(env):
"""Forces LANG and/or LANGUAGE to be English.
Forces encoding to utf-8 for subprocesses.
Returns None if it is unnecessary.
"""
if sys.platform == 'win32':
return None
env = env or os.environ
# Test if it is necessary at all.
is_english = lambda name: env.get(name, 'en').startswith('en')
if is_english('LANG') and is_english('LANGUAGE'):
return None
# Requires modifications.
env = env.copy()
def fix_lang(name):
if not is_english(name):
env[name] = 'en_US.UTF-8'
fix_lang('LANG')
fix_lang('LANGUAGE')
return env
def Popen(args, **kwargs):
"""Wraps subprocess.Popen() with various workarounds.
Returns a subprocess.Popen object.
- Forces English output since it's easier to parse the stdout if it is always
in English.
- Sets shell=True on windows by default. You can override this by forcing
shell parameter to a value.
- Adds support for VOID to not buffer when not needed.
Note: Popen() can throw OSError when cwd or args[0] doesn't exist. Translate
exceptions generated by cygwin when it fails trying to emulate fork().
"""
# Make sure we hack subprocess if necessary.
hack_subprocess()
add_kill()
env = get_english_env(kwargs.get('env'))
if env:
kwargs['env'] = env
if kwargs.get('shell') is None:
# *Sigh*: Windows needs shell=True, or else it won't search %PATH% for the
# executable, but shell=True makes subprocess on Linux fail when it's called
# with a list because it only tries to execute the first item in the list.
kwargs['shell'] = bool(sys.platform=='win32')
if isinstance(args, basestring):
tmp_str = args
elif isinstance(args, (list, tuple)):
tmp_str = ' '.join(args)
else:
raise CalledProcessError(None, args, kwargs.get('cwd'), None, None)
if kwargs.get('cwd', None):
tmp_str += '; cwd=%s' % kwargs['cwd']
logging.debug(tmp_str)
def fix(stream):
if kwargs.get(stream) in (VOID, os.devnull):
# Replaces VOID with handle to /dev/null.
# Create a temporary file to workaround python's deadlock.
# http://docs.python.org/library/subprocess.html#subprocess.Popen.wait
# When the pipe fills up, it will deadlock this process. Using a real file
# works around that issue.
kwargs[stream] = open(os.devnull, 'w')
if callable(kwargs.get(stream)):
# Callable stdout/stderr should be used only with call() wrappers.
kwargs[stream] = PIPE
fix('stdout')
fix('stderr')
try:
return subprocess.Popen(args, **kwargs)
except OSError, e:
if e.errno == errno.EAGAIN and sys.platform == 'cygwin':
# Convert fork() emulation failure into a CygwinRebaseError().
raise CygwinRebaseError(
e.errno,
args,
kwargs.get('cwd'),
None,
'Visit '
'http://code.google.com/p/chromium/wiki/CygwinDllRemappingFailure to '
'learn how to fix this error; you need to rebase your cygwin dlls')
# Popen() can throw OSError when cwd or args[0] doesn't exist. Let it go
# through
raise
def _queue_pipe_read(pipe, name, done, dest):
"""Queues characters read from a pipe into a queue.
Left outside the _tee_threads function to not introduce a function closure
to speed up variable lookup.
"""
while not done.isSet():
data = pipe.read(1)
if not data:
break
dest.put((name, data))
dest.put(name)
def _tee_threads(proc, timeout, start, stdin, args, kwargs):
"""Does I/O for a process's pipes using thread.
It's the simplest and slowest implementation. Expect very slow behavior.
If there is a callback and it doesn't keep up with the calls, the timeout
effectiveness will be delayed accordingly.
"""
# TODO(maruel): Implement a select based implementation on POSIX and a Windows
# one using WaitForMultipleObjects().
#
# Queue of either of <threadname> when done or (<threadname>, data).
# In theory we would like to limit to ~64kb items to not cause large memory
# usage when the callback blocks. It is not done because it slows down
# processing on OSX10.6 by a factor of 2x, making it even slower than Windows!
# Revisit this decision if it becomes a problem, e.g. crash because of
# memory exhaustion.
queue = Queue.Queue()
done = threading.Event()
def write_stdin():
stdin_io = cStringIO.StringIO(stdin)
while not done.isSet():
data = stdin_io.read(1024)
if data:
proc.stdin.write(data)
else:
proc.stdin.close()
break
queue.put('stdin')
def timeout_fn():
done.wait(timeout)
# No need to close the pipes since killing should be sufficient.
queue.put('timeout')
# Starts up to 4 threads:
# Read stdout
# Read stderr
# Write stdin
# Timeout
threads = {}
if timeout is not None:
threads['timeout'] = threading.Thread(target=timeout_fn)
if callable(kwargs.get('stdout')):
threads['stdout'] = threading.Thread(
target=_queue_pipe_read, args=(proc.stdout, 'stdout', done, queue))
if callable(kwargs.get('stderr')):
threads['stderr'] = threading.Thread(
target=_queue_pipe_read,
args=(proc.stderr, 'stderr', done, queue))
if isinstance(stdin, str):
threads['stdin'] = threading.Thread(target=write_stdin)
for t in threads.itervalues():
t.daemon = True
t.start()
timed_out = False
try:
while proc.returncode is None:
assert threads
proc.poll()
item = queue.get()
if isinstance(item, str):
threads[item].join()
del threads[item]
if item == 'timeout' and not timed_out and proc.poll() is None:
logging.debug('Timed out: killing')
proc.kill()
timed_out = True
if not threads:
# We won't be waken up anymore. Need to busy loop.
break
else:
kwargs[item[0]](item[1])
finally:
# Stop the threads.
done.set()
# Join threads
for thread in threads.itervalues():
thread.join()
# Flush the queue.
try:
while True:
item = queue.get(False)
if isinstance(item, str):
if item == 'timeout':
# TODO(maruel): Does it make sense at that point?
if not timed_out and proc.poll() is None:
logging.debug('Timed out: killing')
proc.kill()
timed_out = True
else:
kwargs[item[0]](item[1])
except Queue.Empty:
pass
# Get the remainder.
if callable(kwargs.get('stdout')):
data = proc.stdout.read()
while data:
kwargs['stdout'](data)
data = proc.stdout.read()
if callable(kwargs.get('stderr')):
data = proc.stderr.read()
while data:
kwargs['stderr'](data)
data = proc.stderr.read()
if proc.returncode is None:
# Usually happens when killed with timeout but not listening to pipes.
proc.wait()
if timed_out:
return TIMED_OUT
return proc.returncode
def _read_pipe(handles, pipe, out_fn):
"""Reads bytes from a pipe and calls the output callback."""
data = pipe.read()
if not data:
del handles[pipe]
else:
out_fn(data)
def _tee_posix(proc, timeout, start, stdin, args, kwargs):
"""Polls a process and its pipe using select.select().
TODO(maruel): Implement a non-polling method for OSes that support it.
"""
handles_r = {}
if callable(kwargs.get('stdout')):
handles_r[proc.stdout] = lambda: _read_pipe(
handles_r, proc.stdout, kwargs['stdout'])
if callable(kwargs.get('stderr')):
handles_r[proc.stderr] = lambda: _read_pipe(
handles_r, proc.stderr, kwargs['stderr'])
handles_w = {}
if isinstance(stdin, str):
stdin_io = cStringIO.StringIO(stdin)
def write_stdin():
data = stdin_io.read(1)
if data:
proc.stdin.write(data)
else:
del handles_w[proc.stdin]
proc.stdin.close()
handles_w[proc.stdin] = write_stdin
else:
# TODO(maruel): Fix me, it could be VOID.
assert stdin is None
# Make all the file objects of the child process non-blocking file.
# TODO(maruel): Test if a pipe is handed to the child process.
for pipe in (proc.stdin, proc.stdout, proc.stderr):
fileno = pipe and getattr(pipe, 'fileno', lambda: None)()
if fileno:
# Note: making a pipe non-blocking means the C stdio could act wrong. In
# particular, readline() cannot be used. Work around is to use os.read().
fl = fcntl.fcntl(fileno, fcntl.F_GETFL)
fcntl.fcntl(fileno, fcntl.F_SETFL, fl | os.O_NONBLOCK)
timed_out = False
while handles_r or handles_w or (timeout and proc.poll() is None):
period = None
if timeout:
period = max(0, timeout - (time.time() - start))
if not period and not timed_out:
proc.kill()
timed_out = True
if timed_out:
period = 0.001
# It reconstructs objects on each loop, not very efficient.
reads, writes, _, = select.select(
handles_r.keys(), handles_w.keys(), [], period)
for read in reads:
handles_r[read]()
for write in writes:
handles_w[write]()
# No pipe open anymore and if there was a time out, the child process was
# killed already.
proc.wait()
if timed_out:
return TIMED_OUT
return proc.returncode
def communicate(args, timeout=None, **kwargs):
"""Wraps subprocess.Popen().communicate().
Returns ((stdout, stderr), returncode).
- The process will be killed after |timeout| seconds and returncode set to
TIMED_OUT.
- Automatically passes stdin content as input so do not specify stdin=PIPE.
"""
if timeout and kwargs.get('shell'):
raise TypeError(
'Using timeout and shell simultaneously will cause a process leak '
'since the shell will be killed instead of the child process.')
stdin = kwargs.pop('stdin', None)
if stdin is not None:
if stdin is VOID:
kwargs['stdin'] = open(os.devnull, 'r')
stdin = None
else:
assert isinstance(stdin, basestring)
# When stdin is passed as an argument, use it as the actual input data and
# set the Popen() parameter accordingly.
kwargs['stdin'] = PIPE
start = time.time()
proc = Popen(args, **kwargs)
need_buffering = (timeout or
callable(kwargs.get('stdout')) or callable(kwargs.get('stderr')))
if not need_buffering:
# Normal workflow.
if stdin not in (None, VOID):
return proc.communicate(stdin), proc.returncode
else:
return proc.communicate(), proc.returncode
stdout = None
stderr = None
# Convert to a lambda to workaround python's deadlock.
# http://docs.python.org/library/subprocess.html#subprocess.Popen.wait
# When the pipe fills up, it will deadlock this process. Using a thread
# works around that issue. No need for thread safe function since the call
# backs are guaranteed to be called from the main thread.
if kwargs.get('stdout') == PIPE:
stdout = []
kwargs['stdout'] = stdout.append
if kwargs.get('stderr') == PIPE:
stderr = []
kwargs['stderr'] = stderr.append
if sys.platform == 'win32':
# On cygwin, ctypes._FUNCFLAG_STDCALL, which is used by ctypes.WINFUNCTYPE,
# doesn't exist so _tee_win() cannot be used yet.
returncode = _tee_threads(proc, timeout, start, stdin, args, kwargs)
else:
returncode = _tee_posix(proc, timeout, start, stdin, args, kwargs)
if not stdout is None:
stdout = ''.join(stdout)
if not stderr is None:
stderr = ''.join(stderr)
return (stdout, stderr), returncode
def call(args, **kwargs):
"""Emulates subprocess.call().
Automatically convert stdout=PIPE or stderr=PIPE to VOID.
In no case they can be returned since no code path raises
subprocess2.CalledProcessError.
"""
if kwargs.get('stdout') == PIPE:
kwargs['stdout'] = VOID
if kwargs.get('stderr') == PIPE:
kwargs['stderr'] = VOID
return communicate(args, **kwargs)[1]
def check_call_out(args, **kwargs):
"""Improved version of subprocess.check_call().
Returns (stdout, stderr), unlike subprocess.check_call().
"""
out, returncode = communicate(args, **kwargs)
if returncode:
raise CalledProcessError(
returncode, args, kwargs.get('cwd'), out[0], out[1])
return out
def check_call(args, **kwargs):
"""Emulate subprocess.check_call()."""
check_call_out(args, **kwargs)
return 0
def capture(args, **kwargs):
"""Captures stdout of a process call and returns it.
Returns stdout.
- Discards returncode.
- Blocks stdin by default if not specified since no output will be visible.
"""
kwargs.setdefault('stdin', VOID)
# Like check_output, deny the caller from using stdout arg.
return communicate(args, stdout=PIPE, **kwargs)[0][0]
def check_output(args, **kwargs):
"""Emulates subprocess.check_output().
Captures stdout of a process call and returns stdout only.
- Throws if return code is not 0.
- Works even prior to python 2.7.
- Blocks stdin by default if not specified since no output will be visible.
- As per doc, "The stdout argument is not allowed as it is used internally."
"""
kwargs.setdefault('stdin', VOID)
return check_call_out(args, stdout=PIPE, **kwargs)[0]