# coding=utf8
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Collection of subprocess wrapper functions.

In theory you shouldn't need anything else in subprocess, or this module failed.
"""

from __future__ import with_statement
import cStringIO
import errno
import logging
import os
import Queue
import subprocess
import sys
import time
import threading


# Constants forwarded from subprocess.
PIPE = subprocess.PIPE
STDOUT = subprocess.STDOUT
# Sends stdout or stderr to os.devnull.
VOID = object()
# Error code when a process was killed because it timed out.
TIMED_OUT = -2001

# Globals.
# Set to True if you somehow need to disable this hack.
SUBPROCESS_CLEANUP_HACKED = False


class CalledProcessError(subprocess.CalledProcessError):
  """Augment the standard exception with more data."""
  def __init__(self, returncode, cmd, cwd, stdout, stderr):
    super(CalledProcessError, self).__init__(returncode, cmd)
    self.stdout = stdout
    self.stderr = stderr
    self.cwd = cwd

  def __str__(self):
    out = 'Command %s returned non-zero exit status %s' % (
        ' '.join(self.cmd), self.returncode)
    if self.cwd:
      out += ' in ' + self.cwd
    return '\n'.join(filter(None, (out, self.stdout, self.stderr)))


class CygwinRebaseError(CalledProcessError):
  """Occurs when cygwin's fork() emulation fails due to rebased dll."""


## Utility functions


def kill_pid(pid):
  """Kills a process by its process id."""
  try:
    # Unable to import 'module'
    # pylint: disable=E1101,F0401
    import signal
    return os.kill(pid, signal.SIGKILL)
  except ImportError:
    pass


def kill_win(process):
  """Kills a process with its windows handle.

  Has no effect on other platforms.
  """
  try:
    # Unable to import 'module'
    # pylint: disable=F0401
    import win32process
    # Access to a protected member _handle of a client class
    # pylint: disable=W0212
    return win32process.TerminateProcess(process._handle, -1)
  except ImportError:
    pass


def add_kill():
  """Adds kill() method to subprocess.Popen for python <2.6"""
  if hasattr(subprocess.Popen, 'kill'):
    return

  if sys.platform == 'win32':
    subprocess.Popen.kill = kill_win
  else:
    subprocess.Popen.kill = lambda process: kill_pid(process.pid)


def hack_subprocess():
  """subprocess functions may throw exceptions when used in multiple threads.

  See http://bugs.python.org/issue1731717 for more information.
  """
  global SUBPROCESS_CLEANUP_HACKED
  if not SUBPROCESS_CLEANUP_HACKED and threading.activeCount() != 1:
    # Only hack if there is ever multiple threads.
    # There is no point to leak with only one thread.
    subprocess._cleanup = lambda: None
    SUBPROCESS_CLEANUP_HACKED = True


def get_english_env(env):
  """Forces LANG and/or LANGUAGE to be English.

  Forces encoding to utf-8 for subprocesses.

  Returns None if it is unnecessary.
  """
  if sys.platform == 'win32':
    return None
  env = env or os.environ

  # Test if it is necessary at all.
  is_english = lambda name: env.get(name, 'en').startswith('en')

  if is_english('LANG') and is_english('LANGUAGE'):
    return None

  # Requires modifications.
  env = env.copy()
  def fix_lang(name):
    if not is_english(name):
      env[name] = 'en_US.UTF-8'
  fix_lang('LANG')
  fix_lang('LANGUAGE')
  return env


class Popen(subprocess.Popen):
  """Wraps subprocess.Popen() with various workarounds.

  - Forces English output since it's easier to parse the stdout if it is always
    in English.
  - Sets shell=True on windows by default. You can override this by forcing
    shell parameter to a value.
  - Adds support for VOID to not buffer when not needed.
  - Adds self.start property.

  Note: Popen() can throw OSError when cwd or args[0] doesn't exist. Translate
  exceptions generated by cygwin when it fails trying to emulate fork().
  """
  def __init__(self, args, **kwargs):
    # Make sure we hack subprocess if necessary.
    hack_subprocess()
    add_kill()

    env = get_english_env(kwargs.get('env'))
    if env:
      kwargs['env'] = env
    if kwargs.get('shell') is None:
      # *Sigh*:  Windows needs shell=True, or else it won't search %PATH% for
      # the executable, but shell=True makes subprocess on Linux fail when it's
      # called with a list because it only tries to execute the first item in
      # the list.
      kwargs['shell'] = bool(sys.platform=='win32')

    if isinstance(args, basestring):
      tmp_str = args
    elif isinstance(args, (list, tuple)):
      tmp_str = ' '.join(args)
    else:
      raise CalledProcessError(None, args, kwargs.get('cwd'), None, None)
    if kwargs.get('cwd', None):
      tmp_str += ';  cwd=%s' % kwargs['cwd']
    logging.debug(tmp_str)

    self.stdout_cb = None
    self.stderr_cb = None
    self.stdout_void = False
    self.stderr_void = False
    def fix(stream):
      if kwargs.get(stream) in (VOID, os.devnull):
        # Replaces VOID with handle to /dev/null.
        # Create a temporary file to workaround python's deadlock.
        # http://docs.python.org/library/subprocess.html#subprocess.Popen.wait
        # When the pipe fills up, it will deadlock this process. Using a real
        # file works around that issue.
        kwargs[stream] = open(os.devnull, 'w')
        setattr(self, stream + '_void', True)
      if callable(kwargs.get(stream)):
        # Callable stdout/stderr should be used only with call() wrappers.
        setattr(self, stream + '_cb', kwargs[stream])
        kwargs[stream] = PIPE

    fix('stdout')
    fix('stderr')

    self.start = time.time()
    self.timeout = None
    self.shell = kwargs.get('shell', None)
    # Silence pylint on MacOSX
    self.returncode = None

    try:
      super(Popen, self).__init__(args, **kwargs)
    except OSError, e:
      if e.errno == errno.EAGAIN and sys.platform == 'cygwin':
        # Convert fork() emulation failure into a CygwinRebaseError().
        raise CygwinRebaseError(
            e.errno,
            args,
            kwargs.get('cwd'),
            None,
            'Visit '
            'http://code.google.com/p/chromium/wiki/CygwinDllRemappingFailure '
            'to learn how to fix this error; you need to rebase your cygwin '
            'dlls')
      # Popen() can throw OSError when cwd or args[0] doesn't exist. Let it go
      # through
      raise

  def _tee_threads(self, input):  # pylint: disable=W0622
    """Does I/O for a process's pipes using threads.

    It's the simplest and slowest implementation. Expect very slow behavior.

    If there is a callback and it doesn't keep up with the calls, the timeout
    effectiveness will be delayed accordingly.
    """
    # Queue of either of <threadname> when done or (<threadname>, data).  In
    # theory we would like to limit to ~64kb items to not cause large memory
    # usage when the callback blocks. It is not done because it slows down
    # processing on OSX10.6 by a factor of 2x, making it even slower than
    # Windows!  Revisit this decision if it becomes a problem, e.g. crash
    # because of memory exhaustion.
    queue = Queue.Queue()
    done = threading.Event()

    def write_stdin():
      try:
        stdin_io = cStringIO.StringIO(input)
        while True:
          data = stdin_io.read(1024)
          if data:
            self.stdin.write(data)
          else:
            self.stdin.close()
            break
      finally:
        queue.put('stdin')

    def _queue_pipe_read(pipe, name):
      """Queues characters read from a pipe into a queue."""
      try:
        while True:
          data = pipe.read(1)
          if not data:
            break
          queue.put((name, data))
      finally:
        queue.put(name)

    def timeout_fn():
      try:
        done.wait(self.timeout)
      finally:
        queue.put('timeout')

    def wait_fn():
      try:
        self.wait()
      finally:
        queue.put('wait')

    # Starts up to 5 threads:
    # Wait for the process to quit
    # Read stdout
    # Read stderr
    # Write stdin
    # Timeout
    threads = {
        'wait': threading.Thread(target=wait_fn),
    }
    if self.timeout is not None:
      threads['timeout'] = threading.Thread(target=timeout_fn)
    if self.stdout_cb:
      threads['stdout'] = threading.Thread(
          target=_queue_pipe_read, args=(self.stdout, 'stdout'))
    if self.stderr_cb:
      threads['stderr'] = threading.Thread(
        target=_queue_pipe_read, args=(self.stderr, 'stderr'))
    if input:
      threads['stdin'] = threading.Thread(target=write_stdin)
    for t in threads.itervalues():
      t.start()

    timed_out = False
    try:
      # This thread needs to be optimized for speed.
      while threads:
        item = queue.get()
        if item[0] is 'stdout':
          self.stdout_cb(item[1])
        elif item[0] is 'stderr':
          self.stderr_cb(item[1])
        else:
          # A thread terminated.
          threads[item].join()
          del threads[item]
          if item == 'wait':
            # Terminate the timeout thread if necessary.
            done.set()
          elif item == 'timeout' and not timed_out and self.poll() is None:
            logging.debug('Timed out after %fs: killing' % self.timeout)
            self.kill()
            timed_out = True
    finally:
      # Stop the threads.
      done.set()
      if 'wait' in threads:
        # Accelerate things, otherwise it would hang until the child process is
        # done.
        logging.debug('Killing child because of an exception')
        self.kill()
      # Join threads.
      for thread in threads.itervalues():
        thread.join()
      if timed_out:
        self.returncode = TIMED_OUT

  def communicate(self, input=None, timeout=None): # pylint: disable=W0221,W0622
    """Adds timeout and callbacks support.

    Returns (stdout, stderr) like subprocess.Popen().communicate().

    - The process will be killed after |timeout| seconds and returncode set to
      TIMED_OUT.
    """
    self.timeout = timeout
    if not self.timeout and not self.stdout_cb and not self.stderr_cb:
      return super(Popen, self).communicate(input)

    if self.timeout and self.shell:
      raise TypeError(
          'Using timeout and shell simultaneously will cause a process leak '
          'since the shell will be killed instead of the child process.')

    stdout = None
    stderr = None
    # Convert to a lambda to workaround python's deadlock.
    # http://docs.python.org/library/subprocess.html#subprocess.Popen.wait
    # When the pipe fills up, it will deadlock this process. Using a thread
    # works around that issue. No need for thread safe function since the call
    # backs are guaranteed to be called from the main thread.
    if self.stdout and not self.stdout_cb and not self.stdout_void:
      stdout = cStringIO.StringIO()
      self.stdout_cb = stdout.write
    if self.stderr and not self.stderr_cb and not self.stderr_void:
      stderr = cStringIO.StringIO()
      self.stderr_cb = stderr.write
    self._tee_threads(input)
    if stdout:
      stdout = stdout.getvalue()
    if stderr:
      stderr = stderr.getvalue()
    return (stdout, stderr)


def communicate(args, timeout=None, **kwargs):
  """Wraps subprocess.Popen().communicate() and add timeout support.

  Returns ((stdout, stderr), returncode).

  - The process will be killed after |timeout| seconds and returncode set to
    TIMED_OUT.
  - Automatically passes stdin content as input so do not specify stdin=PIPE.
  """
  stdin = kwargs.pop('stdin', None)
  if stdin is not None:
    if stdin is VOID:
      kwargs['stdin'] = open(os.devnull, 'r')
      stdin = None
    else:
      assert isinstance(stdin, basestring)
      # When stdin is passed as an argument, use it as the actual input data and
      # set the Popen() parameter accordingly.
      kwargs['stdin'] = PIPE

  proc = Popen(args, **kwargs)
  if stdin not in (None, VOID):
    return proc.communicate(stdin, timeout), proc.returncode
  else:
    return proc.communicate(None, timeout), proc.returncode


def call(args, **kwargs):
  """Emulates subprocess.call().

  Automatically convert stdout=PIPE or stderr=PIPE to VOID.
  In no case they can be returned since no code path raises
  subprocess2.CalledProcessError.
  """
  if kwargs.get('stdout') == PIPE:
    kwargs['stdout'] = VOID
  if kwargs.get('stderr') == PIPE:
    kwargs['stderr'] = VOID
  return communicate(args, **kwargs)[1]


def check_call_out(args, **kwargs):
  """Improved version of subprocess.check_call().

  Returns (stdout, stderr), unlike subprocess.check_call().
  """
  out, returncode = communicate(args, **kwargs)
  if returncode:
    raise CalledProcessError(
        returncode, args, kwargs.get('cwd'), out[0], out[1])
  return out


def check_call(args, **kwargs):
  """Emulate subprocess.check_call()."""
  check_call_out(args, **kwargs)
  return 0


def capture(args, **kwargs):
  """Captures stdout of a process call and returns it.

  Returns stdout.

  - Discards returncode.
  - Blocks stdin by default if not specified since no output will be visible.
  """
  kwargs.setdefault('stdin', VOID)

  # Like check_output, deny the caller from using stdout arg.
  return communicate(args, stdout=PIPE, **kwargs)[0][0]


def check_output(args, **kwargs):
  """Emulates subprocess.check_output().

  Captures stdout of a process call and returns stdout only.

  - Throws if return code is not 0.
  - Works even prior to python 2.7.
  - Blocks stdin by default if not specified since no output will be visible.
  - As per doc, "The stdout argument is not allowed as it is used internally."
  """
  kwargs.setdefault('stdin', VOID)
  if 'stdout' in kwargs:
    raise ValueError('stdout argument not allowed, it will be overridden.')
  return check_call_out(args, stdout=PIPE, **kwargs)[0]