You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			372 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			372 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
 | 
						|
# Use of this source code is governed by a BSD-style license that can be
 | 
						|
# found in the LICENSE file.
 | 
						|
 | 
						|
"""Collection of functions and classes to fix various encoding problems on
 | 
						|
multiple platforms with python.
 | 
						|
"""
 | 
						|
 | 
						|
import codecs
 | 
						|
import locale
 | 
						|
import os
 | 
						|
import sys
 | 
						|
 | 
						|
 | 
						|
# Prevents initializing multiple times.
 | 
						|
_SYS_ARGV_PROCESSED = False
 | 
						|
 | 
						|
 | 
						|
def complain(message):
 | 
						|
  """If any exception occurs in this file, we'll probably try to print it
 | 
						|
  on stderr, which makes for frustrating debugging if stderr is directed
 | 
						|
  to our wrapper. So be paranoid about catching errors and reporting them
 | 
						|
  to sys.__stderr__, so that the user has a higher chance to see them.
 | 
						|
  """
 | 
						|
  print >> sys.__stderr__, (
 | 
						|
      isinstance(message, str) and message or repr(message))
 | 
						|
 | 
						|
 | 
						|
def fix_default_encoding():
 | 
						|
  """Forces utf8 solidly on all platforms.
 | 
						|
 | 
						|
  By default python execution environment is lazy and defaults to ascii
 | 
						|
  encoding.
 | 
						|
 | 
						|
  http://uucode.com/blog/2007/03/23/shut-up-you-dummy-7-bit-python/
 | 
						|
  """
 | 
						|
  if sys.getdefaultencoding() == 'utf-8':
 | 
						|
    return False
 | 
						|
 | 
						|
  # Regenerate setdefaultencoding.
 | 
						|
  reload(sys)
 | 
						|
  # Module 'sys' has no 'setdefaultencoding' member
 | 
						|
  # pylint: disable=E1101
 | 
						|
  sys.setdefaultencoding('utf-8')
 | 
						|
  for attr in dir(locale):
 | 
						|
    if attr[0:3] != 'LC_':
 | 
						|
      continue
 | 
						|
    aref = getattr(locale, attr)
 | 
						|
    try:
 | 
						|
      locale.setlocale(aref, '')
 | 
						|
    except locale.Error:
 | 
						|
      continue
 | 
						|
    try:
 | 
						|
      lang = locale.getlocale(aref)[0]
 | 
						|
    except (TypeError, ValueError):
 | 
						|
      continue
 | 
						|
    if lang:
 | 
						|
      try:
 | 
						|
        locale.setlocale(aref, (lang, 'UTF-8'))
 | 
						|
      except locale.Error:
 | 
						|
        os.environ[attr] = lang + '.UTF-8'
 | 
						|
  try:
 | 
						|
    locale.setlocale(locale.LC_ALL, '')
 | 
						|
  except locale.Error:
 | 
						|
    pass
 | 
						|
  return True
 | 
						|
 | 
						|
 | 
						|
###############################
 | 
						|
# Windows specific
 | 
						|
 | 
						|
 | 
						|
def fix_win_sys_argv(encoding):
 | 
						|
  """Converts sys.argv to 'encoding' encoded string.
 | 
						|
 | 
						|
  utf-8 is recommended.
 | 
						|
 | 
						|
  Works around <http://bugs.python.org/issue2128>.
 | 
						|
  """
 | 
						|
  global _SYS_ARGV_PROCESSED
 | 
						|
  if _SYS_ARGV_PROCESSED:
 | 
						|
    return False
 | 
						|
 | 
						|
  # These types are available on linux but not Mac.
 | 
						|
  # pylint: disable=E0611,F0401
 | 
						|
  from ctypes import byref, c_int, POINTER, windll, WINFUNCTYPE
 | 
						|
  from ctypes.wintypes import LPCWSTR, LPWSTR
 | 
						|
 | 
						|
  # <http://msdn.microsoft.com/en-us/library/ms683156.aspx>
 | 
						|
  GetCommandLineW = WINFUNCTYPE(LPWSTR)(('GetCommandLineW', windll.kernel32))
 | 
						|
  # <http://msdn.microsoft.com/en-us/library/bb776391.aspx>
 | 
						|
  CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(
 | 
						|
      ('CommandLineToArgvW', windll.shell32))
 | 
						|
 | 
						|
  argc = c_int(0)
 | 
						|
  argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
 | 
						|
  argv = [
 | 
						|
      argv_unicode[i].encode(encoding, 'replace')
 | 
						|
      for i in xrange(0, argc.value)]
 | 
						|
 | 
						|
  if not hasattr(sys, 'frozen'):
 | 
						|
    # If this is an executable produced by py2exe or bbfreeze, then it
 | 
						|
    # will have been invoked directly. Otherwise, unicode_argv[0] is the
 | 
						|
    # Python interpreter, so skip that.
 | 
						|
    argv = argv[1:]
 | 
						|
 | 
						|
    # Also skip option arguments to the Python interpreter.
 | 
						|
    while len(argv) > 0:
 | 
						|
      arg = argv[0]
 | 
						|
      if not arg.startswith(u'-') or arg == u'-':
 | 
						|
        break
 | 
						|
      argv = argv[1:]
 | 
						|
      if arg == u'-m':
 | 
						|
        # sys.argv[0] should really be the absolute path of the
 | 
						|
        # module source, but never mind.
 | 
						|
        break
 | 
						|
      if arg == u'-c':
 | 
						|
        argv[0] = u'-c'
 | 
						|
        break
 | 
						|
  sys.argv = argv
 | 
						|
  _SYS_ARGV_PROCESSED = True
 | 
						|
  return True
 | 
						|
 | 
						|
 | 
						|
def fix_win_codec():
 | 
						|
  """Works around <http://bugs.python.org/issue6058>."""
 | 
						|
  # <http://msdn.microsoft.com/en-us/library/dd317756.aspx>
 | 
						|
  try:
 | 
						|
    codecs.lookup('cp65001')
 | 
						|
    return False
 | 
						|
  except LookupError:
 | 
						|
    codecs.register(
 | 
						|
        lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None)
 | 
						|
    return True
 | 
						|
 | 
						|
 | 
						|
class WinUnicodeOutputBase(object):
 | 
						|
  """Base class to adapt sys.stdout or sys.stderr to behave correctly on
 | 
						|
  Windows.
 | 
						|
 | 
						|
  Setting encoding to utf-8 is recommended.
 | 
						|
  """
 | 
						|
  def __init__(self, fileno, name, encoding):
 | 
						|
    # Corresponding file handle.
 | 
						|
    self._fileno = fileno
 | 
						|
    self.encoding = encoding
 | 
						|
    self.name = name
 | 
						|
 | 
						|
    self.closed = False
 | 
						|
    self.softspace = False
 | 
						|
    self.mode = 'w'
 | 
						|
 | 
						|
  @staticmethod
 | 
						|
  def isatty():
 | 
						|
    return False
 | 
						|
 | 
						|
  def close(self):
 | 
						|
    # Don't really close the handle, that would only cause problems.
 | 
						|
    self.closed = True
 | 
						|
 | 
						|
  def fileno(self):
 | 
						|
    return self._fileno
 | 
						|
 | 
						|
  def flush(self):
 | 
						|
    raise NotImplementedError()
 | 
						|
 | 
						|
  def write(self, text):
 | 
						|
    raise NotImplementedError()
 | 
						|
 | 
						|
  def writelines(self, lines):
 | 
						|
    try:
 | 
						|
      for line in lines:
 | 
						|
        self.write(line)
 | 
						|
    except Exception, e:
 | 
						|
      complain('%s.writelines: %r' % (self.name, e))
 | 
						|
      raise
 | 
						|
 | 
						|
 | 
						|
class WinUnicodeConsoleOutput(WinUnicodeOutputBase):
 | 
						|
  """Output adapter to a Windows Console.
 | 
						|
 | 
						|
  Understands how to use the win32 console API.
 | 
						|
  """
 | 
						|
  def __init__(self, console_handle, fileno, stream_name, encoding):
 | 
						|
    super(WinUnicodeConsoleOutput, self).__init__(
 | 
						|
        fileno, '<Unicode console %s>' % stream_name, encoding)
 | 
						|
    # Handle to use for WriteConsoleW
 | 
						|
    self._console_handle = console_handle
 | 
						|
 | 
						|
    # Loads the necessary function.
 | 
						|
    # These types are available on linux but not Mac.
 | 
						|
    # pylint: disable=E0611,F0401
 | 
						|
    from ctypes import byref, GetLastError, POINTER, windll, WINFUNCTYPE
 | 
						|
    from ctypes.wintypes import BOOL, DWORD, HANDLE, LPWSTR
 | 
						|
    from ctypes.wintypes import LPVOID  # pylint: disable=E0611
 | 
						|
 | 
						|
    self._DWORD = DWORD
 | 
						|
    self._byref = byref
 | 
						|
 | 
						|
    # <http://msdn.microsoft.com/en-us/library/ms687401.aspx>
 | 
						|
    self._WriteConsoleW = WINFUNCTYPE(
 | 
						|
        BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)(
 | 
						|
            ('WriteConsoleW', windll.kernel32))
 | 
						|
    self._GetLastError = GetLastError
 | 
						|
 | 
						|
  def flush(self):
 | 
						|
    # No need to flush the console since it's immediate.
 | 
						|
    pass
 | 
						|
 | 
						|
  def write(self, text):
 | 
						|
    try:
 | 
						|
      if not isinstance(text, unicode):
 | 
						|
        # Convert to unicode.
 | 
						|
        text = str(text).decode(self.encoding, 'replace')
 | 
						|
      remaining = len(text)
 | 
						|
      while remaining > 0:
 | 
						|
        n = self._DWORD(0)
 | 
						|
        # There is a shorter-than-documented limitation on the length of the
 | 
						|
        # string passed to WriteConsoleW. See
 | 
						|
        # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>.
 | 
						|
        retval = self._WriteConsoleW(
 | 
						|
            self._console_handle, text,
 | 
						|
            min(remaining, 10000),
 | 
						|
            self._byref(n), None)
 | 
						|
        if retval == 0 or n.value == 0:
 | 
						|
          raise IOError(
 | 
						|
              'WriteConsoleW returned %r, n.value = %r, last error = %r' % (
 | 
						|
                retval, n.value, self._GetLastError()))
 | 
						|
        remaining -= n.value
 | 
						|
        if not remaining:
 | 
						|
          break
 | 
						|
        text = text[n.value:]
 | 
						|
    except Exception, e:
 | 
						|
      complain('%s.write: %r' % (self.name, e))
 | 
						|
      raise
 | 
						|
 | 
						|
 | 
						|
class WinUnicodeOutput(WinUnicodeOutputBase):
 | 
						|
  """Output adaptor to a file output on Windows.
 | 
						|
 | 
						|
  If the standard FileWrite function is used, it will be encoded in the current
 | 
						|
  code page. WriteConsoleW() permits writting any character.
 | 
						|
  """
 | 
						|
  def __init__(self, stream, fileno, encoding):
 | 
						|
    super(WinUnicodeOutput, self).__init__(
 | 
						|
        fileno, '<Unicode redirected %s>' % stream.name, encoding)
 | 
						|
    # Output stream
 | 
						|
    self._stream = stream
 | 
						|
 | 
						|
    # Flush right now.
 | 
						|
    self.flush()
 | 
						|
 | 
						|
  def flush(self):
 | 
						|
    try:
 | 
						|
      self._stream.flush()
 | 
						|
    except Exception, e:
 | 
						|
      complain('%s.flush: %r from %r' % (self.name, e, self._stream))
 | 
						|
      raise
 | 
						|
 | 
						|
  def write(self, text):
 | 
						|
    try:
 | 
						|
      if isinstance(text, unicode):
 | 
						|
        # Replace characters that cannot be printed instead of failing.
 | 
						|
        text = text.encode(self.encoding, 'replace')
 | 
						|
      self._stream.write(text)
 | 
						|
    except Exception, e:
 | 
						|
      complain('%s.write: %r' % (self.name, e))
 | 
						|
      raise
 | 
						|
 | 
						|
 | 
						|
def win_handle_is_a_console(handle):
 | 
						|
  """Returns True if a Windows file handle is a handle to a console."""
 | 
						|
  # These types are available on linux but not Mac.
 | 
						|
  # pylint: disable=E0611,F0401
 | 
						|
  from ctypes import byref, POINTER, windll, WINFUNCTYPE
 | 
						|
  from ctypes.wintypes import BOOL, DWORD, HANDLE
 | 
						|
 | 
						|
  FILE_TYPE_CHAR   = 0x0002
 | 
						|
  FILE_TYPE_REMOTE = 0x8000
 | 
						|
  INVALID_HANDLE_VALUE = DWORD(-1).value
 | 
						|
 | 
						|
  # <http://msdn.microsoft.com/en-us/library/ms683167.aspx>
 | 
						|
  GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(
 | 
						|
      ('GetConsoleMode', windll.kernel32))
 | 
						|
  # <http://msdn.microsoft.com/en-us/library/aa364960.aspx>
 | 
						|
  GetFileType = WINFUNCTYPE(DWORD, DWORD)(('GetFileType', windll.kernel32))
 | 
						|
 | 
						|
  # GetStdHandle returns INVALID_HANDLE_VALUE, NULL, or a valid handle.
 | 
						|
  if handle == INVALID_HANDLE_VALUE or handle is None:
 | 
						|
    return False
 | 
						|
  return (
 | 
						|
      (GetFileType(handle) & ~FILE_TYPE_REMOTE) == FILE_TYPE_CHAR and
 | 
						|
       GetConsoleMode(handle, byref(DWORD())))
 | 
						|
 | 
						|
 | 
						|
def win_get_unicode_stream(stream, excepted_fileno, output_handle, encoding):
 | 
						|
  """Returns a unicode-compatible stream.
 | 
						|
 | 
						|
  This function will return a direct-Console writing object only if:
 | 
						|
  - the file number is the expected console file number
 | 
						|
  - the handle the expected file handle
 | 
						|
  - the 'real' handle is in fact a handle to a console.
 | 
						|
  """
 | 
						|
  old_fileno = getattr(stream, 'fileno', lambda: None)()
 | 
						|
  if old_fileno == excepted_fileno:
 | 
						|
    # These types are available on linux but not Mac.
 | 
						|
    # pylint: disable=E0611,F0401
 | 
						|
    from ctypes import windll, WINFUNCTYPE
 | 
						|
    from ctypes.wintypes import DWORD, HANDLE
 | 
						|
 | 
						|
    # <http://msdn.microsoft.com/en-us/library/ms683231.aspx>
 | 
						|
    GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(('GetStdHandle', windll.kernel32))
 | 
						|
 | 
						|
    real_output_handle = GetStdHandle(DWORD(output_handle))
 | 
						|
    if win_handle_is_a_console(real_output_handle):
 | 
						|
      # It's a console.
 | 
						|
      return WinUnicodeConsoleOutput(
 | 
						|
          real_output_handle, old_fileno, stream.name, encoding)
 | 
						|
 | 
						|
  # It's something else. Create an auto-encoding stream.
 | 
						|
  return WinUnicodeOutput(stream, old_fileno, encoding)
 | 
						|
 | 
						|
 | 
						|
def fix_win_console(encoding):
 | 
						|
  """Makes Unicode console output work independently of the current code page.
 | 
						|
 | 
						|
  This also fixes <http://bugs.python.org/issue1602>.
 | 
						|
  Credit to Michael Kaplan
 | 
						|
  <http://blogs.msdn.com/b/michkap/archive/2010/04/07/9989346.aspx> and
 | 
						|
  TZOmegaTZIOY
 | 
						|
  <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.
 | 
						|
  """
 | 
						|
  if (isinstance(sys.stdout, WinUnicodeOutputBase) or
 | 
						|
      isinstance(sys.stderr, WinUnicodeOutputBase)):
 | 
						|
    return False
 | 
						|
 | 
						|
  try:
 | 
						|
    # SetConsoleCP and SetConsoleOutputCP could be used to change the code page
 | 
						|
    # but it's not really useful since the code here is using WriteConsoleW().
 | 
						|
    # Also, changing the code page is 'permanent' to the console and needs to be
 | 
						|
    # reverted manually.
 | 
						|
    # In practice one needs to set the console font to a TTF font to be able to
 | 
						|
    # see all the characters but it failed for me in practice. In any case, it
 | 
						|
    # won't throw any exception when printing, which is the important part.
 | 
						|
    # -11 and -12 are defined in stdio.h
 | 
						|
    sys.stdout = win_get_unicode_stream(sys.stdout, 1, -11, encoding)
 | 
						|
    sys.stderr = win_get_unicode_stream(sys.stderr, 2, -12, encoding)
 | 
						|
    # TODO(maruel): Do sys.stdin with ReadConsoleW(). Albeit the limitation is
 | 
						|
    # "It doesn't appear to be possible to read Unicode characters in UTF-8
 | 
						|
    # mode" and this appears to be a limitation of cmd.exe.
 | 
						|
  except Exception, e:
 | 
						|
    complain('exception %r while fixing up sys.stdout and sys.stderr' % e)
 | 
						|
  return True
 | 
						|
 | 
						|
 | 
						|
def fix_encoding():
 | 
						|
  """Fixes various encoding problems on all platforms.
 | 
						|
 | 
						|
  Should be called at the very begining of the process.
 | 
						|
  """
 | 
						|
  ret = True
 | 
						|
  if sys.platform == 'win32':
 | 
						|
    ret &= fix_win_codec()
 | 
						|
 | 
						|
  ret &= fix_default_encoding()
 | 
						|
 | 
						|
  if sys.platform == 'win32':
 | 
						|
    encoding = sys.getdefaultencoding()
 | 
						|
    ret &= fix_win_sys_argv(encoding)
 | 
						|
    ret &= fix_win_console(encoding)
 | 
						|
  return ret
 |