From 5d284fdf48ab00a873e710c6ab76818d5b54d2cc Mon Sep 17 00:00:00 2001
From: Raul Tambre <raul@tambre.ee>
Date: Mon, 7 Oct 2019 18:11:26 +0000
Subject: [PATCH] gclient_utils: buffer output as bytestrings in Annotated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In Python 3 byestrings and normal strings can't be concatenated.
To fix this we buffer as bytestrings in the Annotated wrapper.
We can't decode to a string because the output might come byte-by-byte, so it doesn't work with Unicode characters like ✔.

Also had to update gclient_test.py, where double-wrapping stdout with Annotated caused made output not work and include_zero=True working caused other unintended side-effects.

Example error from "fetch chromium":
Traceback (most recent call last):
  File "C:\Google\depot_tools\gclient_scm.py", line 1045, in _Clone
    self._Run(clone_cmd, options, cwd=self._root_dir, retry=True,
  File "C:\Google\depot_tools\gclient_scm.py", line 1370, in _Run
    gclient_utils.CheckCallAndFilter(cmd, env=env, **kwargs)
  File "C:\Google\depot_tools\gclient_utils.py", line 583, in CheckCallAndFilter
    show_header_if_necessary(needs_header, attempt)
  File "C:\Google\depot_tools\gclient_utils.py", line 533, in show_header_if_necessary
    stdout_write(header.encode())
  File "C:\Google\depot_tools\gclient_utils.py", line 391, in write
    obj[0] += out
TypeError: can only concatenate str (not "bytes") to str

Bug: 984182
Change-Id: If7037d30e9faf524f2405258281f6e6cd0bcdcae
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1778745
Commit-Queue: Edward Lesmes <ehmaldonado@chromium.org>
Reviewed-by: Edward Lesmes <ehmaldonado@chromium.org>
Auto-Submit: Raul Tambre <raul@tambre.ee>
---
 gclient_utils.py            | 26 ++++++++++++-----
 tests/gclient_test.py       |  8 ++---
 tests/gclient_utils_test.py | 58 +++++++++++++++++++++++++++++--------
 3 files changed, 66 insertions(+), 26 deletions(-)

diff --git a/gclient_utils.py b/gclient_utils.py
index 3a6bc1fa84..2c38a3a3d3 100644
--- a/gclient_utils.py
+++ b/gclient_utils.py
@@ -40,8 +40,10 @@ import subprocess2
 
 if sys.version_info.major == 2:
   from cStringIO import StringIO
+  string_type = basestring
 else:
   from io import StringIO
+  string_type = str
 
 
 RETRY_MAX = 3
@@ -371,6 +373,10 @@ class Annotated(Wrapper):
   def write(self, out):
     index = getattr(threading.currentThread(), 'index', 0)
     if not index and not self.__include_zero:
+      # Store as bytes to ensure Unicode characters get output correctly.
+      if isinstance(out, bytes):
+        out = out.decode('utf-8')
+
       # Unindexed threads aren't buffered.
       return self._wrapped.write(out)
 
@@ -380,28 +386,32 @@ class Annotated(Wrapper):
       # Strings are immutable, requiring to keep a lock for the whole dictionary
       # otherwise. Using an array is faster than using a dummy object.
       if not index in self.__output_buffers:
-        obj = self.__output_buffers[index] = ['']
+        obj = self.__output_buffers[index] = [b'']
       else:
         obj = self.__output_buffers[index]
     finally:
       self.lock.release()
 
+    # Store as bytes to ensure Unicode characters get output correctly.
+    if isinstance(out, string_type):
+      out = out.encode('utf-8')
+
     # Continue lockless.
     obj[0] += out
     while True:
       # TODO(agable): find both of these with a single pass.
-      cr_loc = obj[0].find('\r')
-      lf_loc = obj[0].find('\n')
+      cr_loc = obj[0].find(b'\r')
+      lf_loc = obj[0].find(b'\n')
       if cr_loc == lf_loc == -1:
         break
       elif cr_loc == -1 or (lf_loc >= 0 and lf_loc < cr_loc):
-        line, remaining = obj[0].split('\n', 1)
+        line, remaining = obj[0].split(b'\n', 1)
         if line:
-          self._wrapped.write('%d>%s\n' % (index, line))
+          self._wrapped.write('%d>%s\n' % (index, line.decode('utf-8')))
       elif lf_loc == -1 or (cr_loc >= 0 and cr_loc < lf_loc):
-        line, remaining = obj[0].split('\r', 1)
+        line, remaining = obj[0].split(b'\r', 1)
         if line:
-          self._wrapped.write('%d>%s\r' % (index, line))
+          self._wrapped.write('%d>%s\r' % (index, line.decode('utf-8')))
       obj[0] = remaining
 
   def flush(self):
@@ -423,7 +433,7 @@ class Annotated(Wrapper):
     # Don't keep the lock while writting. Will append \n when it shouldn't.
     for orphan in orphans:
       if orphan[1]:
-        self._wrapped.write('%d>%s\n' % (orphan[0], orphan[1]))
+        self._wrapped.write('%d>%s\n' % (orphan[0], orphan[1].decode('utf-8')))
     return self._wrapped.flush()
 
 
diff --git a/tests/gclient_test.py b/tests/gclient_test.py
index 1f959ce3ee..0faeac44ba 100755
--- a/tests/gclient_test.py
+++ b/tests/gclient_test.py
@@ -76,14 +76,10 @@ class GclientTest(trial_dir.TestCase):
     self._old_createscm = gclient.gclient_scm.GitWrapper
     gclient.gclient_scm.GitWrapper = SCMMock
     SCMMock.unit_test = self
-    self._old_sys_stdout = sys.stdout
-    sys.stdout = gclient.gclient_utils.MakeFileAutoFlush(sys.stdout)
-    sys.stdout = gclient.gclient_utils.MakeFileAnnotated(sys.stdout)
 
   def tearDown(self):
     self.assertEqual([], self._get_processed())
     gclient.gclient_scm.GitWrapper = self._old_createscm
-    sys.stdout = self._old_sys_stdout
     os.chdir(self.previous_dir)
     super(GclientTest, self).tearDown()
 
@@ -1366,9 +1362,9 @@ class GclientTest(trial_dir.TestCase):
 
 if __name__ == '__main__':
   sys.stdout = gclient_utils.MakeFileAutoFlush(sys.stdout)
-  sys.stdout = gclient_utils.MakeFileAnnotated(sys.stdout, include_zero=True)
+  sys.stdout = gclient_utils.MakeFileAnnotated(sys.stdout)
   sys.stderr = gclient_utils.MakeFileAutoFlush(sys.stderr)
-  sys.stderr = gclient_utils.MakeFileAnnotated(sys.stderr, include_zero=True)
+  sys.stderr = gclient_utils.MakeFileAnnotated(sys.stderr)
   logging.basicConfig(
       level=[logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG][
         min(sys.argv.count('-v'), 3)],
diff --git a/tests/gclient_utils_test.py b/tests/gclient_utils_test.py
index 670775a68e..8c2673d9e5 100755
--- a/tests/gclient_utils_test.py
+++ b/tests/gclient_utils_test.py
@@ -127,7 +127,7 @@ class CheckCallAndFilterTestCase(unittest.TestCase):
         'after a short nap...')
 
   @mock.patch('subprocess2.Popen')
-  def testCHeckCallAndFilter_PrintStdout(self, mockPopen):
+  def testCheckCallAndFilter_PrintStdout(self, mockPopen):
     cwd = 'bleh'
     args = ['boo', 'foo', 'bar']
     test_string = 'ahah\naccb\nallo\naddb\n✔'
@@ -139,16 +139,51 @@ class CheckCallAndFilterTestCase(unittest.TestCase):
         print_stdout=True)
 
     self.assertEqual(result, test_string.encode('utf-8'))
-    self.assertEqual(
-        self.stdout.getvalue().splitlines(),
-        [
-            b'________ running \'boo foo bar\' in \'bleh\'',
-            b'ahah',
-            b'accb',
-            b'allo',
-            b'addb',
-            b'\xe2\x9c\x94',
-        ])
+    self.assertEqual(self.stdout.getvalue().splitlines(), [
+        b"________ running 'boo foo bar' in 'bleh'",
+        b'ahah',
+        b'accb',
+        b'allo',
+        b'addb',
+        b'\xe2\x9c\x94',
+    ])
+
+
+class AnnotatedTestCase(unittest.TestCase):
+  def setUp(self):
+    self.out = gclient_utils.MakeFileAnnotated(io.StringIO())
+    self.outz = gclient_utils.MakeFileAnnotated(
+        io.StringIO(), include_zero=True)
+
+  def testString(self):
+    self.outz.write('test string\n')
+    self.assertEqual(self.outz.getvalue(), '0>test string\n')
+
+  def testBytes(self):
+    self.out.write(b'test string\n')
+    self.assertEqual(self.out.getvalue(), 'test string\n')
+
+  def testBytesZero(self):
+    self.outz.write(b'test string\n')
+    self.assertEqual(self.outz.getvalue(), '0>test string\n')
+
+  def testUnicode(self):
+    self.out.write(b'\xe2\x9c\x94\n')
+    self.assertEqual(self.out.getvalue(), '✔\n')
+
+  def testUnicodeZero(self):
+    self.outz.write('✔\n')
+    self.assertEqual(self.outz.getvalue(), '0>✔\n')
+
+  def testMultiline(self):
+    self.out.write(b'first line\nsecond line')
+    self.assertEqual(self.out.getvalue(), 'first line\nsecond line')
+
+  def testFlush(self):
+    self.outz.write(b'first line\nsecond line')
+    self.assertEqual(self.outz.getvalue(), '0>first line\n')
+    self.outz.flush()
+    self.assertEqual(self.outz.getvalue(), '0>first line\n0>second line\n')
 
 
 class SplitUrlRevisionTestCase(unittest.TestCase):
@@ -271,7 +306,6 @@ class GClientUtilsTest(trial_dir.TestCase):
 
 
 if __name__ == '__main__':
-  import unittest
   unittest.main()
 
 # vim: ts=2:sw=2:tw=80:et: