From 5d284fdf48ab00a873e710c6ab76818d5b54d2cc Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Mon, 7 Oct 2019 18:11:26 +0000 Subject: [PATCH] gclient_utils: buffer output as bytestrings in Annotated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Python 3 byestrings and normal strings can't be concatenated. To fix this we buffer as bytestrings in the Annotated wrapper. We can't decode to a string because the output might come byte-by-byte, so it doesn't work with Unicode characters like ✔. Also had to update gclient_test.py, where double-wrapping stdout with Annotated caused made output not work and include_zero=True working caused other unintended side-effects. Example error from "fetch chromium": Traceback (most recent call last): File "C:\Google\depot_tools\gclient_scm.py", line 1045, in _Clone self._Run(clone_cmd, options, cwd=self._root_dir, retry=True, File "C:\Google\depot_tools\gclient_scm.py", line 1370, in _Run gclient_utils.CheckCallAndFilter(cmd, env=env, **kwargs) File "C:\Google\depot_tools\gclient_utils.py", line 583, in CheckCallAndFilter show_header_if_necessary(needs_header, attempt) File "C:\Google\depot_tools\gclient_utils.py", line 533, in show_header_if_necessary stdout_write(header.encode()) File "C:\Google\depot_tools\gclient_utils.py", line 391, in write obj[0] += out TypeError: can only concatenate str (not "bytes") to str Bug: 984182 Change-Id: If7037d30e9faf524f2405258281f6e6cd0bcdcae Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1778745 Commit-Queue: Edward Lesmes Reviewed-by: Edward Lesmes Auto-Submit: Raul Tambre --- gclient_utils.py | 26 ++++++++++++----- tests/gclient_test.py | 8 ++--- tests/gclient_utils_test.py | 58 +++++++++++++++++++++++++++++-------- 3 files changed, 66 insertions(+), 26 deletions(-) diff --git a/gclient_utils.py b/gclient_utils.py index 3a6bc1fa8..2c38a3a3d 100644 --- a/gclient_utils.py +++ b/gclient_utils.py @@ -40,8 +40,10 @@ import subprocess2 if sys.version_info.major == 2: from cStringIO import StringIO + string_type = basestring else: from io import StringIO + string_type = str RETRY_MAX = 3 @@ -371,6 +373,10 @@ class Annotated(Wrapper): def write(self, out): index = getattr(threading.currentThread(), 'index', 0) if not index and not self.__include_zero: + # Store as bytes to ensure Unicode characters get output correctly. + if isinstance(out, bytes): + out = out.decode('utf-8') + # Unindexed threads aren't buffered. return self._wrapped.write(out) @@ -380,28 +386,32 @@ class Annotated(Wrapper): # Strings are immutable, requiring to keep a lock for the whole dictionary # otherwise. Using an array is faster than using a dummy object. if not index in self.__output_buffers: - obj = self.__output_buffers[index] = [''] + obj = self.__output_buffers[index] = [b''] else: obj = self.__output_buffers[index] finally: self.lock.release() + # Store as bytes to ensure Unicode characters get output correctly. + if isinstance(out, string_type): + out = out.encode('utf-8') + # Continue lockless. obj[0] += out while True: # TODO(agable): find both of these with a single pass. - cr_loc = obj[0].find('\r') - lf_loc = obj[0].find('\n') + cr_loc = obj[0].find(b'\r') + lf_loc = obj[0].find(b'\n') if cr_loc == lf_loc == -1: break elif cr_loc == -1 or (lf_loc >= 0 and lf_loc < cr_loc): - line, remaining = obj[0].split('\n', 1) + line, remaining = obj[0].split(b'\n', 1) if line: - self._wrapped.write('%d>%s\n' % (index, line)) + self._wrapped.write('%d>%s\n' % (index, line.decode('utf-8'))) elif lf_loc == -1 or (cr_loc >= 0 and cr_loc < lf_loc): - line, remaining = obj[0].split('\r', 1) + line, remaining = obj[0].split(b'\r', 1) if line: - self._wrapped.write('%d>%s\r' % (index, line)) + self._wrapped.write('%d>%s\r' % (index, line.decode('utf-8'))) obj[0] = remaining def flush(self): @@ -423,7 +433,7 @@ class Annotated(Wrapper): # Don't keep the lock while writting. Will append \n when it shouldn't. for orphan in orphans: if orphan[1]: - self._wrapped.write('%d>%s\n' % (orphan[0], orphan[1])) + self._wrapped.write('%d>%s\n' % (orphan[0], orphan[1].decode('utf-8'))) return self._wrapped.flush() diff --git a/tests/gclient_test.py b/tests/gclient_test.py index 1f959ce3e..0faeac44b 100755 --- a/tests/gclient_test.py +++ b/tests/gclient_test.py @@ -76,14 +76,10 @@ class GclientTest(trial_dir.TestCase): self._old_createscm = gclient.gclient_scm.GitWrapper gclient.gclient_scm.GitWrapper = SCMMock SCMMock.unit_test = self - self._old_sys_stdout = sys.stdout - sys.stdout = gclient.gclient_utils.MakeFileAutoFlush(sys.stdout) - sys.stdout = gclient.gclient_utils.MakeFileAnnotated(sys.stdout) def tearDown(self): self.assertEqual([], self._get_processed()) gclient.gclient_scm.GitWrapper = self._old_createscm - sys.stdout = self._old_sys_stdout os.chdir(self.previous_dir) super(GclientTest, self).tearDown() @@ -1366,9 +1362,9 @@ class GclientTest(trial_dir.TestCase): if __name__ == '__main__': sys.stdout = gclient_utils.MakeFileAutoFlush(sys.stdout) - sys.stdout = gclient_utils.MakeFileAnnotated(sys.stdout, include_zero=True) + sys.stdout = gclient_utils.MakeFileAnnotated(sys.stdout) sys.stderr = gclient_utils.MakeFileAutoFlush(sys.stderr) - sys.stderr = gclient_utils.MakeFileAnnotated(sys.stderr, include_zero=True) + sys.stderr = gclient_utils.MakeFileAnnotated(sys.stderr) logging.basicConfig( level=[logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG][ min(sys.argv.count('-v'), 3)], diff --git a/tests/gclient_utils_test.py b/tests/gclient_utils_test.py index 670775a68..8c2673d9e 100755 --- a/tests/gclient_utils_test.py +++ b/tests/gclient_utils_test.py @@ -127,7 +127,7 @@ class CheckCallAndFilterTestCase(unittest.TestCase): 'after a short nap...') @mock.patch('subprocess2.Popen') - def testCHeckCallAndFilter_PrintStdout(self, mockPopen): + def testCheckCallAndFilter_PrintStdout(self, mockPopen): cwd = 'bleh' args = ['boo', 'foo', 'bar'] test_string = 'ahah\naccb\nallo\naddb\n✔' @@ -139,16 +139,51 @@ class CheckCallAndFilterTestCase(unittest.TestCase): print_stdout=True) self.assertEqual(result, test_string.encode('utf-8')) - self.assertEqual( - self.stdout.getvalue().splitlines(), - [ - b'________ running \'boo foo bar\' in \'bleh\'', - b'ahah', - b'accb', - b'allo', - b'addb', - b'\xe2\x9c\x94', - ]) + self.assertEqual(self.stdout.getvalue().splitlines(), [ + b"________ running 'boo foo bar' in 'bleh'", + b'ahah', + b'accb', + b'allo', + b'addb', + b'\xe2\x9c\x94', + ]) + + +class AnnotatedTestCase(unittest.TestCase): + def setUp(self): + self.out = gclient_utils.MakeFileAnnotated(io.StringIO()) + self.outz = gclient_utils.MakeFileAnnotated( + io.StringIO(), include_zero=True) + + def testString(self): + self.outz.write('test string\n') + self.assertEqual(self.outz.getvalue(), '0>test string\n') + + def testBytes(self): + self.out.write(b'test string\n') + self.assertEqual(self.out.getvalue(), 'test string\n') + + def testBytesZero(self): + self.outz.write(b'test string\n') + self.assertEqual(self.outz.getvalue(), '0>test string\n') + + def testUnicode(self): + self.out.write(b'\xe2\x9c\x94\n') + self.assertEqual(self.out.getvalue(), '✔\n') + + def testUnicodeZero(self): + self.outz.write('✔\n') + self.assertEqual(self.outz.getvalue(), '0>✔\n') + + def testMultiline(self): + self.out.write(b'first line\nsecond line') + self.assertEqual(self.out.getvalue(), 'first line\nsecond line') + + def testFlush(self): + self.outz.write(b'first line\nsecond line') + self.assertEqual(self.outz.getvalue(), '0>first line\n') + self.outz.flush() + self.assertEqual(self.outz.getvalue(), '0>first line\n0>second line\n') class SplitUrlRevisionTestCase(unittest.TestCase): @@ -271,7 +306,6 @@ class GClientUtilsTest(trial_dir.TestCase): if __name__ == '__main__': - import unittest unittest.main() # vim: ts=2:sw=2:tw=80:et: