From 419c92f1bc931e5d1b3f1346e82c1cfa7612f783 Mon Sep 17 00:00:00 2001 From: Edward Lemur Date: Fri, 25 Oct 2019 22:17:49 +0000 Subject: [PATCH] gclient_utils: Make FileRead always return a Unicode string. Previously, it returned bytes if the file did not contain valid Unicode data, and a Unicode string otherwise. It is confusing to reason about such a function, and no current caller needs bytes data AFAICT, so make FileRead always return Unicode strings. Bug: 1009814 Change-Id: I89dd1935e5d4fcaf9af71585b85bda6c47695950 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1880013 Reviewed-by: Anthony Polito Commit-Queue: Edward Lesmes --- gclient_utils.py | 17 +++++++---------- tests/gclient_utils_test.py | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/gclient_utils.py b/gclient_utils.py index 52b2350e5..6939fff1f 100644 --- a/gclient_utils.py +++ b/gclient_utils.py @@ -164,19 +164,16 @@ class PrintableObject(object): return output -def FileRead(filename, mode='rU'): +def FileRead(filename, mode='rbU'): + # Always decodes output to a Unicode string. # On Python 3 newlines are converted to '\n' by default and 'U' is deprecated. - if mode == 'rU' and sys.version_info.major == 3: - mode = 'r' + if mode == 'rbU' and sys.version_info.major == 3: + mode = 'rb' with open(filename, mode=mode) as f: - # codecs.open() has different behavior than open() on python 2.6 so use - # open() and decode manually. s = f.read() - try: - return s.decode('utf-8') - # AttributeError is for Py3 compatibility - except (UnicodeDecodeError, AttributeError): - return s + if isinstance(s, bytes): + return s.decode('utf-8', 'replace') + return s def FileWrite(filename, content, mode='w'): diff --git a/tests/gclient_utils_test.py b/tests/gclient_utils_test.py index 2f8920327..541c3149c 100755 --- a/tests/gclient_utils_test.py +++ b/tests/gclient_utils_test.py @@ -10,6 +10,7 @@ from __future__ import unicode_literals import io import os import sys +import tempfile import time import unittest @@ -334,6 +335,26 @@ class GClientUtilsTest(trial_dir.TestCase): self.assertEqual( expected, gclient_utils.ParseCodereviewSettingsContent(content)) + def testFileRead_Bytes(self): + with tempfile.NamedTemporaryFile(delete=False) as tmp: + tmp.write(b'foo \xe2\x9c bar') + # NamedTemporaryFiles must be closed on Windows before being opened again. + tmp.close() + try: + self.assertEqual('foo \ufffd bar', gclient_utils.FileRead(tmp.name)) + finally: + os.remove(tmp.name) + + def testFileRead_Unicode(self): + with tempfile.NamedTemporaryFile(delete=False) as tmp: + tmp.write(b'foo \xe2\x9c\x94 bar') + # NamedTemporaryFiles must be closed on Windows before being opened again. + tmp.close() + try: + self.assertEqual('foo ✔ bar', gclient_utils.FileRead(tmp.name)) + finally: + os.remove(tmp.name) + if __name__ == '__main__': unittest.main()