Skip to content

Commit ed61a83

Browse files
author
Krzysztof Trzcinski
committed
Make get_contents() return bytes (Python3)
Currently if we end up having `Node` that is generic `Entry` and it doesn't exist when you call `get_contents` on it will give you empty string. However in all other cases we always return `bytes` objects. This subtle difference can break places that search for things in `get_contents` when running with Python3. This led me to untangling str vs bytes vs unicode confusion going on when migrating from Python2 and Python3. I assumed (not entirely sure if correctly) that when outputting text in Python3 we really want to operate on utf-8 encoding not ascii. So I went through places that immediately came at me (usually due to failing tests). Unfortunately it seems I had to adjust some tests themselves. I introduced env.EncodeText which in Python2 just passes through (I am not sure if that's what we want to end up doing) and converts string to utf-8 encoded data. Ideally I think all tools writing text to a file should be writing binary files and converting all text using env.EncodeText. It seems environment level granularity to configure encoding seems good enough, but potentially in future we could have per node encoding (I really can't think of scenario where that would be required). Minor thing is it also includes fix to some tests: - Few where using different PATH when checking if test can be run vs PATH used during actual SCons invocation (thus it decided it can run test because it could find an executable, but failed test itself because that could not find it) - Fixed TestCmd to work with Python3.6 when trying to create non-ascii filename (without change it simply raises exception "could not encode as ascii")
1 parent 867f762 commit ed61a83

28 files changed

+207
-148
lines changed

QMTest/TestCmd.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,10 @@
310310
IS_PY3 = sys.version_info[0] == 3
311311
IS_WINDOWS = sys.platform == 'win32'
312312

313+
if sys.version_info >= (3, 0, 0):
314+
fsencode = lambda name: name.encode('utf-8')
315+
else:
316+
fsencode = lambda name: name
313317

314318
class null(object):
315319
pass
@@ -1881,7 +1885,7 @@ def do_chmod(fname):
18811885
do_chmod(os.path.join(dirpath, name))
18821886
do_chmod(top)
18831887

1884-
def write(self, file, content, mode='wb'):
1888+
def write(self, filename, content, mode='wb'):
18851889
"""Writes the specified content text (second argument) to the
18861890
specified file name (first argument). The file name may be
18871891
a list, in which case the elements are concatenated with the
@@ -1890,10 +1894,10 @@ def write(self, file, content, mode='wb'):
18901894
exist. The I/O mode for the file may be specified; it must
18911895
begin with a 'w'. The default is 'wb' (binary write).
18921896
"""
1893-
file = self.canonicalize(file)
1897+
filename = self.canonicalize(filename)
18941898
if mode[0] != 'w':
18951899
raise ValueError("mode must begin with 'w'")
1896-
with open(file, mode) as f:
1900+
with open(fsencode(filename), mode) as f:
18971901
try:
18981902
f.write(content)
18991903
except TypeError as e:

runtest.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272
# This is (will be) used for reporting results back
7373
# to a central SCons test monitoring infrastructure.
7474
#
75-
# --exclude-list file
75+
# --exclude-list file
7676
# list of tests to exclude in the current selection of test
7777
# mostly meant to easily exclude tests from -a option
7878
#
@@ -99,6 +99,9 @@
9999
from Queue import Queue
100100
import subprocess
101101

102+
if sys.version_info > (3, 0, 0):
103+
import codecs
104+
sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
102105

103106
cwd = os.getcwd()
104107

@@ -736,7 +739,7 @@ def find_py(directory):
736739
tests.extend(unittests)
737740
tests.extend(endtests)
738741
tests.sort()
739-
742+
740743
if not tests:
741744
sys.stderr.write(usagestr + """
742745
runtest.py: No tests were found.
@@ -932,4 +935,4 @@ def run(self):
932935
# tab-width:4
933936
# indent-tabs-mode:nil
934937
# End:
935-
# vim: set expandtab tabstop=4 shiftwidth=4:
938+
# vim: set expandtab tabstop=4 shiftwidth=4:

src/engine/SCons/Action.py

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -626,20 +626,9 @@ def default_batch_key(self, env, target, source):
626626
SCons.Util.AddMethod(self, batch_key, 'batch_key')
627627

628628
def print_cmd_line(self, s, target, source, env):
629-
"""
630-
In python 3, and in some of our tests, sys.stdout is
631-
a String io object, and it takes unicode strings only
632-
In other cases it's a regular Python 2.x file object
633-
which takes strings (bytes), and if you pass those a
634-
unicode object they try to decode with 'ascii' codec
635-
which fails if the cmd line has any hi-bit-set chars.
636-
This code assumes s is a regular string, but should
637-
work if it's unicode too.
638-
"""
639-
try:
640-
sys.stdout.write(s + u"\n")
641-
except UnicodeDecodeError:
642-
sys.stdout.write(s + "\n")
629+
print(s.__class__)
630+
sys.stdout.write(s)
631+
sys.stdout.write("\n")
643632

644633
def __call__(self, target, source, env,
645634
exitstatfunc=_null,

src/engine/SCons/Environment.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,13 @@
6060
import SCons.Util
6161
import SCons.Warnings
6262

63+
if sys.version_info >= (3, 0, 0):
64+
def _encode_text(text):
65+
return text.encode('utf-8')
66+
else:
67+
def _encode_text(text):
68+
return text
69+
6370
class _Null(object):
6471
pass
6572

@@ -1216,6 +1223,18 @@ def _canonicalize(self, path):
12161223
path = str(self.fs.Dir(path))
12171224
return path
12181225

1226+
def EncodeText(self, text):
1227+
""" This is meant to be used when writing files.
1228+
It is especially important in Python3 where it always tries to encode
1229+
things in text mode.
1230+
However it seems there is no "generic" way to set up default
1231+
encoding to be "utf-8".
1232+
1233+
This is simply meant as one place to define default encoding
1234+
for all text files generated by tools.
1235+
"""
1236+
return _encode_text(text)
1237+
12191238
def AppendENVPath(self, name, newpath, envname = 'ENV',
12201239
sep = os.pathsep, delete_existing=1):
12211240
"""Append path elements to the path 'name' in the 'ENV'

src/engine/SCons/Node/FS.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,6 +1051,10 @@ def get_subst_proxy(self):
10511051
# the method of the FS class.
10521052
_classEntry = Entry
10531053

1054+
if sys.version_info >= (3, 0, 0):
1055+
fsencode = os.fsencode
1056+
else:
1057+
fsencode = lambda name: name
10541058

10551059
class LocalFS(object):
10561060

@@ -1106,6 +1110,8 @@ def open(self, path):
11061110
return open(path)
11071111
def unlink(self, path):
11081112
return os.unlink(path)
1113+
def fsencode(self, name):
1114+
return fsencode(name)
11091115

11101116
if hasattr(os, 'symlink'):
11111117
def islink(self, path):
@@ -1824,10 +1830,9 @@ def scanner_key(self):
18241830
"""A directory does not get scanned."""
18251831
return None
18261832

1827-
def get_text_contents(self):
1828-
"""We already emit things in text, so just return the binary
1829-
version."""
1830-
return self.get_contents()
1833+
def get_text_contents(self, encoding='utf-8'):
1834+
"""Return decoded version of contents."""
1835+
return self.get_contents().decode(encoding)
18311836

18321837
def get_contents(self):
18331838
"""Return content signatures and names of all our children

src/engine/SCons/Node/FSTests.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,15 @@ def setUp(self):
120120
def tearDown(self):
121121
os.chdir(self.save_cwd)
122122

123+
def assert_contents_invariants(contents):
124+
assert "".encode('ascii') in contents, contents
125+
126+
if sys.version_info >= (3,0,0):
127+
expected = bytes
128+
else:
129+
expected = str
130+
assert isinstance(contents, expected), 'get_contents has to return decodable item ({0}) got ({1}) `{2}`'.format(expected, contents.__class__, repr(contents))
131+
123132
class VariantDirTestCase(unittest.TestCase):
124133
def runTest(self):
125134
"""Test variant dir functionality"""
@@ -335,6 +344,7 @@ def link_func(target, source, env, link_made=link_made):
335344
f10 = fs.File('build/var1/asourcefile')
336345
assert f10.exists()
337346
assert f10.get_contents() == bytearray('stuff','utf-8'), f10.get_contents()
347+
assert_contents_invariants(f10.get_contents())
338348

339349
f11 = fs.File('src/file11')
340350
t, m = f11.alter_targets()
@@ -1307,6 +1317,7 @@ class DummyNode(object):
13071317
test.write("binary_file", "Foo\x1aBar")
13081318
f1 = fs.File(test.workpath("binary_file"))
13091319
assert f1.get_contents() == bytearray("Foo\x1aBar",'utf-8'), f1.get_contents()
1320+
assert_contents_invariants(f1.get_contents())
13101321

13111322
# This tests to make sure we can decode UTF-8 text files.
13121323
test_string = u"Foo\x1aBar"
@@ -1376,13 +1387,15 @@ def nonexistent(method, s):
13761387
# test Entry.get_contents()
13771388
e = fs.Entry('does_not_exist')
13781389
c = e.get_contents()
1379-
assert c == "", c
1390+
assert_contents_invariants(c)
1391+
assert c == b"", c
13801392
assert e.__class__ == SCons.Node.FS.Entry
13811393

13821394
test.write("file", "file\n")
13831395
try:
13841396
e = fs.Entry('file')
13851397
c = e.get_contents()
1398+
assert_contents_invariants(c)
13861399
assert c == bytearray("file\n",'utf-8'), c
13871400
assert e.__class__ == SCons.Node.FS.File
13881401
finally:
@@ -1406,26 +1419,22 @@ def nonexistent(method, s):
14061419
test.subdir("dir")
14071420
e = fs.Entry('dir')
14081421
c = e.get_contents()
1409-
assert c == "", c
1422+
assert_contents_invariants(c)
1423+
assert c == b"", c
14101424
assert e.__class__ == SCons.Node.FS.Dir
14111425

14121426
c = e.get_text_contents()
1413-
try:
1414-
eval('assert c == u"", c')
1415-
except SyntaxError:
1416-
assert c == ""
1427+
assert c == '', c
14171428

14181429
if sys.platform != 'win32' and hasattr(os, 'symlink'):
14191430
os.symlink('nonexistent', test.workpath('dangling_symlink'))
14201431
e = fs.Entry('dangling_symlink')
14211432
c = e.get_contents()
1433+
assert_contents_invariants(c)
14221434
assert e.__class__ == SCons.Node.FS.Entry, e.__class__
1423-
assert c == "", c
1435+
assert c == b"", repr(c)
14241436
c = e.get_text_contents()
1425-
try:
1426-
eval('assert c == u"", c')
1427-
except SyntaxError:
1428-
assert c == "", c
1437+
assert c == "", repr(c)
14291438

14301439
test.write("tstamp", "tstamp\n")
14311440
try:
@@ -2013,14 +2022,17 @@ def test_get_contents(self):
20132022
e = self.fs.Dir(os.path.join('d', 'empty'))
20142023
s = self.fs.Dir(os.path.join('d', 'sub'))
20152024

2016-
files = d.get_contents().split('\n')
2025+
files = d.get_contents().decode('utf-8').split('\n')
2026+
2027+
assert_contents_invariants(d.get_contents())
2028+
assert_contents_invariants(e.get_contents())
20172029

2018-
assert e.get_contents() == '', e.get_contents()
2030+
assert e.get_contents() == b'', e.get_contents()
20192031
assert e.get_text_contents() == '', e.get_text_contents()
2020-
assert e.get_csig()+" empty" == files[0], files
2021-
assert f.get_csig()+" f" == files[1], files
2022-
assert g.get_csig()+" g" == files[2], files
2023-
assert s.get_csig()+" sub" == files[3], files
2032+
assert e.get_csig() + " empty" == files[0], files
2033+
assert f.get_csig() + " f" == files[1], files
2034+
assert g.get_csig() + " g" == files[2], files
2035+
assert s.get_csig() + " sub" == files[3], files
20242036

20252037
def test_implicit_re_scans(self):
20262038
"""Test that adding entries causes a directory to be re-scanned
@@ -2351,14 +2363,17 @@ def test_runTest(self):
23512363

23522364
e3d = fs.Entry('e3d')
23532365
e3d.get_contents()
2366+
assert_contents_invariants(e3d.get_contents())
23542367
assert e3d.__class__ is SCons.Node.FS.Dir, e3d.__class__
23552368

23562369
e3f = fs.Entry('e3f')
23572370
e3f.get_contents()
2371+
assert_contents_invariants(e3f.get_contents())
23582372
assert e3f.__class__ is SCons.Node.FS.File, e3f.__class__
23592373

23602374
e3n = fs.Entry('e3n')
23612375
e3n.get_contents()
2376+
assert_contents_invariants(e3n.get_contents())
23622377
assert e3n.__class__ is SCons.Node.FS.Entry, e3n.__class__
23632378

23642379
test.subdir('e4d')
@@ -3113,6 +3128,7 @@ def test_get_contents(self):
31133128
test.write(["rep3", "contents"], "Con\x1aTents\n")
31143129
try:
31153130
c = fs.File("contents").get_contents()
3131+
assert_contents_invariants(c)
31163132
assert c == bytearray("Con\x1aTents\n",'utf-8'), "got '%s'" % c
31173133
finally:
31183134
test.unlink(["rep3", "contents"])

src/engine/SCons/Node/NodeTests.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ class TestNodeInfo(SCons.Node.NodeInfoBase):
273273

274274
f = ni1.format()
275275
assert f == ['x', 'y', 'z'], f
276-
276+
277277
field_list = ['xxx', 'zzz', 'aaa']
278278

279279
f = ni1.format(field_list)
@@ -586,7 +586,7 @@ def test_get_actions(self):
586586
def test_get_csig(self):
587587
"""Test generic content signature calculation
588588
"""
589-
589+
590590
class TestNodeInfo(SCons.Node.NodeInfoBase):
591591
__slots__ = ('csig',)
592592
try:
@@ -625,7 +625,7 @@ class TestNodeInfo(SCons.Node.NodeInfoBase):
625625
__slots__ = ('csig',)
626626
SCons.Node.Node.NodeInfo = TestNodeInfo
627627
node = SCons.Node.Node()
628-
628+
629629
binfo = node.get_binfo()
630630
assert isinstance(binfo, SCons.Node.BuildInfoBase), binfo
631631

src/engine/SCons/Node/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ def get_contents_entry(node):
197197
# string so calls to get_contents() in emitters and the
198198
# like (e.g. in qt.py) don't have to disambiguate by hand
199199
# or catch the exception.
200-
return ''
200+
return b''
201201
else:
202202
return _get_contents_map[node._func_get_contents](node)
203203

@@ -207,7 +207,7 @@ def get_contents_dir(node):
207207
contents = []
208208
for n in sorted(node.children(), key=lambda t: t.name):
209209
contents.append('%s %s\n' % (n.get_csig(), n.name))
210-
return ''.join(contents)
210+
return node.fs.fsencode(''.join(contents))
211211

212212
def get_contents_file(node):
213213
if not node.rexists():

0 commit comments

Comments
 (0)