Replace the "utf8" alias with the canonical "utf-8" codec name and remove the second Python 2 vs. Python 3 compat.py
Client: py
Patch: Alexandre Detiste
This closes #3105
diff --git a/lib/py/src/compat.py b/lib/py/src/compat.py
deleted file mode 100644
index 3b3d57f..0000000
--- a/lib/py/src/compat.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-from io import BytesIO as BufferIO # noqa
-
-def binary_to_str(bin_val):
- return bin_val.decode('utf8')
-
-def str_to_binary(str_val):
- return bytes(str_val, 'utf8')
-
-def byte_index(bytes_val, i):
- return bytes_val[i]
diff --git a/lib/py/src/protocol/TBinaryProtocol.py b/lib/py/src/protocol/TBinaryProtocol.py
index e59e0dc..af64ec1 100644
--- a/lib/py/src/protocol/TBinaryProtocol.py
+++ b/lib/py/src/protocol/TBinaryProtocol.py
@@ -17,10 +17,10 @@
# under the License.
#
-from .TProtocol import TType, TProtocolBase, TProtocolException, TProtocolFactory
-from ..compat import binary_to_str
from struct import pack, unpack
+from .TProtocol import TType, TProtocolBase, TProtocolException, TProtocolFactory
+
class TBinaryProtocol(TProtocolBase):
"""Binary implementation of the Thrift protocol driver."""
@@ -146,7 +146,7 @@
if self.strictRead:
raise TProtocolException(type=TProtocolException.BAD_VERSION,
message='No protocol version header')
- name = binary_to_str(self.trans.readAll(sz))
+ name = self.trans.readAll(sz).decode('utf-8')
type = self.readByte()
seqid = self.readI32()
return (name, type, seqid)
diff --git a/lib/py/src/protocol/TCompactProtocol.py b/lib/py/src/protocol/TCompactProtocol.py
index 700e792..a3527cd 100644
--- a/lib/py/src/protocol/TCompactProtocol.py
+++ b/lib/py/src/protocol/TCompactProtocol.py
@@ -20,8 +20,6 @@
from .TProtocol import TType, TProtocolBase, TProtocolException, TProtocolFactory, checkIntegerLimits
from struct import pack, unpack
-from ..compat import binary_to_str, str_to_binary
-
__all__ = ['TCompactProtocol', 'TCompactProtocolFactory']
CLEAR = 0
@@ -165,7 +163,7 @@
if tseqid < 0:
tseqid = 2147483648 + (2147483648 + tseqid)
self.__writeVarint(tseqid)
- self.__writeBinary(str_to_binary(name))
+ self.__writeBinary(bytes(name, 'utf-8'))
self.state = VALUE_WRITE
def writeMessageEnd(self):
@@ -346,7 +344,7 @@
# however the sequence is actually signed...
if seqid > 2147483647:
seqid = -2147483648 - (2147483648 - seqid)
- name = binary_to_str(self.__readBinary())
+ name = self.__readBinary().decode('utf-8')
return (name, type, seqid)
def readMessageEnd(self):
diff --git a/lib/py/src/protocol/TJSONProtocol.py b/lib/py/src/protocol/TJSONProtocol.py
index fef0cc9..a42aaa6 100644
--- a/lib/py/src/protocol/TJSONProtocol.py
+++ b/lib/py/src/protocol/TJSONProtocol.py
@@ -23,8 +23,6 @@
import math
import sys
-from ..compat import str_to_binary
-
__all__ = ['TJSONProtocol',
'TJSONProtocolFactory',
@@ -213,7 +211,7 @@
escaped = ESCAPE_CHAR_VALS.get(s, s)
json_str.append(escaped)
json_str.append('"')
- self.trans.write(str_to_binary(''.join(json_str)))
+ self.trans.write(bytes(''.join(json_str), 'utf-8'))
def writeJSONNumber(self, number, formatter='{0}'):
self.context.write()
@@ -313,7 +311,7 @@
utf8_bytes = bytearray([ord(character)])
while ord(self.reader.peek()) >= 0x80:
utf8_bytes.append(ord(self.reader.read()))
- character = utf8_bytes.decode('utf8')
+ character = utf8_bytes.decode('utf-8')
string.append(character)
if highSurrogate:
diff --git a/lib/py/src/protocol/TProtocol.py b/lib/py/src/protocol/TProtocol.py
index ec71ab3..a7336c5 100644
--- a/lib/py/src/protocol/TProtocol.py
+++ b/lib/py/src/protocol/TProtocol.py
@@ -19,7 +19,6 @@
from thrift.Thrift import TException, TType, TFrozenDict
from thrift.transport.TTransport import TTransportException
-from ..compat import binary_to_str, str_to_binary
import sys
from itertools import islice
@@ -117,13 +116,13 @@
pass
def writeString(self, str_val):
- self.writeBinary(str_to_binary(str_val))
+ self.writeBinary(bytes(str_val, 'utf-8'))
def writeBinary(self, str_val):
pass
def writeUtf8(self, str_val):
- self.writeString(str_val.encode('utf8'))
+ self.writeString(str_val.encode('utf-8'))
def readMessageBegin(self):
pass
@@ -180,13 +179,13 @@
pass
def readString(self):
- return binary_to_str(self.readBinary())
+ return self.readBinary().decode('utf-8')
def readBinary(self):
pass
def readUtf8(self):
- return self.readString().decode('utf8')
+ return self.readString().decode('utf-8')
def skip(self, ttype):
if ttype == TType.BOOL:
diff --git a/lib/py/src/transport/THeaderTransport.py b/lib/py/src/transport/THeaderTransport.py
index 7c9827b..4fb2034 100644
--- a/lib/py/src/transport/THeaderTransport.py
+++ b/lib/py/src/transport/THeaderTransport.py
@@ -19,8 +19,8 @@
import struct
import zlib
+from io import BytesIO
-from thrift.compat import BufferIO, byte_index
from thrift.protocol.TBinaryProtocol import TBinaryProtocol
from thrift.protocol.TCompactProtocol import TCompactProtocol, readVarint, writeVarint
from thrift.Thrift import TApplicationException
@@ -31,7 +31,6 @@
TTransportException,
)
-
U16 = struct.Struct("!H")
I32 = struct.Struct("!i")
HEADER_MAGIC = 0x0FFF
@@ -92,10 +91,10 @@
self._client_type = THeaderClientType.HEADERS
self._allowed_client_types = allowed_client_types
- self._read_buffer = BufferIO(b"")
+ self._read_buffer = BytesIO(b"")
self._read_headers = {}
- self._write_buffer = BufferIO()
+ self._write_buffer = BytesIO()
self._write_headers = {}
self._write_transforms = []
@@ -184,8 +183,8 @@
if frame_size & TBinaryProtocol.VERSION_MASK == TBinaryProtocol.VERSION_1:
self._set_client_type(THeaderClientType.UNFRAMED_BINARY)
is_unframed = True
- elif (byte_index(first_word, 0) == TCompactProtocol.PROTOCOL_ID and
- byte_index(first_word, 1) & TCompactProtocol.VERSION_MASK == TCompactProtocol.VERSION):
+ elif (first_word[0] == TCompactProtocol.PROTOCOL_ID and
+ first_word[1] & TCompactProtocol.VERSION_MASK == TCompactProtocol.VERSION):
self._set_client_type(THeaderClientType.UNFRAMED_COMPACT)
is_unframed = True
@@ -195,7 +194,7 @@
rest = self._transport.read(bytes_left_to_read)
else:
rest = b""
- self._read_buffer = BufferIO(first_word + rest)
+ self._read_buffer = BytesIO(first_word + rest)
return
# ok, we're still here so we're framed.
@@ -204,7 +203,7 @@
TTransportException.SIZE_LIMIT,
"Frame was too large.",
)
- read_buffer = BufferIO(self._transport.readAll(frame_size))
+ read_buffer = BytesIO(self._transport.readAll(frame_size))
# the next word is either going to be the version field of a
# binary/compact protocol message or the magic value + flags of a
@@ -218,8 +217,8 @@
elif version & TBinaryProtocol.VERSION_MASK == TBinaryProtocol.VERSION_1:
self._set_client_type(THeaderClientType.FRAMED_BINARY)
self._read_buffer = read_buffer
- elif (byte_index(second_word, 0) == TCompactProtocol.PROTOCOL_ID and
- byte_index(second_word, 1) & TCompactProtocol.VERSION_MASK == TCompactProtocol.VERSION):
+ elif (second_word[0] == TCompactProtocol.PROTOCOL_ID and
+ second_word[1] & TCompactProtocol.VERSION_MASK == TCompactProtocol.VERSION):
self._set_client_type(THeaderClientType.FRAMED_COMPACT)
self._read_buffer = read_buffer
else:
@@ -229,7 +228,7 @@
)
def _parse_header_format(self, buffer):
- # make BufferIO look like TTransport for varint helpers
+ # make BytesIO look like TTransport for varint helpers
buffer_transport = TMemoryBuffer()
buffer_transport._buffer = buffer
@@ -279,22 +278,22 @@
for transform_id in transforms:
transform_fn = READ_TRANSFORMS_BY_ID[transform_id]
payload = transform_fn(payload)
- return BufferIO(payload)
+ return BytesIO(payload)
def write(self, buf):
self._write_buffer.write(buf)
def flush(self):
payload = self._write_buffer.getvalue()
- self._write_buffer = BufferIO()
+ self._write_buffer = BytesIO()
- buffer = BufferIO()
+ buffer = BytesIO()
if self._client_type == THeaderClientType.HEADERS:
for transform_id in self._write_transforms:
transform_fn = WRITE_TRANSFORMS_BY_ID[transform_id]
payload = transform_fn(payload)
- headers = BufferIO()
+ headers = BytesIO()
writeVarint(headers, self._protocol_id)
writeVarint(headers, len(self._write_transforms))
for transform_id in self._write_transforms:
@@ -348,5 +347,5 @@
result = bytearray(partialread)
while len(result) < reqlen:
result += self.read(reqlen - len(result))
- self._read_buffer = BufferIO(result)
+ self._read_buffer = BytesIO(result)
return self._read_buffer
diff --git a/lib/py/src/transport/TTransport.py b/lib/py/src/transport/TTransport.py
index a686b12..4f6b67f 100644
--- a/lib/py/src/transport/TTransport.py
+++ b/lib/py/src/transport/TTransport.py
@@ -17,9 +17,9 @@
# under the License.
#
-from io import BytesIO as BufferIO
-
+from io import BytesIO
from struct import pack, unpack
+
from thrift.Thrift import TException
@@ -144,9 +144,9 @@
def __init__(self, trans, rbuf_size=DEFAULT_BUFFER):
self.__trans = trans
- self.__wbuf = BufferIO()
+ self.__wbuf = BytesIO()
# Pass string argument to initialize read buffer as cStringIO.InputType
- self.__rbuf = BufferIO(b'')
+ self.__rbuf = BytesIO(b'')
self.__rbuf_size = rbuf_size
def isOpen(self):
@@ -162,7 +162,7 @@
ret = self.__rbuf.read(sz)
if len(ret) != 0:
return ret
- self.__rbuf = BufferIO(self.__trans.read(max(sz, self.__rbuf_size)))
+ self.__rbuf = BytesIO(self.__trans.read(max(sz, self.__rbuf_size)))
return self.__rbuf.read(sz)
def write(self, buf):
@@ -170,13 +170,13 @@
self.__wbuf.write(buf)
except Exception as e:
# on exception reset wbuf so it doesn't contain a partial function call
- self.__wbuf = BufferIO()
+ self.__wbuf = BytesIO()
raise e
def flush(self):
out = self.__wbuf.getvalue()
# reset wbuf before write/flush to preserve state on underlying failure
- self.__wbuf = BufferIO()
+ self.__wbuf = BytesIO()
self.__trans.write(out)
self.__trans.flush()
@@ -195,7 +195,7 @@
if len(retstring) < reqlen:
retstring += self.__trans.readAll(reqlen - len(retstring))
- self.__rbuf = BufferIO(retstring)
+ self.__rbuf = BytesIO(retstring)
return self.__rbuf
@@ -214,9 +214,9 @@
If value is set, this will be a transport for reading,
otherwise, it is for writing"""
if value is not None:
- self._buffer = BufferIO(value)
+ self._buffer = BytesIO(value)
else:
- self._buffer = BufferIO()
+ self._buffer = BytesIO()
if offset:
self._buffer.seek(offset)
@@ -264,8 +264,8 @@
def __init__(self, trans,):
self.__trans = trans
- self.__rbuf = BufferIO(b'')
- self.__wbuf = BufferIO()
+ self.__rbuf = BytesIO(b'')
+ self.__wbuf = BytesIO()
def isOpen(self):
return self.__trans.isOpen()
@@ -287,7 +287,7 @@
def readFrame(self):
buff = self.__trans.readAll(4)
sz, = unpack('!i', buff)
- self.__rbuf = BufferIO(self.__trans.readAll(sz))
+ self.__rbuf = BytesIO(self.__trans.readAll(sz))
def write(self, buf):
self.__wbuf.write(buf)
@@ -296,7 +296,7 @@
wout = self.__wbuf.getvalue()
wsz = len(wout)
# reset wbuf before write/flush to preserve state on underlying failure
- self.__wbuf = BufferIO()
+ self.__wbuf = BytesIO()
# N.B.: Doing this string concatenation is WAY cheaper than making
# two separate calls to the underlying socket object. Socket writes in
# Python turn out to be REALLY expensive, but it seems to do a pretty
@@ -317,7 +317,7 @@
while len(prefix) < reqlen:
self.readFrame()
prefix += self.__rbuf.getvalue()
- self.__rbuf = BufferIO(prefix)
+ self.__rbuf = BytesIO(prefix)
return self.__rbuf
@@ -371,8 +371,8 @@
self.transport = transport
self.sasl = SASLClient(host, service, mechanism, **sasl_kwargs)
- self.__wbuf = BufferIO()
- self.__rbuf = BufferIO(b'')
+ self.__wbuf = BytesIO()
+ self.__rbuf = BytesIO(b'')
def open(self):
if not self.transport.isOpen():
@@ -424,7 +424,7 @@
encoded = self.sasl.wrap(data)
self.transport.write(pack("!i", len(encoded)) + encoded)
self.transport.flush()
- self.__wbuf = BufferIO()
+ self.__wbuf = BytesIO()
def read(self, sz):
ret = self.__rbuf.read(sz)
@@ -438,7 +438,7 @@
header = self.transport.readAll(4)
length, = unpack('!i', header)
encoded = self.transport.readAll(length)
- self.__rbuf = BufferIO(self.sasl.unwrap(encoded))
+ self.__rbuf = BytesIO(self.sasl.unwrap(encoded))
def close(self):
self.sasl.dispose()
@@ -456,5 +456,5 @@
while len(prefix) < reqlen:
self._read_frame()
prefix += self.__rbuf.getvalue()
- self.__rbuf = BufferIO(prefix)
+ self.__rbuf = BytesIO(prefix)
return self.__rbuf
diff --git a/lib/py/src/transport/TZlibTransport.py b/lib/py/src/transport/TZlibTransport.py
index 8b08297..a476d2a 100644
--- a/lib/py/src/transport/TZlibTransport.py
+++ b/lib/py/src/transport/TZlibTransport.py
@@ -23,8 +23,9 @@
"""
import zlib
+from io import BytesIO
+
from .TTransport import TTransportBase, CReadableTransport
-from ..compat import BufferIO
class TZlibTransportFactory:
@@ -87,8 +88,8 @@
"""
self.__trans = trans
self.compresslevel = compresslevel
- self.__rbuf = BufferIO()
- self.__wbuf = BufferIO()
+ self.__rbuf = BytesIO()
+ self.__wbuf = BytesIO()
self._init_zlib()
self._init_stats()
@@ -96,8 +97,8 @@
"""Internal method to initialize/reset the internal StringIO objects
for read and write buffers.
"""
- self.__rbuf = BufferIO()
- self.__wbuf = BufferIO()
+ self.__rbuf = BytesIO()
+ self.__wbuf = BytesIO()
def _init_stats(self):
"""Internal method to reset the internal statistics counters
@@ -202,7 +203,7 @@
self.bytes_in += len(zbuf)
self.bytes_in_comp += len(buf)
old = self.__rbuf.read()
- self.__rbuf = BufferIO(old + buf)
+ self.__rbuf = BytesIO(old + buf)
if len(old) + len(buf) == 0:
return False
return True
@@ -227,7 +228,7 @@
ztail = self._zcomp_write.flush(zlib.Z_SYNC_FLUSH)
self.bytes_out_comp += len(ztail)
if (len(zbuf) + len(ztail)) > 0:
- self.__wbuf = BufferIO()
+ self.__wbuf = BytesIO()
self.__trans.write(zbuf + ztail)
self.__trans.flush()
@@ -243,5 +244,5 @@
retstring += self.read(self.DEFAULT_BUFFSIZE)
while len(retstring) < reqlen:
retstring += self.read(reqlen - len(retstring))
- self.__rbuf = BufferIO(retstring)
+ self.__rbuf = BytesIO(retstring)
return self.__rbuf
diff --git a/lib/py/test/thrift_json.py b/lib/py/test/thrift_json.py
index 5a491e2..bf2b808 100644
--- a/lib/py/test/thrift_json.py
+++ b/lib/py/test/thrift_json.py
@@ -17,7 +17,6 @@
# under the License.
#
-import sys
import unittest
import _import_local_thrift # noqa