THRIFT-3613 Port Python C extension to Python 3
Client: Python
Patch: Nobuaki Sukegawa
This closes #845
diff --git a/lib/py/setup.py b/lib/py/setup.py
index 67e9d52..ca10096 100644
--- a/lib/py/setup.py
+++ b/lib/py/setup.py
@@ -19,7 +19,6 @@
# under the License.
#
-import platform
import sys
try:
from setuptools import setup, Extension
@@ -108,11 +107,7 @@
)
try:
- with_binary = False
- # Don't even try to build the C module unless we're on CPython 2.x.
- # TODO: fix it for CPython 3.x
- if platform.python_implementation() == 'CPython' and sys.version_info < (3,):
- with_binary = True
+ with_binary = True
run_setup(with_binary)
except BuildFailed:
print()
diff --git a/lib/py/src/compat.py b/lib/py/src/compat.py
index 42403ea..787149a 100644
--- a/lib/py/src/compat.py
+++ b/lib/py/src/compat.py
@@ -34,13 +34,7 @@
from io import BytesIO as BufferIO
def binary_to_str(bin_val):
- try:
- return bin_val.decode('utf8')
- except:
- return bin_val
+ return bin_val.decode('utf8')
def str_to_binary(str_val):
- try:
- return bytes(str_val, 'utf8')
- except:
- return str_val
+ return bytes(str_val, 'utf8')
diff --git a/lib/py/src/ext/module.cpp b/lib/py/src/ext/module.cpp
index 82e3fe7..5ffc155 100644
--- a/lib/py/src/ext/module.cpp
+++ b/lib/py/src/ext/module.cpp
@@ -142,6 +142,24 @@
{NULL, NULL, 0, NULL} /* Sentinel */
};
+#if PY_MAJOR_VERSION >= 3
+
+static struct PyModuleDef ThriftFastBinaryDef = {PyModuleDef_HEAD_INIT,
+ "thrift.protocol.fastbinary",
+ NULL,
+ 0,
+ ThriftFastBinaryMethods,
+ NULL,
+ NULL,
+ NULL,
+ NULL};
+
+#define INITERROR return NULL;
+
+PyObject* PyInit_fastbinary() {
+
+#else
+
#define INITERROR return;
void initfastbinary() {
@@ -150,6 +168,8 @@
if (PycStringIO == NULL)
INITERROR
+#endif
+
const rlim_t kStackSize = 16 * 1024 * 1024; // min stack size = 16 MB
struct rlimit rl;
int result;
@@ -181,9 +201,16 @@
#undef INIT_INTERN_STRING
PyObject* module =
+#if PY_MAJOR_VERSION >= 3
+ PyModule_Create(&ThriftFastBinaryDef);
+#else
Py_InitModule("thrift.protocol.fastbinary", ThriftFastBinaryMethods);
+#endif
if (module == NULL)
INITERROR;
+#if PY_MAJOR_VERSION >= 3
+ return module;
+#endif
}
}
diff --git a/lib/py/src/ext/protocol.tcc b/lib/py/src/ext/protocol.tcc
index 3df83a1..554ba6e 100644
--- a/lib/py/src/ext/protocol.tcc
+++ b/lib/py/src/ext/protocol.tcc
@@ -23,12 +23,18 @@
#define CHECK_RANGE(v, min, max) (((v) <= (max)) && ((v) >= (min)))
#define INIT_OUTBUF_SIZE 128
+#if PY_MAJOR_VERSION < 3
#include <cStringIO.h>
+#else
+#include <algorithm>
+#endif
namespace apache {
namespace thrift {
namespace py {
+#if PY_MAJOR_VERSION < 3
+
namespace detail {
inline bool input_check(PyObject* input) {
@@ -101,6 +107,82 @@
return true;
}
+#else
+
+namespace detail {
+
+inline bool input_check(PyObject* input) {
+ // TODO: Check for BytesIO type
+ return true;
+}
+
+inline EncodeBuffer* new_encode_buffer(size_t size) {
+ EncodeBuffer* buffer = new EncodeBuffer;
+ buffer->buf.reserve(size);
+ buffer->pos = 0;
+ return buffer;
+}
+
+struct bytesio {
+ PyObject_HEAD
+#if PY_MINOR_VERSION < 5
+ char* buf;
+#else
+ PyObject* buf;
+#endif
+ Py_ssize_t pos;
+ Py_ssize_t string_size;
+};
+
+inline int read_buffer(PyObject* buf, char** output, int len) {
+ bytesio* buf2 = reinterpret_cast<bytesio*>(buf);
+#if PY_MINOR_VERSION < 5
+ *output = buf2->buf + buf2->pos;
+#else
+ *output = PyBytes_AS_STRING(buf2->buf) + buf2->pos;
+#endif
+ Py_ssize_t pos0 = buf2->pos;
+ buf2->pos = std::min(buf2->pos + static_cast<Py_ssize_t>(len), buf2->string_size);
+ return static_cast<int>(buf2->pos - pos0);
+}
+}
+
+template <typename Impl>
+inline ProtocolBase<Impl>::~ProtocolBase() {
+ if (output_) {
+ delete output_;
+ }
+}
+
+template <typename Impl>
+inline bool ProtocolBase<Impl>::isUtf8(PyObject* typeargs) {
+ // while condition for py2 is "arg == 'UTF8'", it should be "arg != 'BINARY'" for py3.
+ // HACK: check the length and don't bother reading the value
+ return !PyUnicode_Check(typeargs) || PyUnicode_GET_LENGTH(typeargs) != 6;
+}
+
+template <typename Impl>
+PyObject* ProtocolBase<Impl>::getEncodedValue() {
+ return PyBytes_FromStringAndSize(output_->buf.data(), output_->buf.size());
+}
+
+template <typename Impl>
+inline bool ProtocolBase<Impl>::writeBuffer(char* data, size_t size) {
+ size_t need = size + output_->pos;
+ if (output_->buf.capacity() < need) {
+ try {
+ output_->buf.reserve(need);
+ } catch (std::bad_alloc& ex) {
+ PyErr_SetString(PyExc_MemoryError, "Failed to allocate write buffer");
+ return false;
+ }
+ }
+ std::copy(data, data + size, std::back_inserter(output_->buf));
+ return true;
+}
+
+#endif
+
namespace detail {
#define DECLARE_OP_SCOPE(name, op) \
@@ -192,8 +274,8 @@
return false;
} else {
// using building functions as this is a rare codepath
- ScopedPyObject newiobuf(
- PyObject_CallFunction(input_.refill_callable.get(), refill_signature, *output, rlen, len, NULL));
+ ScopedPyObject newiobuf(PyObject_CallFunction(input_.refill_callable.get(), refill_signature,
+ *output, rlen, len, NULL));
if (!newiobuf) {
return false;
}
diff --git a/lib/py/src/ext/types.cpp b/lib/py/src/ext/types.cpp
index f3a29a2..849ab2f 100644
--- a/lib/py/src/ext/types.cpp
+++ b/lib/py/src/ext/types.cpp
@@ -26,7 +26,11 @@
PyObject* ThriftModule = NULL;
+#if PY_MAJOR_VERSION < 3
char refill_signature[] = {'s', '#', 'i'};
+#else
+const char* refill_signature = "y#i";
+#endif
bool parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple) {
// i'd like to use ParseArgs here, but it seems to be a bottleneck.
diff --git a/lib/py/src/ext/types.h b/lib/py/src/ext/types.h
index 749bb68..0dd5d96 100644
--- a/lib/py/src/ext/types.h
+++ b/lib/py/src/ext/types.h
@@ -22,6 +22,18 @@
#include <Python.h>
+#if PY_MAJOR_VERSION >= 3
+
+#include <vector>
+
+// TODO: better macros
+#define PyInt_AsLong(v) PyLong_AsLong(v)
+#define PyInt_FromLong(v) PyLong_FromLong(v)
+
+#define PyString_InternFromString(v) PyUnicode_InternFromString(v)
+
+#endif
+
#define INTERN_STRING(value) _intern_##value
#define INT_CONV_ERROR_OCCURRED(v) (((v) == -1) && PyErr_Occurred())
@@ -104,8 +116,16 @@
ScopedPyObject refill_callable;
};
+#if PY_MAJOR_VERSION < 3
extern char refill_signature[3];
typedef PyObject EncodeBuffer;
+#else
+extern const char* refill_signature;
+struct EncodeBuffer {
+ std::vector<char> buf;
+ size_t pos;
+};
+#endif
/**
* A cache of the spec_args for a set or list,
diff --git a/lib/py/test/_import_local_thrift.py b/lib/py/test/_import_local_thrift.py
index 1741669..d223122 100644
--- a/lib/py/test/_import_local_thrift.py
+++ b/lib/py/test/_import_local_thrift.py
@@ -1,13 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import glob
import os
import sys
-
SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(SCRIPT_DIR)))
-if sys.version_info[0] == 2:
- import glob
- libdir = glob.glob(os.path.join(ROOT_DIR, 'lib', 'py', 'build', 'lib.*'))[0]
- sys.path.insert(0, libdir)
-else:
- sys.path.insert(0, os.path.join(ROOT_DIR, 'lib', 'py', 'build', 'lib'))
+for libpath in glob.glob(os.path.join(ROOT_DIR, 'lib', 'py', 'build', 'lib.*')):
+ if libpath.endswith('-%d.%d' % (sys.version_info[0], sys.version_info[1])):
+ sys.path.insert(0, libpath)
+ break