THRIFT-3613 Port Python C extension to Python 3
Client: Python
Patch: Nobuaki Sukegawa
This closes #845
diff --git a/lib/py/src/compat.py b/lib/py/src/compat.py
index 42403ea..787149a 100644
--- a/lib/py/src/compat.py
+++ b/lib/py/src/compat.py
@@ -34,13 +34,7 @@
from io import BytesIO as BufferIO
def binary_to_str(bin_val):
- try:
- return bin_val.decode('utf8')
- except:
- return bin_val
+ return bin_val.decode('utf8')
def str_to_binary(str_val):
- try:
- return bytes(str_val, 'utf8')
- except:
- return str_val
+ return bytes(str_val, 'utf8')
diff --git a/lib/py/src/ext/module.cpp b/lib/py/src/ext/module.cpp
index 82e3fe7..5ffc155 100644
--- a/lib/py/src/ext/module.cpp
+++ b/lib/py/src/ext/module.cpp
@@ -142,6 +142,24 @@
{NULL, NULL, 0, NULL} /* Sentinel */
};
+#if PY_MAJOR_VERSION >= 3
+
+static struct PyModuleDef ThriftFastBinaryDef = {PyModuleDef_HEAD_INIT,
+ "thrift.protocol.fastbinary",
+ NULL,
+ 0,
+ ThriftFastBinaryMethods,
+ NULL,
+ NULL,
+ NULL,
+ NULL};
+
+#define INITERROR return NULL;
+
+PyObject* PyInit_fastbinary() {
+
+#else
+
#define INITERROR return;
void initfastbinary() {
@@ -150,6 +168,8 @@
if (PycStringIO == NULL)
INITERROR
+#endif
+
const rlim_t kStackSize = 16 * 1024 * 1024; // min stack size = 16 MB
struct rlimit rl;
int result;
@@ -181,9 +201,16 @@
#undef INIT_INTERN_STRING
PyObject* module =
+#if PY_MAJOR_VERSION >= 3
+ PyModule_Create(&ThriftFastBinaryDef);
+#else
Py_InitModule("thrift.protocol.fastbinary", ThriftFastBinaryMethods);
+#endif
if (module == NULL)
INITERROR;
+#if PY_MAJOR_VERSION >= 3
+ return module;
+#endif
}
}
diff --git a/lib/py/src/ext/protocol.tcc b/lib/py/src/ext/protocol.tcc
index 3df83a1..554ba6e 100644
--- a/lib/py/src/ext/protocol.tcc
+++ b/lib/py/src/ext/protocol.tcc
@@ -23,12 +23,18 @@
#define CHECK_RANGE(v, min, max) (((v) <= (max)) && ((v) >= (min)))
#define INIT_OUTBUF_SIZE 128
+#if PY_MAJOR_VERSION < 3
#include <cStringIO.h>
+#else
+#include <algorithm>
+#endif
namespace apache {
namespace thrift {
namespace py {
+#if PY_MAJOR_VERSION < 3
+
namespace detail {
inline bool input_check(PyObject* input) {
@@ -101,6 +107,82 @@
return true;
}
+#else
+
+namespace detail {
+
+inline bool input_check(PyObject* input) {
+ // TODO: Check for BytesIO type
+ return true;
+}
+
+inline EncodeBuffer* new_encode_buffer(size_t size) {
+ EncodeBuffer* buffer = new EncodeBuffer;
+ buffer->buf.reserve(size);
+ buffer->pos = 0;
+ return buffer;
+}
+
+struct bytesio {
+ PyObject_HEAD
+#if PY_MINOR_VERSION < 5
+ char* buf;
+#else
+ PyObject* buf;
+#endif
+ Py_ssize_t pos;
+ Py_ssize_t string_size;
+};
+
+inline int read_buffer(PyObject* buf, char** output, int len) {
+ bytesio* buf2 = reinterpret_cast<bytesio*>(buf);
+#if PY_MINOR_VERSION < 5
+ *output = buf2->buf + buf2->pos;
+#else
+ *output = PyBytes_AS_STRING(buf2->buf) + buf2->pos;
+#endif
+ Py_ssize_t pos0 = buf2->pos;
+ buf2->pos = std::min(buf2->pos + static_cast<Py_ssize_t>(len), buf2->string_size);
+ return static_cast<int>(buf2->pos - pos0);
+}
+}
+
+template <typename Impl>
+inline ProtocolBase<Impl>::~ProtocolBase() {
+ if (output_) {
+ delete output_;
+ }
+}
+
+template <typename Impl>
+inline bool ProtocolBase<Impl>::isUtf8(PyObject* typeargs) {
+ // while condition for py2 is "arg == 'UTF8'", it should be "arg != 'BINARY'" for py3.
+ // HACK: check the length and don't bother reading the value
+ return !PyUnicode_Check(typeargs) || PyUnicode_GET_LENGTH(typeargs) != 6;
+}
+
+template <typename Impl>
+PyObject* ProtocolBase<Impl>::getEncodedValue() {
+ return PyBytes_FromStringAndSize(output_->buf.data(), output_->buf.size());
+}
+
+template <typename Impl>
+inline bool ProtocolBase<Impl>::writeBuffer(char* data, size_t size) {
+ size_t need = size + output_->pos;
+ if (output_->buf.capacity() < need) {
+ try {
+ output_->buf.reserve(need);
+ } catch (std::bad_alloc& ex) {
+ PyErr_SetString(PyExc_MemoryError, "Failed to allocate write buffer");
+ return false;
+ }
+ }
+ std::copy(data, data + size, std::back_inserter(output_->buf));
+ return true;
+}
+
+#endif
+
namespace detail {
#define DECLARE_OP_SCOPE(name, op) \
@@ -192,8 +274,8 @@
return false;
} else {
// using building functions as this is a rare codepath
- ScopedPyObject newiobuf(
- PyObject_CallFunction(input_.refill_callable.get(), refill_signature, *output, rlen, len, NULL));
+ ScopedPyObject newiobuf(PyObject_CallFunction(input_.refill_callable.get(), refill_signature,
+ *output, rlen, len, NULL));
if (!newiobuf) {
return false;
}
diff --git a/lib/py/src/ext/types.cpp b/lib/py/src/ext/types.cpp
index f3a29a2..849ab2f 100644
--- a/lib/py/src/ext/types.cpp
+++ b/lib/py/src/ext/types.cpp
@@ -26,7 +26,11 @@
PyObject* ThriftModule = NULL;
+#if PY_MAJOR_VERSION < 3
char refill_signature[] = {'s', '#', 'i'};
+#else
+const char* refill_signature = "y#i";
+#endif
bool parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple) {
// i'd like to use ParseArgs here, but it seems to be a bottleneck.
diff --git a/lib/py/src/ext/types.h b/lib/py/src/ext/types.h
index 749bb68..0dd5d96 100644
--- a/lib/py/src/ext/types.h
+++ b/lib/py/src/ext/types.h
@@ -22,6 +22,18 @@
#include <Python.h>
+#if PY_MAJOR_VERSION >= 3
+
+#include <vector>
+
+// TODO: better macros
+#define PyInt_AsLong(v) PyLong_AsLong(v)
+#define PyInt_FromLong(v) PyLong_FromLong(v)
+
+#define PyString_InternFromString(v) PyUnicode_InternFromString(v)
+
+#endif
+
#define INTERN_STRING(value) _intern_##value
#define INT_CONV_ERROR_OCCURRED(v) (((v) == -1) && PyErr_Occurred())
@@ -104,8 +116,16 @@
ScopedPyObject refill_callable;
};
+#if PY_MAJOR_VERSION < 3
extern char refill_signature[3];
typedef PyObject EncodeBuffer;
+#else
+extern const char* refill_signature;
+struct EncodeBuffer {
+ std::vector<char> buf;
+ size_t pos;
+};
+#endif
/**
* A cache of the spec_args for a set or list,