THRIFT-3613 Port Python C extension to Python 3
Client: Python
Patch: Nobuaki Sukegawa

This closes #845
diff --git a/lib/py/src/compat.py b/lib/py/src/compat.py
index 42403ea..787149a 100644
--- a/lib/py/src/compat.py
+++ b/lib/py/src/compat.py
@@ -34,13 +34,7 @@
     from io import BytesIO as BufferIO
 
     def binary_to_str(bin_val):
-        try:
-            return bin_val.decode('utf8')
-        except:
-            return bin_val
+        return bin_val.decode('utf8')
 
     def str_to_binary(str_val):
-        try:
-            return bytes(str_val, 'utf8')
-        except:
-            return str_val
+        return bytes(str_val, 'utf8')
diff --git a/lib/py/src/ext/module.cpp b/lib/py/src/ext/module.cpp
index 82e3fe7..5ffc155 100644
--- a/lib/py/src/ext/module.cpp
+++ b/lib/py/src/ext/module.cpp
@@ -142,6 +142,24 @@
     {NULL, NULL, 0, NULL} /* Sentinel */
 };
 
+#if PY_MAJOR_VERSION >= 3
+
+static struct PyModuleDef ThriftFastBinaryDef = {PyModuleDef_HEAD_INIT,
+                                                 "thrift.protocol.fastbinary", // m_name
+                                                 NULL, // m_doc
+                                                 0, // m_size
+                                                 ThriftFastBinaryMethods, // m_methods
+                                                 NULL, // m_slots (m_reload before Python 3.5)
+                                                 NULL, // m_traverse
+                                                 NULL, // m_clear
+                                                 NULL}; // m_free
+
+#define INITERROR return NULL;
+
+PyObject* PyInit_fastbinary() {
+
+#else
+
 #define INITERROR return;
 
 void initfastbinary() {
@@ -150,6 +168,8 @@
   if (PycStringIO == NULL)
     INITERROR
 
+#endif
+
   const rlim_t kStackSize = 16 * 1024 * 1024; // min stack size = 16 MB
   struct rlimit rl;
   int result;
@@ -181,9 +201,16 @@
 #undef INIT_INTERN_STRING
 
   PyObject* module =
+#if PY_MAJOR_VERSION >= 3
+      PyModule_Create(&ThriftFastBinaryDef);
+#else
       Py_InitModule("thrift.protocol.fastbinary", ThriftFastBinaryMethods);
+#endif
   if (module == NULL)
     INITERROR;
 
+#if PY_MAJOR_VERSION >= 3
+  return module;
+#endif
 }
 }
diff --git a/lib/py/src/ext/protocol.tcc b/lib/py/src/ext/protocol.tcc
index 3df83a1..554ba6e 100644
--- a/lib/py/src/ext/protocol.tcc
+++ b/lib/py/src/ext/protocol.tcc
@@ -23,12 +23,18 @@
 #define CHECK_RANGE(v, min, max) (((v) <= (max)) && ((v) >= (min)))
 #define INIT_OUTBUF_SIZE 128
 
+#if PY_MAJOR_VERSION < 3
 #include <cStringIO.h>
+#else
+#include <algorithm>
+#endif
 
 namespace apache {
 namespace thrift {
 namespace py {
 
+#if PY_MAJOR_VERSION < 3
+
 namespace detail {
 
 inline bool input_check(PyObject* input) {
@@ -101,6 +107,82 @@
   return true;
 }
 
+#else
+
+namespace detail {
+
+inline bool input_check(PyObject* input) {
+  // TODO: Check for BytesIO type; until that is done every object is accepted unchecked.
+  return true;
+}
+
+inline EncodeBuffer* new_encode_buffer(size_t size) { // returns a raw pointer from plain new
+  EncodeBuffer* buffer = new EncodeBuffer;
+  buffer->buf.reserve(size); // reserves capacity only; buf.size() is still 0 afterwards
+  buffer->pos = 0;
+  return buffer;
+}
+
+struct bytesio { // NOTE(review): presumably mirrors CPython's private BytesIO layout - confirm
+  PyObject_HEAD
+#if PY_MINOR_VERSION < 5
+      char* buf; // raw byte storage, indexed directly by read_buffer
+#else
+      PyObject* buf; // read through PyBytes_AS_STRING in read_buffer
+#endif
+  Py_ssize_t pos; // current read offset into buf
+  Py_ssize_t string_size; // upper bound that read_buffer applies to pos
+};
+
+inline int read_buffer(PyObject* buf, char** output, int len) { // returns bytes made available
+  bytesio* buf2 = reinterpret_cast<bytesio*>(buf);
+  Py_ssize_t pos0 = std::min(buf2->pos, buf2->string_size); // clamp a cursor seeked past the end
+#if PY_MINOR_VERSION < 5
+  *output = buf2->buf + pos0;
+#else
+  *output = PyBytes_AS_STRING(buf2->buf) + pos0;
+#endif
+  buf2->pos = std::min(pos0 + static_cast<Py_ssize_t>(len), buf2->string_size);
+  return static_cast<int>(buf2->pos - pos0); // never negative for len >= 0 thanks to the clamp
+}
+}
+
+template <typename Impl>
+inline ProtocolBase<Impl>::~ProtocolBase() {
+  // Releases the output buffer. Deleting a null pointer is a no-op, so an explicit
+  // null test before the delete is unnecessary.
+  delete output_;
+}
+
+template <typename Impl>
+inline bool ProtocolBase<Impl>::isUtf8(PyObject* typeargs) {
+  // py2 tests "arg == 'UTF8'"; py3 must instead test "arg != 'BINARY'".
+  // HACK: only a str of length 6 counts as 'BINARY' (contents unchecked); all else is UTF-8.
+  return !PyUnicode_Check(typeargs) || PyUnicode_GET_LENGTH(typeargs) != 6;
+}
+
+template <typename Impl>
+PyObject* ProtocolBase<Impl>::getEncodedValue() { // copies output_->buf into a new bytes object
+  return PyBytes_FromStringAndSize(output_->buf.data(), output_->buf.size());
+}
+
+// Appends `size` bytes starting at `data` to the encode buffer. Returns true on success;
+// on allocation failure sets a Python MemoryError and returns false.
+template <typename Impl>
+inline bool ProtocolBase<Impl>::writeBuffer(char* data, size_t size) {
+  try {
+    // insert() grows the vector and appends inside the try block, so an allocation
+    // failure cannot escape into the Python C API as an uncaught C++ exception.
+    output_->buf.insert(output_->buf.end(), data, data + size);
+  } catch (const std::bad_alloc&) {
+    PyErr_SetString(PyExc_MemoryError, "Failed to allocate write buffer");
+    return false;
+  }
+  return true;
+}
+
+#endif
+
 namespace detail {
 
 #define DECLARE_OP_SCOPE(name, op)                                                                 \
@@ -192,8 +274,8 @@
     return false;
   } else {
     // using building functions as this is a rare codepath
-    ScopedPyObject newiobuf(
-        PyObject_CallFunction(input_.refill_callable.get(), refill_signature, *output, rlen, len, NULL));
+    ScopedPyObject newiobuf(PyObject_CallFunction(input_.refill_callable.get(), refill_signature,
+                                                  *output, rlen, len, NULL));
     if (!newiobuf) {
       return false;
     }
diff --git a/lib/py/src/ext/types.cpp b/lib/py/src/ext/types.cpp
index f3a29a2..849ab2f 100644
--- a/lib/py/src/ext/types.cpp
+++ b/lib/py/src/ext/types.cpp
@@ -26,7 +26,11 @@
 
 PyObject* ThriftModule = NULL;
 
+#if PY_MAJOR_VERSION < 3
 char refill_signature[] = {'s', '#', 'i'};
+#else
+const char* refill_signature = "y#i";
+#endif
 
 bool parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple) {
   // i'd like to use ParseArgs here, but it seems to be a bottleneck.
diff --git a/lib/py/src/ext/types.h b/lib/py/src/ext/types.h
index 749bb68..0dd5d96 100644
--- a/lib/py/src/ext/types.h
+++ b/lib/py/src/ext/types.h
@@ -22,6 +22,18 @@
 
 #include <Python.h>
 
+#if PY_MAJOR_VERSION >= 3
+
+#include <vector>
+
+// TODO: better macros
+#define PyInt_AsLong(v) PyLong_AsLong(v)
+#define PyInt_FromLong(v) PyLong_FromLong(v)
+
+#define PyString_InternFromString(v) PyUnicode_InternFromString(v)
+
+#endif
+
 #define INTERN_STRING(value) _intern_##value
 
 #define INT_CONV_ERROR_OCCURRED(v) (((v) == -1) && PyErr_Occurred())
@@ -104,8 +116,16 @@
   ScopedPyObject refill_callable;
 };
 
+#if PY_MAJOR_VERSION < 3
 extern char refill_signature[3];
 typedef PyObject EncodeBuffer;
+#else
+extern const char* refill_signature;
+struct EncodeBuffer {
+  std::vector<char> buf; // bytes written so far; returned by getEncodedValue as a bytes object
+  size_t pos; // zeroed by detail::new_encode_buffer; not advanced anywhere in this patch
+};
+#endif
 
 /**
  * A cache of the spec_args for a set or list,