THRIFT-3612 Add Python C extension for compact protocol
Client: Python
Patch: Nobuaki Sukegawa

This closes #844
diff --git a/compiler/cpp/src/generate/t_py_generator.cc b/compiler/cpp/src/generate/t_py_generator.cc
index 31f5d91..d1e2fa3 100644
--- a/compiler/cpp/src/generate/t_py_generator.cc
+++ b/compiler/cpp/src/generate/t_py_generator.cc
@@ -86,7 +86,7 @@
       gen_dynbaseclass_ = "TBase";
       gen_dynbaseclass_frozen_ = "TFrozenBase";
       gen_dynbaseclass_exc_ = "TExceptionBase";
-      import_dynbase_ = "from thrift.protocol.TBase import TBase, TFrozenBase, TExceptionBase, TTransport\n";
+      import_dynbase_ = "from thrift.protocol.TBase import TBase, TFrozenBase, TExceptionBase\n";
     }
 
     iter = parsed_options.find("dynbase");
@@ -241,7 +241,6 @@
   std::string py_autogen_comment();
   std::string py_imports();
   std::string render_includes();
-  std::string render_fastbinary_includes();
   std::string declare_argument(t_field* tfield);
   std::string render_field_default_value(t_field* tfield);
   std::string type_name(t_type* ttype);
@@ -384,11 +383,11 @@
   f_init.close();
 
   // Print header
-  f_types_ <<
-    py_autogen_comment() << endl <<
-    py_imports() << endl <<
-    render_includes() << endl <<
-    render_fastbinary_includes();
+  f_types_ << py_autogen_comment() << endl
+           << py_imports() << endl
+           << render_includes() << endl
+           << "from thrift.transport import TTransport" << endl
+           << import_dynbase_;
 
   f_consts_ <<
     py_autogen_comment() << endl <<
@@ -409,24 +408,6 @@
 }
 
 /**
- * Renders all the imports necessary to use the accelerated TBinaryProtocol
- */
-string t_py_generator::render_fastbinary_includes() {
-  string hdr = "";
-  if (gen_dynamic_) {
-    hdr += std::string(import_dynbase_);
-  } else {
-    hdr += "from thrift.transport import TTransport\n"
-           "from thrift.protocol import TBinaryProtocol, TProtocol\n"
-           "try:\n" +
-           indent_str() + "from thrift.protocol import fastbinary\n"
-           "except:\n" +
-           indent_str() + "fastbinary = None\n";
-  }
-  return hdr;
-}
-
-/**
  * Autogen'd comment
  */
 string t_py_generator::py_autogen_comment() {
@@ -902,19 +883,16 @@
 
   const char* id = is_immutable(tstruct) ? "cls" : "self";
 
-  indent(out) << "if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated "
+  indent(out) << "if iprot._fast_decode is not None "
                  "and isinstance(iprot.trans, TTransport.CReadableTransport) "
-                 "and " << id << ".thrift_spec is not None "
-                 "and fastbinary is not None:" << endl;
+                 "and "
+              << id << ".thrift_spec is not None:" << endl;
   indent_up();
 
   if (is_immutable(tstruct)) {
-    indent(out)
-        << "return fastbinary.decode_binary(None, iprot.trans, (cls, cls.thrift_spec), iprot.string_length_limit, iprot.container_length_limit)"
-        << endl;
+    indent(out) << "return iprot._fast_decode(None, iprot, (cls, cls.thrift_spec))" << endl;
   } else {
-    indent(out) << "fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec), iprot.string_length_limit, iprot.container_length_limit)"
-                << endl;
+    indent(out) << "iprot._fast_decode(self, iprot, (self.__class__, self.thrift_spec))" << endl;
     indent(out) << "return" << endl;
   }
   indent_down();
@@ -991,13 +969,11 @@
   indent(out) << "def write(self, oprot):" << endl;
   indent_up();
 
-  indent(out) << "if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated "
-                 "and self.thrift_spec is not None "
-                 "and fastbinary is not None:" << endl;
+  indent(out) << "if oprot._fast_encode is not None and self.thrift_spec is not None:" << endl;
   indent_up();
 
   indent(out)
-      << "oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec)))"
+      << "oprot.trans.write(oprot._fast_encode(self, (self.__class__, self.thrift_spec)))"
       << endl;
   indent(out) << "return" << endl;
   indent_down();
@@ -1074,7 +1050,8 @@
   f_service_ << "import logging" << endl
              << "from .ttypes import *" << endl
              << "from thrift.Thrift import TProcessor" << endl
-             << render_fastbinary_includes();
+             << "from thrift.transport import TTransport" << endl
+             << import_dynbase_;
 
   if (gen_twisted_) {
     f_service_ << "from zope.interface import Interface, implements" << endl
@@ -1546,11 +1523,8 @@
     indent_str() << "from urllib.parse import urlparse" << endl <<
     "else:" << endl <<
     indent_str() << "from urlparse import urlparse" << endl <<
-    "from thrift.transport import TTransport" << endl <<
-    "from thrift.transport import TSocket" << endl <<
-    "from thrift.transport import TSSLSocket" << endl <<
-    "from thrift.transport import THttpClient" << endl <<
-    "from thrift.protocol import TBinaryProtocol" << endl <<
+    "from thrift.transport import TTransport, TSocket, TSSLSocket, THttpClient" << endl <<
+    "from thrift.protocol.TBinaryProtocol import TBinaryProtocol" << endl <<
     endl;
 
   f_remote <<
@@ -1635,7 +1609,7 @@
            << indent_str() << indent_str() << "transport = TTransport.TFramedTransport(socket)" << endl
            << indent_str() << "else:" << endl
            << indent_str() << indent_str() << "transport = TTransport.TBufferedTransport(socket)" << endl
-           << "protocol = TBinaryProtocol.TBinaryProtocol(transport)" << endl
+           << "protocol = TBinaryProtocol(transport)" << endl
            << "client = " << service_name_ << ".Client(protocol)" << endl
            << "transport.open()" << endl
            << endl;
diff --git a/lib/py/setup.py b/lib/py/setup.py
index f57c1a1..67e9d52 100644
--- a/lib/py/setup.py
+++ b/lib/py/setup.py
@@ -34,7 +34,7 @@
 if 'vagrant' in str(os.environ):
     del os.link
 
-include_dirs = []
+include_dirs = ['src']
 if sys.platform == 'win32':
     include_dirs.append('compat/win32')
     ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError, IOError)
@@ -65,7 +65,12 @@
         extensions = dict(
             ext_modules=[
                 Extension('thrift.protocol.fastbinary',
-                          sources=['src/protocol/fastbinary.c'],
+                          sources=[
+                              'src/ext/module.cpp',
+                              'src/ext/types.cpp',
+                              'src/ext/binary.cpp',
+                              'src/ext/compact.cpp',
+                          ],
                           include_dirs=include_dirs,
                           )
             ],
diff --git a/lib/py/src/ext/binary.cpp b/lib/py/src/ext/binary.cpp
new file mode 100644
index 0000000..a83899f
--- /dev/null
+++ b/lib/py/src/ext/binary.cpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "ext/binary.h"
+namespace apache {
+namespace thrift {
+namespace py {
+
+bool BinaryProtocol::readFieldBegin(TType& type, int16_t& tag) {
+  uint8_t b = 0;
+  if (!readByte(b)) {
+    return false;
+  }
+  if (b == -1) {
+    return false;
+  }
+  type = static_cast<TType>(b);
+  if (type == T_STOP) {
+    return true;
+  }
+  return readI16(tag);
+}
+}
+}
+}
diff --git a/lib/py/src/ext/binary.h b/lib/py/src/ext/binary.h
new file mode 100644
index 0000000..dedeec3
--- /dev/null
+++ b/lib/py/src/ext/binary.h
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_BINARY_H
+#define THRIFT_PY_BINARY_H
+
+#include <Python.h>
+#include "ext/protocol.h"
+#include "ext/endian.h"
+#include <stdint.h>
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+class BinaryProtocol : public ProtocolBase<BinaryProtocol> {
+public:
+  virtual ~BinaryProtocol() {}
+
+  void writeI8(int8_t val) { writeBuffer(reinterpret_cast<char*>(&val), sizeof(int8_t)); }
+
+  void writeI16(int16_t val) {
+    int16_t net = static_cast<int16_t>(htons(val));
+    writeBuffer(reinterpret_cast<char*>(&net), sizeof(int16_t));
+  }
+
+  void writeI32(int32_t val) {
+    int32_t net = static_cast<int32_t>(htonl(val));
+    writeBuffer(reinterpret_cast<char*>(&net), sizeof(int32_t));
+  }
+
+  void writeI64(int64_t val) {
+    int64_t net = static_cast<int64_t>(htonll(val));
+    writeBuffer(reinterpret_cast<char*>(&net), sizeof(int64_t));
+  }
+
+  void writeDouble(double dub) {
+    // Unfortunately, bitwise_cast doesn't work in C.  Bad C!
+    union {
+      double f;
+      int64_t t;
+    } transfer;
+    transfer.f = dub;
+    writeI64(transfer.t);
+  }
+
+  void writeBool(int v) { writeByte(static_cast<uint8_t>(v)); }
+
+  void writeString(PyObject* value, int32_t len) {
+    writeI32(len);
+    writeBuffer(PyBytes_AS_STRING(value), len);
+  }
+
+  bool writeListBegin(PyObject* value, const SetListTypeArgs& parsedargs, int32_t len) {
+    writeByte(parsedargs.element_type);
+    writeI32(len);
+    return true;
+  }
+
+  bool writeMapBegin(PyObject* value, const MapTypeArgs& parsedargs, int32_t len) {
+    writeByte(parsedargs.ktag);
+    writeByte(parsedargs.vtag);
+    writeI32(len);
+    return true;
+  }
+
+  bool writeStructBegin() { return true; }
+  bool writeStructEnd() { return true; }
+  bool writeField(PyObject* value, const StructItemSpec& parsedspec) {
+    writeByte(static_cast<uint8_t>(parsedspec.type));
+    writeI16(parsedspec.tag);
+    return encodeValue(value, parsedspec.type, parsedspec.typeargs);
+  }
+
+  void writeFieldStop() { writeByte(static_cast<uint8_t>(T_STOP)); }
+
+  bool readBool(bool& val) {
+    char* buf;
+    if (!readBytes(&buf, 1)) {
+      return false;
+    }
+    val = buf[0] == 1;
+    return true;
+  }
+
+  bool readI8(int8_t& val) {
+    char* buf;
+    if (!readBytes(&buf, 1)) {
+      return false;
+    }
+    val = buf[0];
+    return true;
+  }
+
+  bool readI16(int16_t& val) {
+    char* buf;
+    if (!readBytes(&buf, sizeof(int16_t))) {
+      return false;
+    }
+    val = static_cast<int16_t>(ntohs(*reinterpret_cast<int16_t*>(buf)));
+    return true;
+  }
+
+  bool readI32(int32_t& val) {
+    char* buf;
+    if (!readBytes(&buf, sizeof(int32_t))) {
+      return false;
+    }
+    val = static_cast<int32_t>(ntohl(*reinterpret_cast<int32_t*>(buf)));
+    return true;
+  }
+
+  bool readI64(int64_t& val) {
+    char* buf;
+    if (!readBytes(&buf, sizeof(int64_t))) {
+      return false;
+    }
+    val = static_cast<int64_t>(ntohll(*reinterpret_cast<int64_t*>(buf)));
+    return true;
+  }
+
+  bool readDouble(double& val) {
+    union {
+      int64_t f;
+      double t;
+    } transfer;
+
+    if (!readI64(transfer.f)) {
+      return false;
+    }
+    val = transfer.t;
+    return true;
+  }
+
+  int32_t readString(char** buf) {
+    int32_t len = 0;
+    if (!readI32(len) || !checkLengthLimit(len, stringLimit()) || !readBytes(buf, len)) {
+      return -1;
+    }
+    return len;
+  }
+
+  int32_t readListBegin(TType& etype) {
+    int32_t len;
+    uint8_t b = 0;
+    if (!readByte(b) || !readI32(len) || !checkLengthLimit(len, containerLimit())) {
+      return -1;
+    }
+    etype = static_cast<TType>(b);
+    return len;
+  }
+
+  int32_t readMapBegin(TType& ktype, TType& vtype) {
+    int32_t len;
+    uint8_t k, v;
+    if (!readByte(k) || !readByte(v) || !readI32(len) || !checkLengthLimit(len, containerLimit())) {
+      return -1;
+    }
+    ktype = static_cast<TType>(k);
+    vtype = static_cast<TType>(v);
+    return len;
+  }
+
+  bool readStructBegin() { return true; }
+  bool readStructEnd() { return true; }
+
+  bool readFieldBegin(TType& type, int16_t& tag);
+
+#define SKIPBYTES(n)                                                                               \
+  do {                                                                                             \
+    if (!readBytes(&dummy_buf_, (n))) {                                                            \
+      return false;                                                                                \
+    }                                                                                              \
+    return true;                                                                                   \
+  } while (0)
+
+  bool skipBool() { SKIPBYTES(1); }
+  bool skipByte() { SKIPBYTES(1); }
+  bool skipI16() { SKIPBYTES(2); }
+  bool skipI32() { SKIPBYTES(4); }
+  bool skipI64() { SKIPBYTES(8); }
+  bool skipDouble() { SKIPBYTES(8); }
+  bool skipString() {
+    int32_t len;
+    if (!readI32(len)) {
+      return false;
+    }
+    SKIPBYTES(len);
+  }
+#undef SKIPBYTES
+
+private:
+  char* dummy_buf_;
+};
+}
+}
+}
+#endif // THRIFT_PY_BINARY_H
diff --git a/lib/py/src/ext/compact.cpp b/lib/py/src/ext/compact.cpp
new file mode 100644
index 0000000..15a99a0
--- /dev/null
+++ b/lib/py/src/ext/compact.cpp
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "ext/compact.h"
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+const uint8_t CompactProtocol::TTypeToCType[] = {
+    CT_STOP,         // T_STOP
+    0,               // unused
+    CT_BOOLEAN_TRUE, // T_BOOL
+    CT_BYTE,         // T_BYTE
+    CT_DOUBLE,       // T_DOUBLE
+    0,               // unused
+    CT_I16,          // T_I16
+    0,               // unused
+    CT_I32,          // T_I32
+    0,               // unused
+    CT_I64,          // T_I64
+    CT_BINARY,       // T_STRING
+    CT_STRUCT,       // T_STRUCT
+    CT_MAP,          // T_MAP
+    CT_SET,          // T_SET
+    CT_LIST,         // T_LIST
+};
+
+bool CompactProtocol::readFieldBegin(TType& type, int16_t& tag) {
+  uint8_t b;
+  if (!readByte(b)) {
+    return false;
+  }
+  uint8_t ctype = b & 0xf;
+  type = getTType(ctype);
+  if (type == -1) {
+    return false;
+  } else if (type == T_STOP) {
+    tag = 0;
+    return true;
+  }
+  uint8_t diff = (b & 0xf0) >> 4;
+  if (diff) {
+    tag = readTags_.top() + diff;
+  } else if (!readI16(tag)) {
+    readTags_.top() = -1;
+    return false;
+  }
+  if (ctype == CT_BOOLEAN_FALSE || ctype == CT_BOOLEAN_TRUE) {
+    readBool_.exists = true;
+    readBool_.value = ctype == CT_BOOLEAN_TRUE;
+  }
+  readTags_.top() = tag;
+  return true;
+}
+
+TType CompactProtocol::getTType(uint8_t type) {
+  switch (type) {
+  case T_STOP:
+    return T_STOP;
+  case CT_BOOLEAN_FALSE:
+  case CT_BOOLEAN_TRUE:
+    return T_BOOL;
+  case CT_BYTE:
+    return T_BYTE;
+  case CT_I16:
+    return T_I16;
+  case CT_I32:
+    return T_I32;
+  case CT_I64:
+    return T_I64;
+  case CT_DOUBLE:
+    return T_DOUBLE;
+  case CT_BINARY:
+    return T_STRING;
+  case CT_LIST:
+    return T_LIST;
+  case CT_SET:
+    return T_SET;
+  case CT_MAP:
+    return T_MAP;
+  case CT_STRUCT:
+    return T_STRUCT;
+  default:
+    PyErr_Format(PyExc_TypeError, "don't know what type: %d", type);
+    return static_cast<TType>(-1);
+  }
+}
+}
+}
+}
diff --git a/lib/py/src/ext/compact.h b/lib/py/src/ext/compact.h
new file mode 100644
index 0000000..5bba237
--- /dev/null
+++ b/lib/py/src/ext/compact.h
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_COMPACT_H
+#define THRIFT_PY_COMPACT_H
+
+#include <Python.h>
+#include "ext/protocol.h"
+#include "ext/endian.h"
+#include <stdint.h>
+#include <stack>
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+class CompactProtocol : public ProtocolBase<CompactProtocol> {
+public:
+  CompactProtocol() { readBool_.exists = false; }
+
+  virtual ~CompactProtocol() {}
+
+  void writeI8(int8_t val) { writeBuffer(reinterpret_cast<char*>(&val), 1); }
+
+  void writeI16(int16_t val) { writeVarint(toZigZag(val)); }
+
+  int writeI32(int32_t val) { return writeVarint(toZigZag(val)); }
+
+  void writeI64(int64_t val) { writeVarint64(toZigZag64(val)); }
+
+  void writeDouble(double dub) {
+    union {
+      double f;
+      int64_t t;
+    } transfer;
+    transfer.f = htolell(dub);
+    writeBuffer(reinterpret_cast<char*>(&transfer.t), sizeof(int64_t));
+  }
+
+  void writeBool(int v) { writeByte(static_cast<uint8_t>(v ? CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE)); }
+
+  void writeString(PyObject* value, int32_t len) {
+    writeVarint(len);
+    writeBuffer(PyBytes_AS_STRING(value), len);
+  }
+
+  bool writeListBegin(PyObject* value, const SetListTypeArgs& args, int32_t len) {
+    int ctype = toCompactType(args.element_type);
+    if (len <= 14) {
+      writeByte(static_cast<uint8_t>(len << 4 | ctype));
+    } else {
+      writeByte(0xf0 | ctype);
+      writeVarint(len);
+    }
+    return true;
+  }
+
+  bool writeMapBegin(PyObject* value, const MapTypeArgs& args, int32_t len) {
+    if (len == 0) {
+      writeByte(0);
+      return true;
+    }
+    int ctype = toCompactType(args.ktag) << 4 | toCompactType(args.vtag);
+    writeVarint(len);
+    writeByte(ctype);
+    return true;
+  }
+
+  bool writeStructBegin() {
+    writeTags_.push(0);
+    return true;
+  }
+  bool writeStructEnd() {
+    writeTags_.pop();
+    return true;
+  }
+
+  bool writeField(PyObject* value, const StructItemSpec& spec) {
+    if (spec.type == T_BOOL) {
+      doWriteFieldBegin(spec, PyObject_IsTrue(value) ? CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE);
+      return true;
+    } else {
+      doWriteFieldBegin(spec, toCompactType(spec.type));
+      return encodeValue(value, spec.type, spec.typeargs);
+    }
+  }
+
+  void writeFieldStop() { writeByte(0); }
+
+  bool readBool(bool& val) {
+    if (readBool_.exists) {
+      readBool_.exists = false;
+      val = readBool_.value;
+      return true;
+    }
+    char* buf;
+    if (!readBytes(&buf, 1)) {
+      return false;
+    }
+    val = buf[0] == CT_BOOLEAN_TRUE;
+    return true;
+  }
+  bool readI8(int8_t& val) {
+    char* buf;
+    if (!readBytes(&buf, 1)) {
+      return false;
+    }
+    val = buf[0];
+    return true;
+  }
+
+  bool readI16(int16_t& val) {
+    uint16_t uval;
+    if (readVarint<uint16_t, 3>(uval)) {
+      val = fromZigZag<int16_t, uint16_t>(uval);
+      return true;
+    }
+    return false;
+  }
+
+  bool readI32(int32_t& val) {
+    uint32_t uval;
+    if (readVarint<uint32_t, 5>(uval)) {
+      val = fromZigZag<int32_t, uint32_t>(uval);
+      return true;
+    }
+    return false;
+  }
+
+  bool readI64(int64_t& val) {
+    uint64_t uval;
+    if (readVarint<uint64_t, 10>(uval)) {
+      val = fromZigZag<int64_t, uint64_t>(uval);
+      return true;
+    }
+    return false;
+  }
+
+  bool readDouble(double& val) {
+    union {
+      int64_t f;
+      double t;
+    } transfer;
+
+    char* buf;
+    if (!readBytes(&buf, 8)) {
+      return false;
+    }
+    transfer.f = letohll(*reinterpret_cast<int64_t*>(buf));
+    val = transfer.t;
+    return true;
+  }
+
+  int32_t readString(char** buf) {
+    uint32_t len;
+    if (!readVarint<uint32_t, 5>(len) || !checkLengthLimit(len, stringLimit())) {
+      return -1;
+    }
+    if (len == 0) {
+      return 0;
+    }
+    if (!readBytes(buf, len)) {
+      return -1;
+    }
+    return len;
+  }
+
+  int32_t readListBegin(TType& etype) {
+    uint8_t b;
+    if (!readByte(b)) {
+      return -1;
+    }
+    etype = getTType(b & 0xf);
+    if (etype == -1) {
+      return -1;
+    }
+    uint32_t len = (b >> 4) & 0xf;
+    if (len == 15 && !readVarint<uint32_t, 5>(len)) {
+      return -1;
+    }
+    if (!checkLengthLimit(len, containerLimit())) {
+      return -1;
+    }
+    return len;
+  }
+
+  int32_t readMapBegin(TType& ktype, TType& vtype) {
+    uint32_t len;
+    if (!readVarint<uint32_t, 5>(len) || !checkLengthLimit(len, containerLimit())) {
+      return -1;
+    }
+    if (len != 0) {
+      uint8_t kvType;
+      if (!readByte(kvType)) {
+        return -1;
+      }
+      ktype = getTType(kvType >> 4);
+      vtype = getTType(kvType & 0xf);
+      if (ktype == -1 || vtype == -1) {
+        return -1;
+      }
+    }
+    return len;
+  }
+
+  bool readStructBegin() {
+    readTags_.push(0);
+    return true;
+  }
+  bool readStructEnd() {
+    readTags_.pop();
+    return true;
+  }
+  bool readFieldBegin(TType& type, int16_t& tag);
+
+  bool skipBool() {
+    bool val;
+    return readBool(val);
+  }
+#define SKIPBYTES(n)                                                                               \
+  do {                                                                                             \
+    if (!readBytes(&dummy_buf_, (n))) {                                                            \
+      return false;                                                                                \
+    }                                                                                              \
+    return true;                                                                                   \
+  } while (0)
+  bool skipByte() { SKIPBYTES(1); }
+  bool skipDouble() { SKIPBYTES(8); }
+  bool skipI16() {
+    int16_t val;
+    return readI16(val);
+  }
+  bool skipI32() {
+    int32_t val;
+    return readI32(val);
+  }
+  bool skipI64() {
+    int64_t val;
+    return readI64(val);
+  }
+  bool skipString() {
+    uint32_t len;
+    if (!readVarint<uint32_t, 5>(len)) {
+      return false;
+    }
+    SKIPBYTES(len);
+  }
+#undef SKIPBYTES
+
+private:
+  enum Types {
+    CT_STOP = 0x00,
+    CT_BOOLEAN_TRUE = 0x01,
+    CT_BOOLEAN_FALSE = 0x02,
+    CT_BYTE = 0x03,
+    CT_I16 = 0x04,
+    CT_I32 = 0x05,
+    CT_I64 = 0x06,
+    CT_DOUBLE = 0x07,
+    CT_BINARY = 0x08,
+    CT_LIST = 0x09,
+    CT_SET = 0x0A,
+    CT_MAP = 0x0B,
+    CT_STRUCT = 0x0C
+  };
+
+  static const uint8_t TTypeToCType[];
+
+  TType getTType(uint8_t type);
+
+  int toCompactType(TType type) {
+    int i = static_cast<int>(type);
+    return i < 16 ? TTypeToCType[i] : -1;
+  }
+
+  uint32_t toZigZag(int32_t val) { return (val >> 31) ^ (val << 1); }
+
+  uint64_t toZigZag64(int64_t val) { return (val >> 63) ^ (val << 1); }
+
+  int writeVarint(uint32_t val) {
+    int cnt = 1;
+    while (val & ~0x7fU) {
+      writeByte(static_cast<char>((val & 0x7fU) | 0x80U));
+      val >>= 7;
+      ++cnt;
+    }
+    writeByte(static_cast<char>(val));
+    return cnt;
+  }
+
+  int writeVarint64(uint64_t val) {
+    int cnt = 1;
+    while (val & ~0x7fULL) {
+      writeByte(static_cast<char>((val & 0x7fULL) | 0x80ULL));
+      val >>= 7;
+      ++cnt;
+    }
+    writeByte(static_cast<char>(val));
+    return cnt;
+  }
+
+  template <typename T, int Max>
+  bool readVarint(T& result) {
+    uint8_t b;
+    T val = 0;
+    int shift = 0;
+    for (int i = 0; i < Max; ++i) {
+      if (!readByte(b)) {
+        return false;
+      }
+      if (b & 0x80) {
+        val |= static_cast<T>(b & 0x7f) << shift;
+      } else {
+        val |= static_cast<T>(b) << shift;
+        result = val;
+        return true;
+      }
+      shift += 7;
+    }
+    PyErr_Format(PyExc_OverflowError, "varint exceeded %d bytes", Max);
+    return false;
+  }
+
+  template <typename S, typename U>
+  S fromZigZag(U val) {
+    return (val >> 1) ^ static_cast<U>(-static_cast<S>(val & 1));
+  }
+
+  void doWriteFieldBegin(const StructItemSpec& spec, int ctype) {
+    int diff = spec.tag - writeTags_.top();
+    if (diff > 0 && diff <= 15) {
+      writeByte(static_cast<uint8_t>(diff << 4 | ctype));
+    } else {
+      writeByte(static_cast<uint8_t>(ctype));
+      writeI16(spec.tag);
+    }
+    writeTags_.top() = spec.tag;
+  }
+
+  std::stack<int> writeTags_;
+  std::stack<int> readTags_;
+  struct {
+    bool exists;
+    bool value;
+  } readBool_;
+  char* dummy_buf_;
+};
+}
+}
+}
+#endif // THRIFT_PY_COMPACT_H
diff --git a/lib/py/src/ext/endian.h b/lib/py/src/ext/endian.h
new file mode 100644
index 0000000..91372a7
--- /dev/null
+++ b/lib/py/src/ext/endian.h
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_ENDIAN_H
+#define THRIFT_PY_ENDIAN_H
+
+#include <Python.h>
+
+#ifndef _WIN32
+#include <netinet/in.h>
+#else
+#include <WinSock2.h>
+#pragma comment(lib, "ws2_32.lib")
+#define BIG_ENDIAN (4321)
+#define LITTLE_ENDIAN (1234)
+#define BYTE_ORDER LITTLE_ENDIAN
+#define inline __inline
+#endif
+
+/* Fix endianness issues on Solaris */
+#if defined(__SVR4) && defined(__sun)
+#if defined(__i386) && !defined(__i386__)
+#define __i386__
+#endif
+
+#ifndef BIG_ENDIAN
+#define BIG_ENDIAN (4321)
+#endif
+#ifndef LITTLE_ENDIAN
+#define LITTLE_ENDIAN (1234)
+#endif
+
+/* I386 is LE, even on Solaris */
+#if !defined(BYTE_ORDER) && defined(__i386__)
+#define BYTE_ORDER LITTLE_ENDIAN
+#endif
+#endif
+
+#ifndef __BYTE_ORDER
+#if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
+#define __BYTE_ORDER BYTE_ORDER
+#define __LITTLE_ENDIAN LITTLE_ENDIAN
+#define __BIG_ENDIAN BIG_ENDIAN
+#else
+#error "Cannot determine endianness"
+#endif
+#endif
+
+// Same comment as the enum.  Sorry.
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define ntohll(n) (n)
+#define htonll(n) (n)
+#if defined(__GNUC__) && defined(__GLIBC__)
+#include <byteswap.h>
+#define letohll(n) bswap_64(n)
+#define htolell(n) bswap_64(n)
+#else /* GNUC & GLIBC */
+#define letohll(n) ((((unsigned long long)ntohl(n)) << 32) + ntohl(n >> 32))
+#define htolell(n) ((((unsigned long long)htonl(n)) << 32) + htonl(n >> 32))
+#endif
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+#if defined(__GNUC__) && defined(__GLIBC__)
+#include <byteswap.h>
+#define ntohll(n) bswap_64(n)
+#define htonll(n) bswap_64(n)
+#else /* GNUC & GLIBC */
+#define ntohll(n) ((((unsigned long long)ntohl(n)) << 32) + ntohl(n >> 32))
+#define htonll(n) ((((unsigned long long)htonl(n)) << 32) + htonl(n >> 32))
+#endif /* GNUC & GLIBC */
+#define letohll(n) (n)
+#define htolell(n) (n)
+#else /* __BYTE_ORDER */
+#error "Can't define htonll or ntohll!"
+#endif
+
+#endif // THRIFT_PY_ENDIAN_H
diff --git a/lib/py/src/ext/module.cpp b/lib/py/src/ext/module.cpp
new file mode 100644
index 0000000..82e3fe7
--- /dev/null
+++ b/lib/py/src/ext/module.cpp
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <Python.h>
+#include "types.h"
+#include "binary.h"
+#include "compact.h"
+#include <stdint.h>
+#include <sys/resource.h>
+
+// TODO(dreiss): defval appears to be unused.  Look into removing it.
+// TODO(dreiss): Make parse_spec_args recursive, and cache the output
+//               permanently in the object.  (Malloc and orphan.)
+// TODO(dreiss): Why do we need cStringIO for reading, why not just char*?
+//               Can cStringIO let us work with a BufferedTransport?
+// TODO(dreiss): Don't ignore the rv from cwrite (maybe).
+
+// Doing a benchmark shows that interning actually makes a difference, amazingly.
+
+/** Pointer to interned string to speed up attribute lookup. */
+PyObject* INTERN_STRING(TFrozenDict);
+PyObject* INTERN_STRING(cstringio_buf);
+PyObject* INTERN_STRING(cstringio_refill);
+static PyObject* INTERN_STRING(string_length_limit);
+static PyObject* INTERN_STRING(container_length_limit);
+static PyObject* INTERN_STRING(trans);
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+template <typename T>
+static PyObject* encode_impl(PyObject* args) {
+  if (!args)
+    return NULL;
+
+  PyObject* enc_obj = NULL;
+  PyObject* type_args = NULL;
+  if (!PyArg_ParseTuple(args, "OO", &enc_obj, &type_args)) {
+    return NULL;
+  }
+  if (!enc_obj || !type_args) {
+    return NULL;
+  }
+
+  T protocol;
+  if (!protocol.prepareEncodeBuffer() || !protocol.encodeValue(enc_obj, T_STRUCT, type_args)) {
+    return NULL;
+  }
+
+  return protocol.getEncodedValue();
+}
+
+static inline long as_long_then_delete(PyObject* value, long default_value) {
+  ScopedPyObject scope(value);
+  long v = PyInt_AsLong(value);
+  if (INT_CONV_ERROR_OCCURRED(v)) {
+    PyErr_Clear();
+    return default_value;
+  }
+  return v;
+}
+
+template <typename T>
+static PyObject* decode_impl(PyObject* args) {
+  PyObject* output_obj = NULL;
+  PyObject* oprot = NULL;
+  PyObject* typeargs = NULL;
+  if (!PyArg_ParseTuple(args, "OOO", &output_obj, &oprot, &typeargs)) {
+    return NULL;
+  }
+
+  T protocol;
+  protocol.setStringLengthLimit(
+      as_long_then_delete(PyObject_GetAttr(oprot, INTERN_STRING(string_length_limit)), INT32_MAX));
+  protocol.setContainerLengthLimit(
+      as_long_then_delete(PyObject_GetAttr(oprot, INTERN_STRING(container_length_limit)),
+                          INT32_MAX));
+  ScopedPyObject transport(PyObject_GetAttr(oprot, INTERN_STRING(trans)));
+  if (!transport) {
+    return NULL;
+  }
+
+  StructTypeArgs parsedargs;
+  if (!parse_struct_args(&parsedargs, typeargs)) {
+    return NULL;
+  }
+
+  if (!protocol.prepareDecodeBufferFromTransport(transport.get())) {
+    return NULL;
+  }
+
+  return protocol.readStruct(output_obj, parsedargs.klass, parsedargs.spec);
+}
+}
+}
+}
+
+using namespace apache::thrift::py;
+
+/* -- PYTHON MODULE SETUP STUFF --- */
+
+extern "C" {
+
+static PyObject* encode_binary(PyObject*, PyObject* args) {
+  return encode_impl<BinaryProtocol>(args);
+}
+
+static PyObject* decode_binary(PyObject*, PyObject* args) {
+  return decode_impl<BinaryProtocol>(args);
+}
+
+static PyObject* encode_compact(PyObject*, PyObject* args) {
+  return encode_impl<CompactProtocol>(args);
+}
+
+static PyObject* decode_compact(PyObject*, PyObject* args) {
+  return decode_impl<CompactProtocol>(args);
+}
+
+static PyMethodDef ThriftFastBinaryMethods[] = {
+    {"encode_binary", encode_binary, METH_VARARGS, ""},
+    {"decode_binary", decode_binary, METH_VARARGS, ""},
+    {"encode_compact", encode_compact, METH_VARARGS, ""},
+    {"decode_compact", decode_compact, METH_VARARGS, ""},
+    {NULL, NULL, 0, NULL} /* Sentinel */
+};
+
+#define INITERROR return;
+
+void initfastbinary() {
+
+  PycString_IMPORT;
+  if (PycStringIO == NULL)
+    INITERROR
+
+  const rlim_t kStackSize = 16 * 1024 * 1024; // min stack size = 16 MB
+  struct rlimit rl;
+  int result;
+
+  result = getrlimit(RLIMIT_STACK, &rl);
+  if (result == 0) {
+    if (rl.rlim_cur < kStackSize) {
+      rl.rlim_cur = kStackSize;
+      result = setrlimit(RLIMIT_STACK, &rl);
+      if (result != 0) {
+        fprintf(stderr, "setrlimit returned result = %d\n", result);
+      }
+    }
+  }
+
+#define INIT_INTERN_STRING(value)                                                                  \
+  do {                                                                                             \
+    INTERN_STRING(value) = PyString_InternFromString(#value);                                      \
+    if (!INTERN_STRING(value))                                                                     \
+      INITERROR                                                                                    \
+  } while (0)
+
+  INIT_INTERN_STRING(TFrozenDict);
+  INIT_INTERN_STRING(cstringio_buf);
+  INIT_INTERN_STRING(cstringio_refill);
+  INIT_INTERN_STRING(string_length_limit);
+  INIT_INTERN_STRING(container_length_limit);
+  INIT_INTERN_STRING(trans);
+#undef INIT_INTERN_STRING
+
+  PyObject* module =
+      Py_InitModule("thrift.protocol.fastbinary", ThriftFastBinaryMethods);
+  if (module == NULL)
+    INITERROR;
+
+}
+}
diff --git a/lib/py/src/ext/protocol.h b/lib/py/src/ext/protocol.h
new file mode 100644
index 0000000..bf3a6b8
--- /dev/null
+++ b/lib/py/src/ext/protocol.h
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_PROTOCOL_H
+#define THRIFT_PY_PROTOCOL_H
+
+#include "ext/types.h"
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+template <typename Impl>
+class ProtocolBase {
+
+public:
+  ProtocolBase() : stringLimit_(INT32_MAX), containerLimit_(INT32_MAX), output_(NULL) {}
+  inline virtual ~ProtocolBase();
+
+  bool prepareDecodeBufferFromTransport(PyObject* trans);
+
+  PyObject* readStruct(PyObject* output, PyObject* klass, PyObject* spec_seq);
+
+
+  bool prepareEncodeBuffer();
+
+  bool encodeValue(PyObject* value, TType type, PyObject* typeargs);
+
+  PyObject* getEncodedValue();
+
+  long stringLimit() const { return stringLimit_; }
+  void setStringLengthLimit(long limit) { stringLimit_ = limit; }
+
+  long containerLimit() const { return containerLimit_; }
+  void setContainerLengthLimit(long limit) { containerLimit_ = limit; }
+
+protected:
+  bool readBytes(char** output, int len);
+
+  bool readByte(uint8_t& val) {
+    char* buf;
+    if (!readBytes(&buf, 1)) {
+      return false;
+    }
+    val = static_cast<uint8_t>(buf[0]);
+    return true;
+  }
+
+  bool writeBuffer(char* data, size_t len);
+
+  void writeByte(uint8_t val) { writeBuffer(reinterpret_cast<char*>(&val), 1); }
+
+  PyObject* decodeValue(TType type, PyObject* typeargs);
+
+  bool skip(TType type);
+
+  inline bool checkType(TType got, TType expected);
+  inline bool checkLengthLimit(int32_t len, long limit);
+
+  inline bool isUtf8(PyObject* typeargs);
+
+private:
+  Impl* impl() { return static_cast<Impl*>(this); }
+
+  long stringLimit_;
+  long containerLimit_;
+  EncodeBuffer* output_;
+  DecodeBuffer input_;
+};
+}
+}
+}
+
+#include "ext/protocol.tcc"
+
+#endif // THRIFT_PY_PROTOCOL_H
diff --git a/lib/py/src/ext/protocol.tcc b/lib/py/src/ext/protocol.tcc
new file mode 100644
index 0000000..3df83a1
--- /dev/null
+++ b/lib/py/src/ext/protocol.tcc
@@ -0,0 +1,820 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_PROTOCOL_TCC
+#define THRIFT_PY_PROTOCOL_TCC
+
+#define CHECK_RANGE(v, min, max) (((v) <= (max)) && ((v) >= (min)))
+#define INIT_OUTBUF_SIZE 128
+
+#include <cStringIO.h>
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+namespace detail {
+
+inline bool input_check(PyObject* input) {
+  return PycStringIO_InputCheck(input);
+}
+
+inline EncodeBuffer* new_encode_buffer(size_t size) {
+  if (!PycStringIO) {
+    PycString_IMPORT;
+  }
+  if (!PycStringIO) {
+    return NULL;
+  }
+  return PycStringIO->NewOutput(size);
+}
+
+inline int read_buffer(PyObject* buf, char** output, int len) {
+  if (!PycStringIO) {
+    PycString_IMPORT;
+  }
+  if (!PycStringIO) {
+    PyErr_SetString(PyExc_ImportError, "failed to import native cStringIO");
+    return -1;
+  }
+  return PycStringIO->cread(buf, output, len);
+}
+}
+
+template <typename Impl>
+inline ProtocolBase<Impl>::~ProtocolBase() {
+  if (output_) {
+    Py_CLEAR(output_);
+  }
+}
+
+template <typename Impl>
+inline bool ProtocolBase<Impl>::isUtf8(PyObject* typeargs) {
+  return PyString_Check(typeargs) && !strncmp(PyString_AS_STRING(typeargs), "UTF8", 4);
+}
+
+template <typename Impl>
+PyObject* ProtocolBase<Impl>::getEncodedValue() {
+  if (!PycStringIO) {
+    PycString_IMPORT;
+  }
+  if (!PycStringIO) {
+    return NULL;
+  }
+  return PycStringIO->cgetvalue(output_);
+}
+
+template <typename Impl>
+inline bool ProtocolBase<Impl>::writeBuffer(char* data, size_t size) {
+  if (!PycStringIO) {
+    PycString_IMPORT;
+  }
+  if (!PycStringIO) {
+    PyErr_SetString(PyExc_ImportError, "failed to import native cStringIO");
+    return false;
+  }
+  int len = PycStringIO->cwrite(output_, data, size);
+  if (len < 0) {
+    PyErr_SetString(PyExc_IOError, "failed to write to cStringIO object");
+    return false;
+  }
+  if (len != size) {
+    PyErr_Format(PyExc_EOFError, "write length mismatch: expected %d got %d", size, len);
+    return false;
+  }
+  return true;
+}
+
+namespace detail {
+
+#define DECLARE_OP_SCOPE(name, op)                                                                 \
+  template <typename Impl>                                                                         \
+  struct name##Scope {                                                                             \
+    Impl* impl;                                                                                    \
+    bool valid;                                                                                    \
+    name##Scope(Impl* thiz) : impl(thiz), valid(impl->op##Begin()) {}                              \
+    ~name##Scope() {                                                                               \
+      if (valid)                                                                                   \
+        impl->op##End();                                                                           \
+    }                                                                                              \
+    operator bool() { return valid; }                                                              \
+  };                                                                                               \
+  template <typename Impl, template <typename> class T>                                            \
+  name##Scope<Impl> op##Scope(T<Impl>* thiz) {                                                     \
+    return name##Scope<Impl>(static_cast<Impl*>(thiz));                                            \
+  }
+DECLARE_OP_SCOPE(WriteStruct, writeStruct)
+DECLARE_OP_SCOPE(ReadStruct, readStruct)
+#undef DECLARE_OP_SCOPE
+
+inline bool check_ssize_t_32(Py_ssize_t len) {
+  // error from getting the int
+  if (INT_CONV_ERROR_OCCURRED(len)) {
+    return false;
+  }
+  if (!CHECK_RANGE(len, 0, INT32_MAX)) {
+    PyErr_SetString(PyExc_OverflowError, "size out of range: exceeded INT32_MAX");
+    return false;
+  }
+  return true;
+}
+}
+
+template <typename T>
+bool parse_pyint(PyObject* o, T* ret, int32_t min, int32_t max) {
+  long val = PyInt_AsLong(o);
+
+  if (INT_CONV_ERROR_OCCURRED(val)) {
+    return false;
+  }
+  if (!CHECK_RANGE(val, min, max)) {
+    PyErr_SetString(PyExc_OverflowError, "int out of range");
+    return false;
+  }
+
+  *ret = static_cast<T>(val);
+  return true;
+}
+
+template <typename Impl>
+inline bool ProtocolBase<Impl>::checkType(TType got, TType expected) {
+  if (expected != got) {
+    PyErr_SetString(PyExc_TypeError, "got wrong ttype while reading field");
+    return false;
+  }
+  return true;
+}
+
+template <typename Impl>
+bool ProtocolBase<Impl>::checkLengthLimit(int32_t len, long limit) {
+  if (len < 0) {
+    PyErr_Format(PyExc_OverflowError, "negative length: %d", limit);
+    return false;
+  }
+  if (len > limit) {
+    PyErr_Format(PyExc_OverflowError, "size exceeded specified limit: %d", limit);
+    return false;
+  }
+  return true;
+}
+
+template <typename Impl>
+bool ProtocolBase<Impl>::readBytes(char** output, int len) {
+  if (len < 0) {
+    PyErr_Format(PyExc_ValueError, "attempted to read negative length: %d", len);
+    return false;
+  }
+  // TODO(dreiss): Don't fear the malloc.  Think about taking a copy of
+  //               the partial read instead of forcing the transport
+  //               to prepend it to its buffer.
+
+  int rlen = detail::read_buffer(input_.stringiobuf.get(), output, len);
+
+  if (rlen == len) {
+    return true;
+  } else if (rlen == -1) {
+    return false;
+  } else {
+    // using building functions as this is a rare codepath
+    ScopedPyObject newiobuf(
+        PyObject_CallFunction(input_.refill_callable.get(), refill_signature, *output, rlen, len, NULL));
+    if (!newiobuf) {
+      return false;
+    }
+
+    // must do this *AFTER* the call so that we don't deref the io buffer
+    input_.stringiobuf.reset(newiobuf.release());
+
+    rlen = detail::read_buffer(input_.stringiobuf.get(), output, len);
+
+    if (rlen == len) {
+      return true;
+    } else if (rlen == -1) {
+      return false;
+    } else {
+      // TODO(dreiss): This could be a valid code path for big binary blobs.
+      PyErr_SetString(PyExc_TypeError, "refill claimed to have refilled the buffer, but didn't!!");
+      return false;
+    }
+  }
+}
+
+template <typename Impl>
+bool ProtocolBase<Impl>::prepareDecodeBufferFromTransport(PyObject* trans) {
+  if (input_.stringiobuf) {
+    PyErr_SetString(PyExc_ValueError, "decode buffer is already initialized");
+    return false;
+  }
+
+  ScopedPyObject stringiobuf(PyObject_GetAttr(trans, INTERN_STRING(cstringio_buf)));
+  if (!stringiobuf) {
+    return false;
+  }
+  if (!detail::input_check(stringiobuf.get())) {
+    PyErr_SetString(PyExc_TypeError, "expecting stringio input_");
+    return false;
+  }
+
+  ScopedPyObject refill_callable(PyObject_GetAttr(trans, INTERN_STRING(cstringio_refill)));
+  if (!refill_callable) {
+    return false;
+  }
+  if (!PyCallable_Check(refill_callable.get())) {
+    PyErr_SetString(PyExc_TypeError, "expecting callable");
+    return false;
+  }
+
+  input_.stringiobuf.swap(stringiobuf);
+  input_.refill_callable.swap(refill_callable);
+  return true;
+}
+
+template <typename Impl>
+bool ProtocolBase<Impl>::prepareEncodeBuffer() {
+  output_ = detail::new_encode_buffer(INIT_OUTBUF_SIZE);
+  return output_ != NULL;
+}
+
+template <typename Impl>
+bool ProtocolBase<Impl>::encodeValue(PyObject* value, TType type, PyObject* typeargs) {
+  /*
+   * Refcounting Strategy:
+   *
+   * We assume that elements of the thrift_spec tuple are not going to be
+   * mutated, so we don't ref count those at all. Other than that, we try to
+   * keep a reference to all the user-created objects while we work with them.
+   * encodeValue assumes that a reference is already held. The *caller* is
+   * responsible for handling references
+   */
+
+  switch (type) {
+
+  case T_BOOL: {
+    int v = PyObject_IsTrue(value);
+    if (v == -1) {
+      return false;
+    }
+    impl()->writeBool(v);
+    return true;
+  }
+  case T_I08: {
+    int8_t val;
+
+    if (!parse_pyint(value, &val, INT8_MIN, INT8_MAX)) {
+      return false;
+    }
+
+    impl()->writeI8(val);
+    return true;
+  }
+  case T_I16: {
+    int16_t val;
+
+    if (!parse_pyint(value, &val, INT16_MIN, INT16_MAX)) {
+      return false;
+    }
+
+    impl()->writeI16(val);
+    return true;
+  }
+  case T_I32: {
+    int32_t val;
+
+    if (!parse_pyint(value, &val, INT32_MIN, INT32_MAX)) {
+      return false;
+    }
+
+    impl()->writeI32(val);
+    return true;
+  }
+  case T_I64: {
+    int64_t nval = PyLong_AsLongLong(value);
+
+    if (INT_CONV_ERROR_OCCURRED(nval)) {
+      return false;
+    }
+
+    if (!CHECK_RANGE(nval, INT64_MIN, INT64_MAX)) {
+      PyErr_SetString(PyExc_OverflowError, "int out of range");
+      return false;
+    }
+
+    impl()->writeI64(nval);
+    return true;
+  }
+
+  case T_DOUBLE: {
+    double nval = PyFloat_AsDouble(value);
+    if (nval == -1.0 && PyErr_Occurred()) {
+      return false;
+    }
+
+    impl()->writeDouble(nval);
+    return true;
+  }
+
+  case T_STRING: {
+    if (PyUnicode_Check(value)) {
+      value = PyUnicode_AsUTF8String(value);
+      if (!value) {
+        return false;
+      }
+    }
+
+    Py_ssize_t len = PyBytes_Size(value);
+    if (!detail::check_ssize_t_32(len)) {
+      return false;
+    }
+
+    impl()->writeString(value, static_cast<int32_t>(len));
+    return true;
+  }
+
+  case T_LIST:
+  case T_SET: {
+    SetListTypeArgs parsedargs;
+    if (!parse_set_list_args(&parsedargs, typeargs)) {
+      return false;
+    }
+
+    Py_ssize_t len = PyObject_Length(value);
+    if (!detail::check_ssize_t_32(len)) {
+      return false;
+    }
+
+    if (!impl()->writeListBegin(value, parsedargs, static_cast<int32_t>(len)) || PyErr_Occurred()) {
+      return false;
+    }
+    ScopedPyObject iterator(PyObject_GetIter(value));
+    if (!iterator) {
+      return false;
+    }
+
+    while (PyObject* rawItem = PyIter_Next(iterator.get())) {
+      ScopedPyObject item(rawItem);
+      if (!encodeValue(item.get(), parsedargs.element_type, parsedargs.typeargs)) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  case T_MAP: {
+    Py_ssize_t len = PyDict_Size(value);
+    if (!detail::check_ssize_t_32(len)) {
+      return false;
+    }
+
+    MapTypeArgs parsedargs;
+    if (!parse_map_args(&parsedargs, typeargs)) {
+      return false;
+    }
+
+    if (!impl()->writeMapBegin(value, parsedargs, static_cast<int32_t>(len)) || PyErr_Occurred()) {
+      return false;
+    }
+    Py_ssize_t pos = 0;
+    PyObject* k = NULL;
+    PyObject* v = NULL;
+    // TODO(bmaurer): should support any mapping, not just dicts
+    while (PyDict_Next(value, &pos, &k, &v)) {
+      if (!encodeValue(k, parsedargs.ktag, parsedargs.ktypeargs)
+          || !encodeValue(v, parsedargs.vtag, parsedargs.vtypeargs)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  case T_STRUCT: {
+    StructTypeArgs parsedargs;
+    if (!parse_struct_args(&parsedargs, typeargs)) {
+      return false;
+    }
+
+    Py_ssize_t nspec = PyTuple_Size(parsedargs.spec);
+    if (nspec == -1) {
+      PyErr_SetString(PyExc_TypeError, "spec is not a tuple");
+      return false;
+    }
+
+    detail::WriteStructScope<Impl> scope = detail::writeStructScope(this);
+    if (!scope) {
+      return false;
+    }
+    for (Py_ssize_t i = 0; i < nspec; i++) {
+      PyObject* spec_tuple = PyTuple_GET_ITEM(parsedargs.spec, i);
+      if (spec_tuple == Py_None) {
+        continue;
+      }
+
+      StructItemSpec parsedspec;
+      if (!parse_struct_item_spec(&parsedspec, spec_tuple)) {
+        return false;
+      }
+
+      ScopedPyObject instval(PyObject_GetAttr(value, parsedspec.attrname));
+
+      if (!instval) {
+        return false;
+      }
+
+      if (instval.get() == Py_None) {
+        continue;
+      }
+
+      bool res = impl()->writeField(instval.get(), parsedspec);
+      if (!res) {
+        return false;
+      }
+    }
+    impl()->writeFieldStop();
+    return true;
+  }
+
+  case T_STOP:
+  case T_VOID:
+  case T_UTF16:
+  case T_UTF8:
+  case T_U64:
+  default:
+    PyErr_Format(PyExc_TypeError, "Unexpected TType for encodeValue: %d", type);
+    return false;
+  }
+
+  return true;
+}
+
+template <typename Impl>
+bool ProtocolBase<Impl>::skip(TType type) {
+  switch (type) {
+  case T_BOOL:
+    return impl()->skipBool();
+  case T_I08:
+    return impl()->skipByte();
+  case T_I16:
+    return impl()->skipI16();
+  case T_I32:
+    return impl()->skipI32();
+  case T_I64:
+    return impl()->skipI64();
+  case T_DOUBLE:
+    return impl()->skipDouble();
+
+  case T_STRING: {
+    return impl()->skipString();
+  }
+
+  case T_LIST:
+  case T_SET: {
+    TType etype = T_STOP;
+    int32_t len = impl()->readListBegin(etype);
+    if (len < 0) {
+      return false;
+    }
+    for (int32_t i = 0; i < len; i++) {
+      if (!skip(etype)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  case T_MAP: {
+    TType ktype = T_STOP;
+    TType vtype = T_STOP;
+    int32_t len = impl()->readMapBegin(ktype, vtype);
+    if (len < 0) {
+      return false;
+    }
+    for (int32_t i = 0; i < len; i++) {
+      if (!skip(ktype) || !skip(vtype)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  case T_STRUCT: {
+    detail::ReadStructScope<Impl> scope = detail::readStructScope(this);
+    if (!scope) {
+      return false;
+    }
+    while (true) {
+      TType type = T_STOP;
+      int16_t tag;
+      if (!impl()->readFieldBegin(type, tag)) {
+        return false;
+      }
+      if (type == T_STOP) {
+        return true;
+      }
+      if (!skip(type)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  case T_STOP:
+  case T_VOID:
+  case T_UTF16:
+  case T_UTF8:
+  case T_U64:
+  default:
+    PyErr_Format(PyExc_TypeError, "Unexpected TType for skip: %d", type);
+    return false;
+  }
+
+  return true;
+}
+
+// Returns a new reference.
+template <typename Impl>
+PyObject* ProtocolBase<Impl>::decodeValue(TType type, PyObject* typeargs) {
+  switch (type) {
+
+  case T_BOOL: {
+    bool v = 0;
+    if (!impl()->readBool(v)) {
+      return NULL;
+    }
+    if (v) {
+      Py_RETURN_TRUE;
+    } else {
+      Py_RETURN_FALSE;
+    }
+  }
+  case T_I08: {
+    int8_t v = 0;
+    if (!impl()->readI8(v)) {
+      return NULL;
+    }
+    return PyInt_FromLong(v);
+  }
+  case T_I16: {
+    int16_t v = 0;
+    if (!impl()->readI16(v)) {
+      return NULL;
+    }
+    return PyInt_FromLong(v);
+  }
+  case T_I32: {
+    int32_t v = 0;
+    if (!impl()->readI32(v)) {
+      return NULL;
+    }
+    return PyInt_FromLong(v);
+  }
+
+  case T_I64: {
+    int64_t v = 0;
+    if (!impl()->readI64(v)) {
+      return NULL;
+    }
+    // TODO(dreiss): Find out if we can take this fastpath always when
+    //               sizeof(long) == sizeof(long long).
+    if (CHECK_RANGE(v, LONG_MIN, LONG_MAX)) {
+      return PyInt_FromLong((long)v);
+    }
+    return PyLong_FromLongLong(v);
+  }
+
+  case T_DOUBLE: {
+    double v = 0.0;
+    if (!impl()->readDouble(v)) {
+      return NULL;
+    }
+    return PyFloat_FromDouble(v);
+  }
+
+  case T_STRING: {
+    char* buf = NULL;
+    int len = impl()->readString(&buf);
+    if (len < 0) {
+      return NULL;
+    }
+    if (isUtf8(typeargs)) {
+      return PyUnicode_DecodeUTF8(buf, len, 0);
+    } else {
+      return PyBytes_FromStringAndSize(buf, len);
+    }
+  }
+
+  case T_LIST:
+  case T_SET: {
+    SetListTypeArgs parsedargs;
+    if (!parse_set_list_args(&parsedargs, typeargs)) {
+      return NULL;
+    }
+
+    TType etype = T_STOP;
+    int32_t len = impl()->readListBegin(etype);
+    if (len < 0) {
+      return NULL;
+    }
+    if (len > 0 && !checkType(etype, parsedargs.element_type)) {
+      return NULL;
+    }
+
+    bool use_tuple = type == T_LIST && parsedargs.immutable;
+    ScopedPyObject ret(use_tuple ? PyTuple_New(len) : PyList_New(len));
+    if (!ret) {
+      return NULL;
+    }
+
+    for (int i = 0; i < len; i++) {
+      PyObject* item = decodeValue(etype, parsedargs.typeargs);
+      if (!item) {
+        return NULL;
+      }
+      if (use_tuple) {
+        PyTuple_SET_ITEM(ret.get(), i, item);
+      } else {
+        PyList_SET_ITEM(ret.get(), i, item);
+      }
+    }
+
+    // TODO(dreiss): Consider biting the bullet and making two separate cases
+    //               for list and set, avoiding this post facto conversion.
+    if (type == T_SET) {
+      PyObject* setret;
+      setret = parsedargs.immutable ? PyFrozenSet_New(ret.get()) : PySet_New(ret.get());
+      return setret;
+    }
+    return ret.release();
+  }
+
+  case T_MAP: {
+    MapTypeArgs parsedargs;
+    if (!parse_map_args(&parsedargs, typeargs)) {
+      return NULL;
+    }
+
+    TType ktype = T_STOP;
+    TType vtype = T_STOP;
+    uint32_t len = impl()->readMapBegin(ktype, vtype);
+    if (len > 0 && (!checkType(ktype, parsedargs.ktag) || !checkType(vtype, parsedargs.vtag))) {
+      return NULL;
+    }
+
+    ScopedPyObject ret(PyDict_New());
+    if (!ret) {
+      return NULL;
+    }
+
+    for (uint32_t i = 0; i < len; i++) {
+      ScopedPyObject k(decodeValue(ktype, parsedargs.ktypeargs));
+      if (!k) {
+        return NULL;
+      }
+      ScopedPyObject v(decodeValue(vtype, parsedargs.vtypeargs));
+      if (!v) {
+        return NULL;
+      }
+      if (PyDict_SetItem(ret.get(), k.get(), v.get()) == -1) {
+        return NULL;
+      }
+    }
+
+    if (parsedargs.immutable) {
+      if (!ThriftModule) {
+        ThriftModule = PyImport_ImportModule("thrift.Thrift");
+      }
+      if (!ThriftModule) {
+        return NULL;
+      }
+
+      ScopedPyObject cls(PyObject_GetAttr(ThriftModule, INTERN_STRING(TFrozenDict)));
+      if (!cls) {
+        return NULL;
+      }
+
+      ScopedPyObject arg(PyTuple_New(1));
+      PyTuple_SET_ITEM(arg.get(), 0, ret.release());
+      ret.reset(PyObject_CallObject(cls.get(), arg.get()));
+    }
+
+    return ret.release();
+  }
+
+  case T_STRUCT: {
+    StructTypeArgs parsedargs;
+    if (!parse_struct_args(&parsedargs, typeargs)) {
+      return NULL;
+    }
+    return readStruct(Py_None, parsedargs.klass, parsedargs.spec);
+  }
+
+  case T_STOP:
+  case T_VOID:
+  case T_UTF16:
+  case T_UTF8:
+  case T_U64:
+  default:
+    PyErr_Format(PyExc_TypeError, "Unexpected TType for decodeValue: %d", type);
+    return NULL;
+  }
+}
+
+template <typename Impl>
+PyObject* ProtocolBase<Impl>::readStruct(PyObject* output, PyObject* klass, PyObject* spec_seq) {
+  int spec_seq_len = PyTuple_Size(spec_seq);
+  bool immutable = output == Py_None;
+  ScopedPyObject kwargs;
+  if (spec_seq_len == -1) {
+    return NULL;
+  }
+
+  if (immutable) {
+    kwargs.reset(PyDict_New());
+    if (!kwargs) {
+      PyErr_SetString(PyExc_TypeError, "failed to prepare kwargument storage");
+      return NULL;
+    }
+  }
+
+  detail::ReadStructScope<Impl> scope = detail::readStructScope(this);
+  if (!scope) {
+    return NULL;
+  }
+  while (true) {
+    TType type = T_STOP;
+    int16_t tag;
+    if (!impl()->readFieldBegin(type, tag)) {
+      return NULL;
+    }
+    if (type == T_STOP) {
+      break;
+    }
+    if (tag < 0 || tag >= spec_seq_len) {
+      if (!skip(type)) {
+        PyErr_SetString(PyExc_TypeError, "Error while skipping unknown field");
+        return NULL;
+      }
+      continue;
+    }
+
+    PyObject* item_spec = PyTuple_GET_ITEM(spec_seq, tag);
+    if (item_spec == Py_None) {
+      if (!skip(type)) {
+        PyErr_SetString(PyExc_TypeError, "Error while skipping unknown field");
+        return NULL;
+      }
+      continue;
+    }
+    StructItemSpec parsedspec;
+    if (!parse_struct_item_spec(&parsedspec, item_spec)) {
+      return NULL;
+    }
+    if (parsedspec.type != type) {
+      if (!skip(type)) {
+        PyErr_Format(PyExc_TypeError, "struct field had wrong type: expected %d but got %d",
+                     parsedspec.type, type);
+        return NULL;
+      }
+      continue;
+    }
+
+    ScopedPyObject fieldval(decodeValue(parsedspec.type, parsedspec.typeargs));
+    if (!fieldval) {
+      return NULL;
+    }
+
+    if ((immutable && PyDict_SetItem(kwargs.get(), parsedspec.attrname, fieldval.get()) == -1)
+        || (!immutable && PyObject_SetAttr(output, parsedspec.attrname, fieldval.get()) == -1)) {
+      return NULL;
+    }
+  }
+  if (immutable) {
+    ScopedPyObject args(PyTuple_New(0));
+    if (!args) {
+      PyErr_SetString(PyExc_TypeError, "failed to prepare argument storage");
+      return NULL;
+    }
+    return PyObject_Call(klass, args.get(), kwargs.get());
+  }
+  Py_INCREF(output);
+  return output;
+}
+}
+}
+}
+#endif // THRIFT_PY_PROTOCOL_H
diff --git a/lib/py/src/ext/types.cpp b/lib/py/src/ext/types.cpp
new file mode 100644
index 0000000..f3a29a2
--- /dev/null
+++ b/lib/py/src/ext/types.cpp
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "ext/types.h"
+#include "ext/protocol.h"
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+PyObject* ThriftModule = NULL;
+
+char refill_signature[] = {'s', '#', 'i'};
+
+bool parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple) {
+  // i'd like to use ParseArgs here, but it seems to be a bottleneck.
+  if (PyTuple_Size(spec_tuple) != 5) {
+    PyErr_Format(PyExc_TypeError, "expecting 5 arguments for spec tuple but got %d",
+                 static_cast<int>(PyTuple_Size(spec_tuple)));
+    return false;
+  }
+
+  dest->tag = static_cast<TType>(PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 0)));
+  if (INT_CONV_ERROR_OCCURRED(dest->tag)) {
+    return false;
+  }
+
+  dest->type = static_cast<TType>(PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 1)));
+  if (INT_CONV_ERROR_OCCURRED(dest->type)) {
+    return false;
+  }
+
+  dest->attrname = PyTuple_GET_ITEM(spec_tuple, 2);
+  dest->typeargs = PyTuple_GET_ITEM(spec_tuple, 3);
+  dest->defval = PyTuple_GET_ITEM(spec_tuple, 4);
+  return true;
+}
+
+bool parse_set_list_args(SetListTypeArgs* dest, PyObject* typeargs) {
+  if (PyTuple_Size(typeargs) != 3) {
+    PyErr_SetString(PyExc_TypeError, "expecting tuple of size 3 for list/set type args");
+    return false;
+  }
+
+  dest->element_type = static_cast<TType>(PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0)));
+  if (INT_CONV_ERROR_OCCURRED(dest->element_type)) {
+    return false;
+  }
+
+  dest->typeargs = PyTuple_GET_ITEM(typeargs, 1);
+
+  dest->immutable = Py_True == PyTuple_GET_ITEM(typeargs, 2);
+
+  return true;
+}
+
+bool parse_map_args(MapTypeArgs* dest, PyObject* typeargs) {
+  if (PyTuple_Size(typeargs) != 5) {
+    PyErr_SetString(PyExc_TypeError, "expecting 5 arguments for typeargs to map");
+    return false;
+  }
+
+  dest->ktag = static_cast<TType>(PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0)));
+  if (INT_CONV_ERROR_OCCURRED(dest->ktag)) {
+    return false;
+  }
+
+  dest->vtag = static_cast<TType>(PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 2)));
+  if (INT_CONV_ERROR_OCCURRED(dest->vtag)) {
+    return false;
+  }
+
+  dest->ktypeargs = PyTuple_GET_ITEM(typeargs, 1);
+  dest->vtypeargs = PyTuple_GET_ITEM(typeargs, 3);
+  dest->immutable = Py_True == PyTuple_GET_ITEM(typeargs, 4);
+
+  return true;
+}
+
+bool parse_struct_args(StructTypeArgs* dest, PyObject* typeargs) {
+  if (PyTuple_Size(typeargs) != 2) {
+    PyErr_SetString(PyExc_TypeError, "expecting tuple of size 2 for struct args");
+    return false;
+  }
+
+  dest->klass = PyTuple_GET_ITEM(typeargs, 0);
+  dest->spec = PyTuple_GET_ITEM(typeargs, 1);
+
+  return true;
+}
+}
+}
+}
diff --git a/lib/py/src/ext/types.h b/lib/py/src/ext/types.h
new file mode 100644
index 0000000..749bb68
--- /dev/null
+++ b/lib/py/src/ext/types.h
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_TYPES_H
+#define THRIFT_PY_TYPES_H
+
+#include <Python.h>
+
+#define INTERN_STRING(value) _intern_##value
+
+#define INT_CONV_ERROR_OCCURRED(v) (((v) == -1) && PyErr_Occurred())
+
+extern "C" {
+extern PyObject* INTERN_STRING(TFrozenDict);
+extern PyObject* INTERN_STRING(cstringio_buf);
+extern PyObject* INTERN_STRING(cstringio_refill);
+}
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+extern PyObject* ThriftModule;
+
+// Stolen out of TProtocol.h.
+// It would be a huge pain to have both get this from one place.
+enum TType {
+  T_STOP = 0,
+  T_VOID = 1,
+  T_BOOL = 2,
+  T_BYTE = 3,
+  T_I08 = 3,
+  T_I16 = 6,
+  T_I32 = 8,
+  T_U64 = 9,
+  T_I64 = 10,
+  T_DOUBLE = 4,
+  T_STRING = 11,
+  T_UTF7 = 11,
+  T_STRUCT = 12,
+  T_MAP = 13,
+  T_SET = 14,
+  T_LIST = 15,
+  T_UTF8 = 16,
+  T_UTF16 = 17
+};
+
+// replace with unique_ptr when we're OK with C++11
+class ScopedPyObject {
+public:
+  ScopedPyObject() : obj_(NULL) {}
+  explicit ScopedPyObject(PyObject* py_object) : obj_(py_object) {}
+  ~ScopedPyObject() {
+    if (obj_)
+      Py_DECREF(obj_);
+  }
+  PyObject* get() throw() { return obj_; }
+  operator bool() { return obj_; }
+  void reset(PyObject* py_object) throw() {
+    if (obj_)
+      Py_DECREF(obj_);
+    obj_ = py_object;
+  }
+  PyObject* release() throw() {
+    PyObject* tmp = obj_;
+    obj_ = NULL;
+    return tmp;
+  }
+  void swap(ScopedPyObject& other) throw() {
+    ScopedPyObject tmp(other.release());
+    other.reset(release());
+    reset(tmp.release());
+  }
+
+private:
+  ScopedPyObject(const ScopedPyObject&) {}
+  ScopedPyObject& operator=(const ScopedPyObject&) { return *this; }
+
+  PyObject* obj_;
+};
+
+/**
+ * A cache of the two key attributes of a CReadableTransport,
+ * so we don't have to keep calling PyObject_GetAttr.
+ */
+struct DecodeBuffer {
+  ScopedPyObject stringiobuf;
+  ScopedPyObject refill_callable;
+};
+
+extern char refill_signature[3];
+typedef PyObject EncodeBuffer;
+
+/**
+ * A cache of the spec_args for a set or list,
+ * so we don't have to keep calling PyTuple_GET_ITEM.
+ */
+struct SetListTypeArgs {
+  TType element_type;
+  PyObject* typeargs;
+  bool immutable;
+};
+
+/**
+ * A cache of the spec_args for a map,
+ * so we don't have to keep calling PyTuple_GET_ITEM.
+ */
+struct MapTypeArgs {
+  TType ktag;
+  TType vtag;
+  PyObject* ktypeargs;
+  PyObject* vtypeargs;
+  bool immutable;
+};
+
+/**
+ * A cache of the spec_args for a struct,
+ * so we don't have to keep calling PyTuple_GET_ITEM.
+ */
+struct StructTypeArgs {
+  PyObject* klass;
+  PyObject* spec;
+  bool immutable;
+};
+
+/**
+ * A cache of the item spec from a struct specification,
+ * so we don't have to keep calling PyTuple_GET_ITEM.
+ */
+struct StructItemSpec {
+  int tag;
+  TType type;
+  PyObject* attrname;
+  PyObject* typeargs;
+  PyObject* defval;
+};
+
+bool parse_set_list_args(SetListTypeArgs* dest, PyObject* typeargs);
+
+bool parse_map_args(MapTypeArgs* dest, PyObject* typeargs);
+
+bool parse_struct_args(StructTypeArgs* dest, PyObject* typeargs);
+
+bool parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple);
+}
+}
+}
+
+#endif // THRIFT_PY_TYPES_H
diff --git a/lib/py/src/protocol/TBase.py b/lib/py/src/protocol/TBase.py
index 87caf0d..55da19e 100644
--- a/lib/py/src/protocol/TBase.py
+++ b/lib/py/src/protocol/TBase.py
@@ -17,14 +17,8 @@
 # under the License.
 #
 
-from thrift.protocol import TBinaryProtocol
 from thrift.transport import TTransport
 
-try:
-    from thrift.protocol import fastbinary
-except:
-    fastbinary = None
-
 
 class TBase(object):
     __slots__ = ()
@@ -47,27 +41,19 @@
         return not (self == other)
 
     def read(self, iprot):
-        if (iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and
+        if (iprot._fast_decode is not None and
                 isinstance(iprot.trans, TTransport.CReadableTransport) and
-                self.thrift_spec is not None and
-                fastbinary is not None):
-            fastbinary.decode_binary(self,
-                                     iprot.trans,
-                                     (self.__class__, self.thrift_spec),
-                                     iprot.string_length_limit,
-                                     iprot.container_length_limit)
-            return
-        iprot.readStruct(self, self.thrift_spec)
+                self.thrift_spec is not None):
+            iprot._fast_decode(self, iprot, (self.__class__, self.thrift_spec))
+        else:
+            iprot.readStruct(self, self.thrift_spec)
 
     def write(self, oprot):
-        if (oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and
-                self.thrift_spec is not None and
-                fastbinary is not None):
+        if (oprot._fast_encode is not None and self.thrift_spec is not None):
             oprot.trans.write(
-                fastbinary.encode_binary(
-                    self, (self.__class__, self.thrift_spec)))
-            return
-        oprot.writeStruct(self, self.thrift_spec)
+                oprot._fast_encode(self, (self.__class__, self.thrift_spec)))
+        else:
+            oprot.writeStruct(self, self.thrift_spec)
 
 
 class TExceptionBase(TBase, Exception):
@@ -86,14 +72,11 @@
 
     @classmethod
     def read(cls, iprot):
-        if (iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and
+        if (iprot._fast_decode is not None and
                 isinstance(iprot.trans, TTransport.CReadableTransport) and
-                cls.thrift_spec is not None and
-                fastbinary is not None):
+                cls.thrift_spec is not None):
             self = cls()
-            return fastbinary.decode_binary(None,
-                                            iprot.trans,
-                                            (self.__class__, self.thrift_spec),
-                                            iprot.string_length_limit,
-                                            iprot.container_length_limit)
-        return iprot.readStruct(cls, cls.thrift_spec, True)
+            return iprot._fast_decode(None, iprot,
+                                      (self.__class__, self.thrift_spec))
+        else:
+            return iprot.readStruct(cls, cls.thrift_spec, True)
diff --git a/lib/py/src/protocol/TBinaryProtocol.py b/lib/py/src/protocol/TBinaryProtocol.py
index 7fce12f..f6be772 100644
--- a/lib/py/src/protocol/TBinaryProtocol.py
+++ b/lib/py/src/protocol/TBinaryProtocol.py
@@ -258,6 +258,7 @@
     We inherit from TBinaryProtocol so that the normal TBinaryProtocol
     encoding can happen if the fastbinary module doesn't work for some
     reason.  (TODO(dreiss): Make this happen sanely in more cases.)
+    To disable this behavior, pass fallback=False constructor argument.
 
     In order to take advantage of the C module, just use
     TBinaryProtocolAccelerated instead of TBinaryProtocol.
@@ -270,16 +271,31 @@
     """
     pass
 
+    def __init__(self, *args, **kwargs):
+        fallback = kwargs.pop('fallback', True)
+        super(TBinaryProtocolAccelerated, self).__init__(*args, **kwargs)
+        try:
+            from thrift.protocol import fastbinary
+        except ImportError:
+            if not fallback:
+                raise
+        else:
+            self._fast_decode = fastbinary.decode_binary
+            self._fast_encode = fastbinary.encode_binary
+
 
 class TBinaryProtocolAcceleratedFactory(object):
     def __init__(self,
                  string_length_limit=None,
-                 container_length_limit=None):
+                 container_length_limit=None,
+                 fallback=True):
         self.string_length_limit = string_length_limit
         self.container_length_limit = container_length_limit
+        self._fallback = fallback
 
     def getProtocol(self, trans):
         return TBinaryProtocolAccelerated(
             trans,
             string_length_limit=self.string_length_limit,
-            container_length_limit=self.container_length_limit)
+            container_length_limit=self.container_length_limit,
+            fallback=self._fallback)
diff --git a/lib/py/src/protocol/TCompactProtocol.py b/lib/py/src/protocol/TCompactProtocol.py
index 8d3db1a..16fd9be 100644
--- a/lib/py/src/protocol/TCompactProtocol.py
+++ b/lib/py/src/protocol/TCompactProtocol.py
@@ -424,3 +424,49 @@
         return TCompactProtocol(trans,
                                 self.string_length_limit,
                                 self.container_length_limit)
+
+
+class TCompactProtocolAccelerated(TCompactProtocol):
+    """C-Accelerated version of TCompactProtocol.
+
+    This class does not override any of TCompactProtocol's methods,
+    but the generated code recognizes it directly and will call into
+    our C module to do the encoding, bypassing this object entirely.
+    We inherit from TCompactProtocol so that the normal TCompactProtocol
+    encoding can happen if the fastbinary module doesn't work for some
+    reason.
+    To disable this behavior, pass fallback=False constructor argument.
+
+    In order to take advantage of the C module, just use
+    TCompactProtocolAccelerated instead of TCompactProtocol.
+    """
+    pass
+
+    def __init__(self, *args, **kwargs):
+        fallback = kwargs.pop('fallback', True)
+        super(TCompactProtocolAccelerated, self).__init__(*args, **kwargs)
+        try:
+            from thrift.protocol import fastbinary
+        except ImportError:
+            if not fallback:
+                raise
+        else:
+            self._fast_decode = fastbinary.decode_compact
+            self._fast_encode = fastbinary.encode_compact
+
+
+class TCompactProtocolAcceleratedFactory(object):
+    def __init__(self,
+                 string_length_limit=None,
+                 container_length_limit=None,
+                 fallback=True):
+        self.string_length_limit = string_length_limit
+        self.container_length_limit = container_length_limit
+        self._fallback = fallback
+
+    def getProtocol(self, trans):
+        return TCompactProtocolAccelerated(
+            trans,
+            string_length_limit=self.string_length_limit,
+            container_length_limit=self.container_length_limit,
+            fallback=self._fallback)
diff --git a/lib/py/src/protocol/TProtocol.py b/lib/py/src/protocol/TProtocol.py
index ed6938b..f29c58d 100644
--- a/lib/py/src/protocol/TProtocol.py
+++ b/lib/py/src/protocol/TProtocol.py
@@ -48,6 +48,8 @@
 
     def __init__(self, trans):
         self.trans = trans
+        self._fast_decode = None
+        self._fast_encode = None
 
     @staticmethod
     def _check_length(limit, length):
@@ -276,7 +278,7 @@
             yield read()
 
     def readFieldByTType(self, ttype, spec):
-        return self._read_by_ttype(ttype, spec, spec).next()
+        return next(self._read_by_ttype(ttype, spec, spec))
 
     def readContainerList(self, spec):
         ttype, tspec, is_immutable = spec
@@ -394,7 +396,7 @@
             yield write(v)
 
     def writeFieldByTType(self, ttype, val, spec):
-        self._write_by_ttype(ttype, [val], spec, spec).next()
+        next(self._write_by_ttype(ttype, [val], spec, spec))
 
 
 def checkIntegerLimits(i, bits):
diff --git a/lib/py/src/protocol/fastbinary.c b/lib/py/src/protocol/fastbinary.c
deleted file mode 100644
index da57c85..0000000
--- a/lib/py/src/protocol/fastbinary.c
+++ /dev/null
@@ -1,1289 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include <Python.h>
-#include "cStringIO.h"
-#include <stdint.h>
-#ifndef _WIN32
-# include <stdbool.h>
-# include <netinet/in.h>
-#else
-# include <WinSock2.h>
-# pragma comment (lib, "ws2_32.lib")
-# define BIG_ENDIAN (4321)
-# define LITTLE_ENDIAN (1234)
-# define BYTE_ORDER LITTLE_ENDIAN
-# if defined(_MSC_VER) && _MSC_VER < 1600
-   typedef int _Bool;
-#  define bool _Bool
-#  define false 0
-#  define true 1
-# endif
-# define inline __inline
-#endif
-
-/* Fix endianness issues on Solaris */
-#if defined (__SVR4) && defined (__sun)
- #if defined(__i386) && !defined(__i386__)
-  #define __i386__
- #endif
-
- #ifndef BIG_ENDIAN
-  #define BIG_ENDIAN (4321)
- #endif
- #ifndef LITTLE_ENDIAN
-  #define LITTLE_ENDIAN (1234)
- #endif
-
- /* I386 is LE, even on Solaris */
- #if !defined(BYTE_ORDER) && defined(__i386__)
-  #define BYTE_ORDER LITTLE_ENDIAN
- #endif
-#endif
-
-// TODO(dreiss): defval appears to be unused.  Look into removing it.
-// TODO(dreiss): Make parse_spec_args recursive, and cache the output
-//               permanently in the object.  (Malloc and orphan.)
-// TODO(dreiss): Why do we need cStringIO for reading, why not just char*?
-//               Can cStringIO let us work with a BufferedTransport?
-// TODO(dreiss): Don't ignore the rv from cwrite (maybe).
-
-/* ====== BEGIN UTILITIES ====== */
-
-#define INIT_OUTBUF_SIZE 128
-
-// Stolen out of TProtocol.h.
-// It would be a huge pain to have both get this from one place.
-typedef enum TType {
-  T_STOP       = 0,
-  T_VOID       = 1,
-  T_BOOL       = 2,
-  T_BYTE       = 3,
-  T_I08        = 3,
-  T_I16        = 6,
-  T_I32        = 8,
-  T_U64        = 9,
-  T_I64        = 10,
-  T_DOUBLE     = 4,
-  T_STRING     = 11,
-  T_UTF7       = 11,
-  T_STRUCT     = 12,
-  T_MAP        = 13,
-  T_SET        = 14,
-  T_LIST       = 15,
-  T_UTF8       = 16,
-  T_UTF16      = 17
-} TType;
-
-#ifndef __BYTE_ORDER
-# if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
-#  define __BYTE_ORDER BYTE_ORDER
-#  define __LITTLE_ENDIAN LITTLE_ENDIAN
-#  define __BIG_ENDIAN BIG_ENDIAN
-# else
-#  error "Cannot determine endianness"
-# endif
-#endif
-
-// Same comment as the enum.  Sorry.
-#if __BYTE_ORDER == __BIG_ENDIAN
-# define ntohll(n) (n)
-# define htonll(n) (n)
-#elif __BYTE_ORDER == __LITTLE_ENDIAN
-# if defined(__GNUC__) && defined(__GLIBC__)
-#  include <byteswap.h>
-#  define ntohll(n) bswap_64(n)
-#  define htonll(n) bswap_64(n)
-# else /* GNUC & GLIBC */
-#  define ntohll(n) ( (((unsigned long long)ntohl(n)) << 32) + ntohl(n >> 32) )
-#  define htonll(n) ( (((unsigned long long)htonl(n)) << 32) + htonl(n >> 32) )
-# endif /* GNUC & GLIBC */
-#else /* __BYTE_ORDER */
-# error "Can't define htonll or ntohll!"
-#endif
-
-// Doing a benchmark shows that interning actually makes a difference, amazingly.
-#define INTERN_STRING(value) _intern_ ## value
-
-#define INT_CONV_ERROR_OCCURRED(v) ( ((v) == -1) && PyErr_Occurred() )
-#define CHECK_RANGE(v, min, max) ( ((v) <= (max)) && ((v) >= (min)) )
-
-/**
- * A cache of the spec_args for a set or list,
- * so we don't have to keep calling PyTuple_GET_ITEM.
- */
-typedef struct {
-  TType element_type;
-  PyObject* typeargs;
-  bool immutable;
-} SetListTypeArgs;
-
-/**
- * A cache of the spec_args for a map,
- * so we don't have to keep calling PyTuple_GET_ITEM.
- */
-typedef struct {
-  TType ktag;
-  TType vtag;
-  PyObject* ktypeargs;
-  PyObject* vtypeargs;
-  bool immutable;
-} MapTypeArgs;
-
-/**
- * A cache of the spec_args for a struct,
- * so we don't have to keep calling PyTuple_GET_ITEM.
- */
-typedef struct {
-  PyObject* klass;
-  PyObject* spec;
-  bool immutable;
-} StructTypeArgs;
-
-/**
- * A cache of the item spec from a struct specification,
- * so we don't have to keep calling PyTuple_GET_ITEM.
- */
-typedef struct {
-  int tag;
-  TType type;
-  PyObject* attrname;
-  PyObject* typeargs;
-  PyObject* defval;
-} StructItemSpec;
-
-/**
- * A cache of the two key attributes of a CReadableTransport,
- * so we don't have to keep calling PyObject_GetAttr.
- */
-typedef struct {
-  PyObject* stringiobuf;
-  PyObject* refill_callable;
-} DecodeBuffer;
-
-/** Pointer to interned string to speed up attribute lookup. */
-static PyObject* INTERN_STRING(cstringio_buf);
-/** Pointer to interned string to speed up attribute lookup. */
-static PyObject* INTERN_STRING(cstringio_refill);
-
-static inline bool
-check_ssize_t_32(Py_ssize_t len) {
-  // error from getting the int
-  if (INT_CONV_ERROR_OCCURRED(len)) {
-    return false;
-  }
-  if (!CHECK_RANGE(len, 0, INT32_MAX)) {
-    PyErr_SetString(PyExc_OverflowError, "size out of range: exceeded INT32_MAX");
-    return false;
-  }
-  return true;
-}
-
-static inline bool
-check_length_limit(Py_ssize_t len, long limit) {
-  if (!check_ssize_t_32(len)) {
-    return false;
-  }
-  if (len > limit) {
-    PyErr_Format(PyExc_OverflowError, "size exceeded specified limit: %d", limit);
-    return false;
-  }
-  return true;
-}
-
-static inline bool
-parse_pyint(PyObject* o, int32_t* ret, int32_t min, int32_t max) {
-  long val = PyInt_AsLong(o);
-
-  if (INT_CONV_ERROR_OCCURRED(val)) {
-    return false;
-  }
-  if (!CHECK_RANGE(val, min, max)) {
-    PyErr_SetString(PyExc_OverflowError, "int out of range");
-    return false;
-  }
-
-  *ret = (int32_t) val;
-  return true;
-}
-
-static bool
-is_utf8(PyObject* typeargs) {
-  return PyString_Check(typeargs) && !strncmp(PyString_AS_STRING(typeargs), "UTF8", 4);
-}
-
-/* --- FUNCTIONS TO PARSE STRUCT SPECIFICATOINS --- */
-
-static bool
-parse_set_list_args(SetListTypeArgs* dest, PyObject* typeargs) {
-  if (PyTuple_Size(typeargs) != 3) {
-    PyErr_SetString(PyExc_TypeError, "expecting tuple of size 3 for list/set type args");
-    return false;
-  }
-
-  dest->element_type = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0));
-  if (INT_CONV_ERROR_OCCURRED(dest->element_type)) {
-    return false;
-  }
-
-  dest->typeargs = PyTuple_GET_ITEM(typeargs, 1);
-
-  dest->immutable = Py_True == PyTuple_GET_ITEM(typeargs, 2);
-
-  return true;
-}
-
-static bool
-parse_map_args(MapTypeArgs* dest, PyObject* typeargs) {
-  if (PyTuple_Size(typeargs) != 5) {
-    PyErr_SetString(PyExc_TypeError, "expecting 5 arguments for typeargs to map");
-    return false;
-  }
-
-  dest->ktag = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0));
-  if (INT_CONV_ERROR_OCCURRED(dest->ktag)) {
-    return false;
-  }
-
-  dest->vtag = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 2));
-  if (INT_CONV_ERROR_OCCURRED(dest->vtag)) {
-    return false;
-  }
-
-  dest->ktypeargs = PyTuple_GET_ITEM(typeargs, 1);
-  dest->vtypeargs = PyTuple_GET_ITEM(typeargs, 3);
-  dest->immutable = Py_True == PyTuple_GET_ITEM(typeargs, 4);
-
-  return true;
-}
-
-static bool
-parse_struct_args(StructTypeArgs* dest, PyObject* typeargs) {
-  if (PyTuple_Size(typeargs) != 2) {
-    PyErr_SetString(PyExc_TypeError, "expecting tuple of size 2 for struct args");
-    return false;
-  }
-
-  dest->klass = PyTuple_GET_ITEM(typeargs, 0);
-  dest->spec = PyTuple_GET_ITEM(typeargs, 1);
-
-  return true;
-}
-
-static int
-parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple) {
-
-  // i'd like to use ParseArgs here, but it seems to be a bottleneck.
-  if (PyTuple_Size(spec_tuple) != 5) {
-    PyErr_Format(PyExc_TypeError, "expecting 5 arguments for spec tuple but got %d", (int)PyTuple_Size(spec_tuple));
-    return false;
-  }
-
-  dest->tag = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 0));
-  if (INT_CONV_ERROR_OCCURRED(dest->tag)) {
-    return false;
-  }
-
-  dest->type = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 1));
-  if (INT_CONV_ERROR_OCCURRED(dest->type)) {
-    return false;
-  }
-
-  dest->attrname = PyTuple_GET_ITEM(spec_tuple, 2);
-  dest->typeargs = PyTuple_GET_ITEM(spec_tuple, 3);
-  dest->defval = PyTuple_GET_ITEM(spec_tuple, 4);
-  return true;
-}
-
-/* ====== END UTILITIES ====== */
-
-
-/* ====== BEGIN WRITING FUNCTIONS ====== */
-
-/* --- LOW-LEVEL WRITING FUNCTIONS --- */
-
-static void writeByte(PyObject* outbuf, int8_t val) {
-  int8_t net = val;
-  PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int8_t));
-}
-
-static void writeI16(PyObject* outbuf, int16_t val) {
-  int16_t net = (int16_t)htons(val);
-  PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int16_t));
-}
-
-static void writeI32(PyObject* outbuf, int32_t val) {
-  int32_t net = (int32_t)htonl(val);
-  PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int32_t));
-}
-
-static void writeI64(PyObject* outbuf, int64_t val) {
-  int64_t net = (int64_t)htonll(val);
-  PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int64_t));
-}
-
-static void writeDouble(PyObject* outbuf, double dub) {
-  // Unfortunately, bitwise_cast doesn't work in C.  Bad C!
-  union {
-    double f;
-    int64_t t;
-  } transfer;
-  transfer.f = dub;
-  writeI64(outbuf, transfer.t);
-}
-
-
-/* --- MAIN RECURSIVE OUTPUT FUNCTION -- */
-
-static bool
-output_val(PyObject* output, PyObject* value, TType type, PyObject* typeargs) {
-  /*
-   * Refcounting Strategy:
-   *
-   * We assume that elements of the thrift_spec tuple are not going to be
-   * mutated, so we don't ref count those at all. Other than that, we try to
-   * keep a reference to all the user-created objects while we work with them.
-   * output_val assumes that a reference is already held. The *caller* is
-   * responsible for handling references
-   */
-
-  switch (type) {
-
-  case T_BOOL: {
-    int v = PyObject_IsTrue(value);
-    if (v == -1) {
-      return false;
-    }
-
-    writeByte(output, (int8_t) v);
-    break;
-  }
-  case T_I08: {
-    int32_t val;
-
-    if (!parse_pyint(value, &val, INT8_MIN, INT8_MAX)) {
-      return false;
-    }
-
-    writeByte(output, (int8_t) val);
-    break;
-  }
-  case T_I16: {
-    int32_t val;
-
-    if (!parse_pyint(value, &val, INT16_MIN, INT16_MAX)) {
-      return false;
-    }
-
-    writeI16(output, (int16_t) val);
-    break;
-  }
-  case T_I32: {
-    int32_t val;
-
-    if (!parse_pyint(value, &val, INT32_MIN, INT32_MAX)) {
-      return false;
-    }
-
-    writeI32(output, val);
-    break;
-  }
-  case T_I64: {
-    int64_t nval = PyLong_AsLongLong(value);
-
-    if (INT_CONV_ERROR_OCCURRED(nval)) {
-      return false;
-    }
-
-    if (!CHECK_RANGE(nval, INT64_MIN, INT64_MAX)) {
-      PyErr_SetString(PyExc_OverflowError, "int out of range");
-      return false;
-    }
-
-    writeI64(output, nval);
-    break;
-  }
-
-  case T_DOUBLE: {
-    double nval = PyFloat_AsDouble(value);
-    if (nval == -1.0 && PyErr_Occurred()) {
-      return false;
-    }
-
-    writeDouble(output, nval);
-    break;
-  }
-
-  case T_STRING: {
-    Py_ssize_t len = 0;
-    if (is_utf8(typeargs) && PyUnicode_Check(value))
-      value = PyUnicode_AsUTF8String(value);
-    len = PyString_Size(value);
-
-    if (!check_ssize_t_32(len)) {
-      return false;
-    }
-
-    writeI32(output, (int32_t) len);
-    PycStringIO->cwrite(output, PyString_AsString(value), (int32_t) len);
-    break;
-  }
-
-  case T_LIST:
-  case T_SET: {
-    Py_ssize_t len;
-    SetListTypeArgs parsedargs;
-    PyObject *item;
-    PyObject *iterator;
-
-    if (!parse_set_list_args(&parsedargs, typeargs)) {
-      return false;
-    }
-
-    len = PyObject_Length(value);
-
-    if (!check_ssize_t_32(len)) {
-      return false;
-    }
-
-    writeByte(output, parsedargs.element_type);
-    writeI32(output, (int32_t) len);
-
-    iterator =  PyObject_GetIter(value);
-    if (iterator == NULL) {
-      return false;
-    }
-
-    while ((item = PyIter_Next(iterator))) {
-      if (!output_val(output, item, parsedargs.element_type, parsedargs.typeargs)) {
-        Py_DECREF(item);
-        Py_DECREF(iterator);
-        return false;
-      }
-      Py_DECREF(item);
-    }
-
-    Py_DECREF(iterator);
-
-    if (PyErr_Occurred()) {
-      return false;
-    }
-
-    break;
-  }
-
-  case T_MAP: {
-    PyObject *k, *v;
-    Py_ssize_t pos = 0;
-    Py_ssize_t len;
-
-    MapTypeArgs parsedargs;
-
-    len = PyDict_Size(value);
-    if (!check_ssize_t_32(len)) {
-      return false;
-    }
-
-    if (!parse_map_args(&parsedargs, typeargs)) {
-      return false;
-    }
-
-    writeByte(output, parsedargs.ktag);
-    writeByte(output, parsedargs.vtag);
-    writeI32(output, len);
-
-    // TODO(bmaurer): should support any mapping, not just dicts
-    while (PyDict_Next(value, &pos, &k, &v)) {
-      // TODO(dreiss): Think hard about whether these INCREFs actually
-      //               turn any unsafe scenarios into safe scenarios.
-      Py_INCREF(k);
-      Py_INCREF(v);
-
-      if (!output_val(output, k, parsedargs.ktag, parsedargs.ktypeargs)
-          || !output_val(output, v, parsedargs.vtag, parsedargs.vtypeargs)) {
-        Py_DECREF(k);
-        Py_DECREF(v);
-        return false;
-      }
-      Py_DECREF(k);
-      Py_DECREF(v);
-    }
-    break;
-  }
-
-  // TODO(dreiss): Consider breaking this out as a function
-  //               the way we did for decode_struct.
-  case T_STRUCT: {
-    StructTypeArgs parsedargs;
-    Py_ssize_t nspec;
-    Py_ssize_t i;
-
-    if (!parse_struct_args(&parsedargs, typeargs)) {
-      return false;
-    }
-
-    nspec = PyTuple_Size(parsedargs.spec);
-
-    if (nspec == -1) {
-      return false;
-    }
-
-    for (i = 0; i < nspec; i++) {
-      StructItemSpec parsedspec;
-      PyObject* spec_tuple;
-      PyObject* instval = NULL;
-
-      spec_tuple = PyTuple_GET_ITEM(parsedargs.spec, i);
-      if (spec_tuple == Py_None) {
-        continue;
-      }
-
-      if (!parse_struct_item_spec (&parsedspec, spec_tuple)) {
-        return false;
-      }
-
-      instval = PyObject_GetAttr(value, parsedspec.attrname);
-
-      if (!instval) {
-        return false;
-      }
-
-      if (instval == Py_None) {
-        Py_DECREF(instval);
-        continue;
-      }
-
-      writeByte(output, (int8_t) parsedspec.type);
-      writeI16(output, parsedspec.tag);
-
-      if (!output_val(output, instval, parsedspec.type, parsedspec.typeargs)) {
-        Py_DECREF(instval);
-        return false;
-      }
-
-      Py_DECREF(instval);
-    }
-
-    writeByte(output, (int8_t)T_STOP);
-    break;
-  }
-
-  case T_STOP:
-  case T_VOID:
-  case T_UTF16:
-  case T_UTF8:
-  case T_U64:
-  default:
-    PyErr_SetString(PyExc_TypeError, "Unexpected TType");
-    return false;
-
-  }
-
-  return true;
-}
-
-
-/* --- TOP-LEVEL WRAPPER FOR OUTPUT -- */
-
-static PyObject *
-encode_binary(PyObject *self, PyObject *args) {
-  PyObject* enc_obj;
-  PyObject* type_args;
-  PyObject* buf;
-  PyObject* ret = NULL;
-
-  if (!PyArg_ParseTuple(args, "OO", &enc_obj, &type_args)) {
-    return NULL;
-  }
-
-  buf = PycStringIO->NewOutput(INIT_OUTBUF_SIZE);
-  if (output_val(buf, enc_obj, T_STRUCT, type_args)) {
-    ret = PycStringIO->cgetvalue(buf);
-  }
-
-  Py_DECREF(buf);
-  return ret;
-}
-
-/* ====== END WRITING FUNCTIONS ====== */
-
-
-/* ====== BEGIN READING FUNCTIONS ====== */
-
-/* --- LOW-LEVEL READING FUNCTIONS --- */
-
-static void
-free_decodebuf(DecodeBuffer* d) {
-  Py_XDECREF(d->stringiobuf);
-  Py_XDECREF(d->refill_callable);
-}
-
-static bool
-decode_buffer_from_obj(DecodeBuffer* dest, PyObject* obj) {
-  dest->stringiobuf = PyObject_GetAttr(obj, INTERN_STRING(cstringio_buf));
-  if (!dest->stringiobuf) {
-    return false;
-  }
-
-  if (!PycStringIO_InputCheck(dest->stringiobuf)) {
-    free_decodebuf(dest);
-    PyErr_SetString(PyExc_TypeError, "expecting stringio input");
-    return false;
-  }
-
-  dest->refill_callable = PyObject_GetAttr(obj, INTERN_STRING(cstringio_refill));
-
-  if(!dest->refill_callable) {
-    free_decodebuf(dest);
-    return false;
-  }
-
-  if (!PyCallable_Check(dest->refill_callable)) {
-    free_decodebuf(dest);
-    PyErr_SetString(PyExc_TypeError, "expecting callable");
-    return false;
-  }
-
-  return true;
-}
-
-static bool readBytes(DecodeBuffer* input, char** output, int len) {
-  int read;
-
-  // TODO(dreiss): Don't fear the malloc.  Think about taking a copy of
-  //               the partial read instead of forcing the transport
-  //               to prepend it to its buffer.
-
-  read = PycStringIO->cread(input->stringiobuf, output, len);
-
-  if (read == len) {
-    return true;
-  } else if (read == -1) {
-    return false;
-  } else {
-    PyObject* newiobuf;
-
-    // using building functions as this is a rare codepath
-    newiobuf = PyObject_CallFunction(
-        input->refill_callable, "s#i", *output, read, len, NULL);
-    if (newiobuf == NULL) {
-      return false;
-    }
-
-    // must do this *AFTER* the call so that we don't deref the io buffer
-    Py_CLEAR(input->stringiobuf);
-    input->stringiobuf = newiobuf;
-
-    read = PycStringIO->cread(input->stringiobuf, output, len);
-
-    if (read == len) {
-      return true;
-    } else if (read == -1) {
-      return false;
-    } else {
-      // TODO(dreiss): This could be a valid code path for big binary blobs.
-      PyErr_SetString(PyExc_TypeError,
-          "refill claimed to have refilled the buffer, but didn't!!");
-      return false;
-    }
-  }
-}
-
-static int8_t readByte(DecodeBuffer* input) {
-  char* buf;
-  if (!readBytes(input, &buf, sizeof(int8_t))) {
-    return -1;
-  }
-
-  return *(int8_t*) buf;
-}
-
-static int16_t readI16(DecodeBuffer* input) {
-  char* buf;
-  if (!readBytes(input, &buf, sizeof(int16_t))) {
-    return -1;
-  }
-
-  return (int16_t) ntohs(*(int16_t*) buf);
-}
-
-static int32_t readI32(DecodeBuffer* input) {
-  char* buf;
-  if (!readBytes(input, &buf, sizeof(int32_t))) {
-    return -1;
-  }
-  return (int32_t) ntohl(*(int32_t*) buf);
-}
-
-
-static int64_t readI64(DecodeBuffer* input) {
-  char* buf;
-  if (!readBytes(input, &buf, sizeof(int64_t))) {
-    return -1;
-  }
-
-  return (int64_t) ntohll(*(int64_t*) buf);
-}
-
-static double readDouble(DecodeBuffer* input) {
-  union {
-    int64_t f;
-    double t;
-  } transfer;
-
-  transfer.f = readI64(input);
-  if (transfer.f == -1) {
-    return -1;
-  }
-  return transfer.t;
-}
-
-static bool
-checkTypeByte(DecodeBuffer* input, TType expected) {
-  TType got = readByte(input);
-  if (INT_CONV_ERROR_OCCURRED(got)) {
-    return false;
-  }
-
-  if (expected != got) {
-    PyErr_SetString(PyExc_TypeError, "got wrong ttype while reading field");
-    return false;
-  }
-  return true;
-}
-
-static bool
-skip(DecodeBuffer* input, TType type) {
-#define SKIPBYTES(n) \
-  do { \
-    if (!readBytes(input, &dummy_buf, (n))) { \
-      return false; \
-    } \
-  } while(0)
-
-  char* dummy_buf;
-
-  switch (type) {
-
-  case T_BOOL:
-  case T_I08: SKIPBYTES(1); break;
-  case T_I16: SKIPBYTES(2); break;
-  case T_I32: SKIPBYTES(4); break;
-  case T_I64:
-  case T_DOUBLE: SKIPBYTES(8); break;
-
-  case T_STRING: {
-    // TODO(dreiss): Find out if these check_ssize_t32s are really necessary.
-    int len = readI32(input);
-    if (!check_ssize_t_32(len)) {
-      return false;
-    }
-    SKIPBYTES(len);
-    break;
-  }
-
-  case T_LIST:
-  case T_SET: {
-    TType etype;
-    int len, i;
-
-    etype = readByte(input);
-    if (etype == -1) {
-      return false;
-    }
-
-    len = readI32(input);
-    if (!check_ssize_t_32(len)) {
-      return false;
-    }
-
-    for (i = 0; i < len; i++) {
-      if (!skip(input, etype)) {
-        return false;
-      }
-    }
-    break;
-  }
-
-  case T_MAP: {
-    TType ktype, vtype;
-    int len, i;
-
-    ktype = readByte(input);
-    if (ktype == -1) {
-      return false;
-    }
-
-    vtype = readByte(input);
-    if (vtype == -1) {
-      return false;
-    }
-
-    len = readI32(input);
-    if (!check_ssize_t_32(len)) {
-      return false;
-    }
-
-    for (i = 0; i < len; i++) {
-      if (!(skip(input, ktype) && skip(input, vtype))) {
-        return false;
-      }
-    }
-    break;
-  }
-
-  case T_STRUCT: {
-    while (true) {
-      TType type;
-
-      type = readByte(input);
-      if (type == -1) {
-        return false;
-      }
-
-      if (type == T_STOP)
-        break;
-
-      SKIPBYTES(2); // tag
-      if (!skip(input, type)) {
-        return false;
-      }
-    }
-    break;
-  }
-
-  case T_STOP:
-  case T_VOID:
-  case T_UTF16:
-  case T_UTF8:
-  case T_U64:
-  default:
-    PyErr_SetString(PyExc_TypeError, "Unexpected TType");
-    return false;
-
-  }
-
-  return true;
-
-#undef SKIPBYTES
-}
-
-
-/* --- HELPER FUNCTION FOR DECODE_VAL --- */
-
-static PyObject*
-decode_val(DecodeBuffer* input, TType type, PyObject* typeargs, long string_limit, long container_limit);
-
-static PyObject*
-decode_struct(DecodeBuffer* input, PyObject* output, PyObject* klass, PyObject* spec_seq, long string_limit, long container_limit) {
-  int spec_seq_len = PyTuple_Size(spec_seq);
-  bool immutable = output == Py_None;
-  PyObject* kwargs = NULL;
-  if (spec_seq_len == -1) {
-    return NULL;
-  }
-
-  if (immutable) {
-    kwargs = PyDict_New();
-    if (!kwargs) {
-      PyErr_SetString(PyExc_TypeError, "failed to prepare kwargument storage");
-      return NULL;
-    }
-  }
-
-  while (true) {
-    TType type;
-    int16_t tag;
-    PyObject* item_spec;
-    PyObject* fieldval = NULL;
-    StructItemSpec parsedspec;
-
-    type = readByte(input);
-    if (type == -1) {
-      goto error;
-    }
-    if (type == T_STOP) {
-      break;
-    }
-    tag = readI16(input);
-    if (INT_CONV_ERROR_OCCURRED(tag)) {
-      goto error;
-    }
-    if (tag >= 0 && tag < spec_seq_len) {
-      item_spec = PyTuple_GET_ITEM(spec_seq, tag);
-    } else {
-      item_spec = Py_None;
-    }
-
-    if (item_spec == Py_None) {
-      if (!skip(input, type)) {
-        goto error;
-      } else {
-        continue;
-      }
-    }
-
-    if (!parse_struct_item_spec(&parsedspec, item_spec)) {
-      goto error;
-    }
-    if (parsedspec.type != type) {
-      if (!skip(input, type)) {
-        PyErr_Format(PyExc_TypeError, "struct field had wrong type: expected %d but got %d", parsedspec.type, type);
-        goto error;
-      } else {
-        continue;
-      }
-    }
-
-    fieldval = decode_val(input, parsedspec.type, parsedspec.typeargs, string_limit, container_limit);
-    if (fieldval == NULL) {
-      goto error;
-    }
-
-    if ((immutable && PyDict_SetItem(kwargs, parsedspec.attrname, fieldval) == -1)
-        || (!immutable && PyObject_SetAttr(output, parsedspec.attrname, fieldval) == -1)) {
-      Py_DECREF(fieldval);
-      goto error;
-    }
-    Py_DECREF(fieldval);
-  }
-  if (immutable) {
-    PyObject* args = PyTuple_New(0);
-    PyObject* ret = NULL;
-    if (!args) {
-      PyErr_SetString(PyExc_TypeError, "failed to prepare argument storage");
-      goto error;
-    }
-    ret = PyObject_Call(klass, args, kwargs);
-    Py_DECREF(kwargs);
-    Py_DECREF(args);
-    return ret;
-  }
-  Py_INCREF(output);
-  return output;
-
-  error:
-  Py_XDECREF(kwargs);
-  return NULL;
-}
-
-
-/* --- MAIN RECURSIVE INPUT FUNCTION --- */
-
-// Returns a new reference.
-static PyObject*
-decode_val(DecodeBuffer* input, TType type, PyObject* typeargs, long string_limit, long container_limit) {
-  switch (type) {
-
-  case T_BOOL: {
-    int8_t v = readByte(input);
-    if (INT_CONV_ERROR_OCCURRED(v)) {
-      return NULL;
-    }
-
-    switch (v) {
-    case 0: Py_RETURN_FALSE;
-    case 1: Py_RETURN_TRUE;
-    // Don't laugh.  This is a potentially serious issue.
-    default: PyErr_SetString(PyExc_TypeError, "boolean out of range"); return NULL;
-    }
-    break;
-  }
-  case T_I08: {
-    int8_t v = readByte(input);
-    if (INT_CONV_ERROR_OCCURRED(v)) {
-      return NULL;
-    }
-
-    return PyInt_FromLong(v);
-  }
-  case T_I16: {
-    int16_t v = readI16(input);
-    if (INT_CONV_ERROR_OCCURRED(v)) {
-      return NULL;
-    }
-    return PyInt_FromLong(v);
-  }
-  case T_I32: {
-    int32_t v = readI32(input);
-    if (INT_CONV_ERROR_OCCURRED(v)) {
-      return NULL;
-    }
-    return PyInt_FromLong(v);
-  }
-
-  case T_I64: {
-    int64_t v = readI64(input);
-    if (INT_CONV_ERROR_OCCURRED(v)) {
-      return NULL;
-    }
-    // TODO(dreiss): Find out if we can take this fastpath always when
-    //               sizeof(long) == sizeof(long long).
-    if (CHECK_RANGE(v, LONG_MIN, LONG_MAX)) {
-      return PyInt_FromLong((long) v);
-    }
-
-    return PyLong_FromLongLong(v);
-  }
-
-  case T_DOUBLE: {
-    double v = readDouble(input);
-    if (v == -1.0 && PyErr_Occurred()) {
-      return false;
-    }
-    return PyFloat_FromDouble(v);
-  }
-
-  case T_STRING: {
-    Py_ssize_t len = readI32(input);
-    char* buf;
-    if (!readBytes(input, &buf, len)) {
-      return NULL;
-    }
-    if (!check_length_limit(len, string_limit)) {
-      return NULL;
-    }
-
-    if (is_utf8(typeargs))
-      return PyUnicode_DecodeUTF8(buf, len, 0);
-    else
-      return PyString_FromStringAndSize(buf, len);
-  }
-
-  case T_LIST:
-  case T_SET: {
-    SetListTypeArgs parsedargs;
-    int32_t len;
-    PyObject* ret = NULL;
-    int i;
-    bool use_tuple = false;
-
-    if (!parse_set_list_args(&parsedargs, typeargs)) {
-      return NULL;
-    }
-
-    if (!checkTypeByte(input, parsedargs.element_type)) {
-      return NULL;
-    }
-
-    len = readI32(input);
-    if (!check_length_limit(len, container_limit)) {
-      return NULL;
-    }
-
-    use_tuple = type == T_LIST && parsedargs.immutable;
-    ret = use_tuple ? PyTuple_New(len) : PyList_New(len);
-    if (!ret) {
-      return NULL;
-    }
-
-    for (i = 0; i < len; i++) {
-      PyObject* item = decode_val(input, parsedargs.element_type, parsedargs.typeargs, string_limit, container_limit);
-      if (!item) {
-        Py_DECREF(ret);
-        return NULL;
-      }
-      if (use_tuple) {
-        PyTuple_SET_ITEM(ret, i, item);
-      } else  {
-        PyList_SET_ITEM(ret, i, item);
-      }
-    }
-
-    // TODO(dreiss): Consider biting the bullet and making two separate cases
-    //               for list and set, avoiding this post facto conversion.
-    if (type == T_SET) {
-      PyObject* setret;
-      setret = parsedargs.immutable ? PyFrozenSet_New(ret) : PySet_New(ret);
-      Py_DECREF(ret);
-      return setret;
-    }
-    return ret;
-  }
-
-  case T_MAP: {
-    int32_t len;
-    int i;
-    MapTypeArgs parsedargs;
-    PyObject* ret = NULL;
-
-    if (!parse_map_args(&parsedargs, typeargs)) {
-      return NULL;
-    }
-
-    if (!checkTypeByte(input, parsedargs.ktag)) {
-      return NULL;
-    }
-    if (!checkTypeByte(input, parsedargs.vtag)) {
-      return NULL;
-    }
-
-    len = readI32(input);
-    if (!check_length_limit(len, container_limit)) {
-      return NULL;
-    }
-
-    ret = PyDict_New();
-    if (!ret) {
-      goto error;
-    }
-
-    for (i = 0; i < len; i++) {
-      PyObject* k = NULL;
-      PyObject* v = NULL;
-      k = decode_val(input, parsedargs.ktag, parsedargs.ktypeargs, string_limit, container_limit);
-      if (k == NULL) {
-        goto loop_error;
-      }
-      v = decode_val(input, parsedargs.vtag, parsedargs.vtypeargs, string_limit, container_limit);
-      if (v == NULL) {
-        goto loop_error;
-      }
-      if (PyDict_SetItem(ret, k, v) == -1) {
-        goto loop_error;
-      }
-
-      Py_DECREF(k);
-      Py_DECREF(v);
-      continue;
-
-      // Yuck!  Destructors, anyone?
-      loop_error:
-      Py_XDECREF(k);
-      Py_XDECREF(v);
-      goto error;
-    }
-
-    if (parsedargs.immutable) {
-      PyObject* thrift = PyImport_ImportModule("thrift.Thrift");
-      PyObject* cls = NULL;
-      PyObject* arg = NULL;
-      if (!thrift) {
-        goto error;
-      }
-      cls = PyObject_GetAttrString(thrift, "TFrozenDict");
-      if (!cls) {
-        goto error;
-      }
-      arg = PyTuple_New(1);
-      PyTuple_SET_ITEM(arg, 0, ret);
-      return PyObject_CallObject(cls, arg);
-    }
-
-    return ret;
-
-    error:
-    Py_XDECREF(ret);
-    return NULL;
-  }
-
-  case T_STRUCT: {
-    StructTypeArgs parsedargs;
-    if (!parse_struct_args(&parsedargs, typeargs)) {
-      return NULL;
-    }
-
-    return decode_struct(input, Py_None, parsedargs.klass, parsedargs.spec, string_limit, container_limit);
-  }
-
-  case T_STOP:
-  case T_VOID:
-  case T_UTF16:
-  case T_UTF8:
-  case T_U64:
-  default:
-    PyErr_SetString(PyExc_TypeError, "Unexpected TType");
-    return NULL;
-  }
-}
-
-static long as_long_or(PyObject* value, long default_value) {
-  long v = PyInt_AsLong(value);
-  if (INT_CONV_ERROR_OCCURRED(v)) {
-    PyErr_Clear();
-    return default_value;
-  }
-  return v;
-}
-
-
-/* --- TOP-LEVEL WRAPPER FOR INPUT -- */
-
-static PyObject*
-decode_binary(PyObject *self, PyObject *args) {
-  PyObject* output_obj = NULL;
-  PyObject* transport = NULL;
-  PyObject* typeargs = NULL;
-  StructTypeArgs parsedargs;
-  PyObject* string_limit_obj = NULL;
-  PyObject* container_limit_obj = NULL;
-  long string_limit = 0;
-  long container_limit = 0;
-  DecodeBuffer input = {0, 0};
-  PyObject* ret = NULL;
-
-  if (!PyArg_ParseTuple(args, "OOOOO", &output_obj, &transport, &typeargs, &string_limit_obj, &container_limit_obj)) {
-    return NULL;
-  }
-  string_limit = as_long_or(string_limit_obj, INT32_MAX);
-  container_limit = as_long_or(container_limit_obj, INT32_MAX);
-
-  if (!parse_struct_args(&parsedargs, typeargs)) {
-    return NULL;
-  }
-
-  if (!decode_buffer_from_obj(&input, transport)) {
-    return NULL;
-  }
-
-  ret = decode_struct(&input, output_obj, parsedargs.klass, parsedargs.spec, string_limit, container_limit);
-  free_decodebuf(&input);
-  return  ret;
-}
-
-/* ====== END READING FUNCTIONS ====== */
-
-
-/* -- PYTHON MODULE SETUP STUFF --- */
-
-static PyMethodDef ThriftFastBinaryMethods[] = {
-
-  {"encode_binary",  encode_binary, METH_VARARGS, ""},
-  {"decode_binary",  decode_binary, METH_VARARGS, ""},
-
-  {NULL, NULL, 0, NULL}        /* Sentinel */
-};
-
-PyMODINIT_FUNC
-initfastbinary(void) {
-#define INIT_INTERN_STRING(value) \
-  do { \
-    INTERN_STRING(value) = PyString_InternFromString(#value); \
-    if(!INTERN_STRING(value)) return; \
-  } while(0)
-
-  INIT_INTERN_STRING(cstringio_buf);
-  INIT_INTERN_STRING(cstringio_refill);
-#undef INIT_INTERN_STRING
-
-  PycString_IMPORT;
-  if (PycStringIO == NULL) return;
-
-  (void) Py_InitModule("thrift.protocol.fastbinary", ThriftFastBinaryMethods);
-}
diff --git a/test/known_failures_Linux.json b/test/known_failures_Linux.json
index be9e945..6cd595b 100644
--- a/test/known_failures_Linux.json
+++ b/test/known_failures_Linux.json
@@ -70,6 +70,8 @@
   "csharp-py_binary-accel_framed-ip-ssl",
   "csharp-py_binary_buffered-ip-ssl",
   "csharp-py_binary_framed-ip-ssl",
+  "csharp-py_compact-accelc_buffered-ip-ssl",
+  "csharp-py_compact-accelc_framed-ip-ssl",
   "csharp-py_compact_buffered-ip-ssl",
   "csharp-py_compact_framed-ip-ssl",
   "csharp-py_json_buffered-ip-ssl",
diff --git a/test/py/CMakeLists.txt b/test/py/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/test/py/FastbinaryTest.py b/test/py/FastbinaryTest.py
index a8718dc..d688cd5 100755
--- a/test/py/FastbinaryTest.py
+++ b/test/py/FastbinaryTest.py
@@ -25,6 +25,8 @@
 
 # TODO(dreiss): Test error cases.  Check for memory leaks.
 
+from __future__ import print_function
+
 import math
 import os
 import sys
@@ -34,7 +36,8 @@
 from pprint import pprint
 
 from thrift.transport import TTransport
-from thrift.protocol import TBinaryProtocol
+from thrift.protocol.TBinaryProtocol import TBinaryProtocol, TBinaryProtocolAccelerated
+from thrift.protocol.TCompactProtocol import TCompactProtocol, TCompactProtocolAccelerated
 
 from DebugProtoTest import Srv
 from DebugProtoTest.ttypes import Backwards, Bonk, Empty, HolyMoley, OneOfEach, RandomStuff, Wrapper
@@ -99,123 +102,150 @@
 rs.a = 1
 rs.b = 2
 rs.c = 3
-rs.myintlist = range(20)
+rs.myintlist = list(range(20))
 rs.maps = {1: Wrapper(**{"foo": Empty()}), 2: Wrapper(**{"foo": Empty()})}
 rs.bigint = 124523452435
 rs.triple = 3.14
 
 # make sure this splits two buffers in a buffered protocol
 rshuge = RandomStuff()
-rshuge.myintlist = range(10000)
+rshuge.myintlist = list(range(10000))
 
 my_zero = Srv.Janky_result(**{"success": 5})
 
 
-def check_write(o):
-    trans_fast = TTransport.TMemoryBuffer()
-    trans_slow = TTransport.TMemoryBuffer()
-    prot_fast = TBinaryProtocol.TBinaryProtocolAccelerated(trans_fast)
-    prot_slow = TBinaryProtocol.TBinaryProtocol(trans_slow)
+class Test(object):
+    def __init__(self, fast, slow):
+        self._fast = fast
+        self._slow = slow
 
-    o.write(prot_fast)
-    o.write(prot_slow)
-    ORIG = trans_slow.getvalue()
-    MINE = trans_fast.getvalue()
-    if ORIG != MINE:
-        print("mine: %s\norig: %s" % (repr(MINE), repr(ORIG)))
+    def _check_write(self, o):
+        trans_fast = TTransport.TMemoryBuffer()
+        trans_slow = TTransport.TMemoryBuffer()
+        prot_fast = self._fast(trans_fast)
+        prot_slow = self._slow(trans_slow)
+
+        o.write(prot_fast)
+        o.write(prot_slow)
+        ORIG = trans_slow.getvalue()
+        MINE = trans_fast.getvalue()
+        if ORIG != MINE:
+            print("actual  : %s\nexpected: %s" % (repr(MINE), repr(ORIG)))
+            raise Exception('write value mismatch')
+
+    def _check_read(self, o):
+        prot = self._slow(TTransport.TMemoryBuffer())
+        o.write(prot)
+
+        slow_version_binary = prot.trans.getvalue()
+
+        prot = self._fast(
+            TTransport.TMemoryBuffer(slow_version_binary))
+        c = o.__class__()
+        c.read(prot)
+        if c != o:
+            print("actual  : ")
+            pprint(repr(c))
+            print("expected: ")
+            pprint(repr(o))
+            raise Exception('read value mismatch')
+
+        prot = self._fast(
+            TTransport.TBufferedTransport(
+                TTransport.TMemoryBuffer(slow_version_binary)))
+        c = o.__class__()
+        c.read(prot)
+        if c != o:
+            print("actual  : ")
+            pprint(repr(c))
+            print("expected: ")
+            pprint(repr(o))
+            raise Exception('read value mismatch')
+
+    def do_test(self):
+        self._check_write(HolyMoley())
+        self._check_read(HolyMoley())
+
+        self._check_write(hm)
+        no_set = deepcopy(hm)
+        no_set.contain = set()
+        self._check_read(no_set)
+        self._check_read(hm)
+
+        self._check_write(rs)
+        self._check_read(rs)
+
+        self._check_write(rshuge)
+        self._check_read(rshuge)
+
+        self._check_write(my_zero)
+        self._check_read(my_zero)
+
+        self._check_read(Backwards(**{"first_tag2": 4, "second_tag1": 2}))
+
+        # One case where the serialized form changes, but only superficially.
+        o = Backwards(**{"first_tag2": 4, "second_tag1": 2})
+        trans_fast = TTransport.TMemoryBuffer()
+        trans_slow = TTransport.TMemoryBuffer()
+        prot_fast = self._fast(trans_fast)
+        prot_slow = self._slow(trans_slow)
+
+        o.write(prot_fast)
+        o.write(prot_slow)
+        ORIG = trans_slow.getvalue()
+        MINE = trans_fast.getvalue()
+        assert id(ORIG) != id(MINE)
+
+        prot = self._fast(TTransport.TMemoryBuffer())
+        o.write(prot)
+        prot = self._slow(
+            TTransport.TMemoryBuffer(prot.trans.getvalue()))
+        c = o.__class__()
+        c.read(prot)
+        if c != o:
+            print("copy: ")
+            pprint(repr(c))
+            print("orig: ")
+            pprint(repr(o))
 
 
-def check_read(o):
-    prot = TBinaryProtocol.TBinaryProtocol(TTransport.TMemoryBuffer())
-    o.write(prot)
-
-    slow_version_binary = prot.trans.getvalue()
-
-    prot = TBinaryProtocol.TBinaryProtocolAccelerated(
-        TTransport.TMemoryBuffer(slow_version_binary))
-    c = o.__class__()
-    c.read(prot)
-    if c != o:
-        print("copy: ")
-        pprint(eval(repr(c)))
-        print("orig: ")
-        pprint(eval(repr(o)))
-
-    prot = TBinaryProtocol.TBinaryProtocolAccelerated(
-        TTransport.TBufferedTransport(
-            TTransport.TMemoryBuffer(slow_version_binary)))
-    c = o.__class__()
-    c.read(prot)
-    if c != o:
-        print("copy: ")
-        pprint(eval(repr(c)))
-        print("orig: ")
-        pprint(eval(repr(o)))
+def do_test(fast, slow):
+    Test(fast, slow).do_test()
 
 
-def do_test():
-    check_write(hm)
-    check_read(HolyMoley())
-    no_set = deepcopy(hm)
-    no_set.contain = set()
-    check_read(no_set)
-    check_write(rs)
-    check_read(rs)
-    check_write(rshuge)
-    check_read(rshuge)
-    check_write(my_zero)
-    check_read(my_zero)
-    check_read(Backwards(**{"first_tag2": 4, "second_tag1": 2}))
-
-    # One case where the serialized form changes, but only superficially.
-    o = Backwards(**{"first_tag2": 4, "second_tag1": 2})
-    trans_fast = TTransport.TMemoryBuffer()
-    trans_slow = TTransport.TMemoryBuffer()
-    prot_fast = TBinaryProtocol.TBinaryProtocolAccelerated(trans_fast)
-    prot_slow = TBinaryProtocol.TBinaryProtocol(trans_slow)
-
-    o.write(prot_fast)
-    o.write(prot_slow)
-    ORIG = trans_slow.getvalue()
-    MINE = trans_fast.getvalue()
-    assert id(ORIG) != id(MINE)
-
-    prot = TBinaryProtocol.TBinaryProtocolAccelerated(TTransport.TMemoryBuffer())
-    o.write(prot)
-    prot = TBinaryProtocol.TBinaryProtocol(
-        TTransport.TMemoryBuffer(prot.trans.getvalue()))
-    c = o.__class__()
-    c.read(prot)
-    if c != o:
-        print("copy: ")
-        pprint(eval(repr(c)))
-        print("orig: ")
-        pprint(eval(repr(o)))
-
-
-def do_benchmark(iters=5000):
+def do_benchmark(protocol, iters=5000, skip_slow=False):
     setup = """
 from __main__ import hm, rs, TDevNullTransport
-from thrift.protocol import TBinaryProtocol
+from thrift.protocol.{0} import {0}{1}
 trans = TDevNullTransport()
-prot = TBinaryProtocol.TBinaryProtocol%s(trans)
+prot = {0}{1}(trans)
 """
 
-    setup_fast = setup % "Accelerated"
-    setup_slow = setup % ""
+    setup_fast = setup.format(protocol, 'Accelerated')
+    if not skip_slow:
+        setup_slow = setup.format(protocol, '')
 
     print("Starting Benchmarks")
 
-    print("HolyMoley Standard = %f" %
-          timeit.Timer('hm.write(prot)', setup_slow).timeit(number=iters))
+    if not skip_slow:
+        print("HolyMoley Standard = %f" %
+              timeit.Timer('hm.write(prot)', setup_slow).timeit(number=iters))
+
     print("HolyMoley Acceler. = %f" %
           timeit.Timer('hm.write(prot)', setup_fast).timeit(number=iters))
 
-    print("FastStruct Standard = %f" %
-          timeit.Timer('rs.write(prot)', setup_slow).timeit(number=iters))
+    if not skip_slow:
+        print("FastStruct Standard = %f" %
+              timeit.Timer('rs.write(prot)', setup_slow).timeit(number=iters))
+
     print("FastStruct Acceler. = %f" %
           timeit.Timer('rs.write(prot)', setup_fast).timeit(number=iters))
 
+
 if __name__ == '__main__':
-    do_test()
-    do_benchmark()
+    print('Testing TBinaryAccelerated')
+    do_test(TBinaryProtocolAccelerated, TBinaryProtocol)
+    do_benchmark('TBinaryProtocol')
+    print('Testing TCompactAccelerated')
+    do_test(TCompactProtocolAccelerated, TCompactProtocol)
+    do_benchmark('TCompactProtocol')
diff --git a/test/py/Makefile.am b/test/py/Makefile.am
old mode 100755
new mode 100644
diff --git a/test/py/RunClientServer.py b/test/py/RunClientServer.py
index 98ead43..150f2be 100755
--- a/test/py/RunClientServer.py
+++ b/test/py/RunClientServer.py
@@ -52,6 +52,7 @@
 
 PROTOS = [
     'accel',
+    'accelc',
     'binary',
     'compact',
     'json',
diff --git a/test/py/SerializationTest.py b/test/py/SerializationTest.py
index 65a1495..d6308f0 100755
--- a/test/py/SerializationTest.py
+++ b/test/py/SerializationTest.py
@@ -19,7 +19,22 @@
 # under the License.
 #
 
-from ThriftTest.ttypes import *
+from ThriftTest.ttypes import (
+    Bonk,
+    Bools,
+    LargeDeltas,
+    ListBonks,
+    NestedListsBonk,
+    NestedListsI32x2,
+    NestedListsI32x3,
+    NestedMixedx2,
+    Numberz,
+    VersioningTestV1,
+    VersioningTestV2,
+    Xtruct,
+    Xtruct2,
+)
+
 from DebugProtoTest.ttypes import CompactProtoTestStruct, Empty
 from thrift.transport import TTransport
 from thrift.protocol import TBinaryProtocol, TCompactProtocol, TJSONProtocol
@@ -284,6 +299,10 @@
     protocol_factory = TCompactProtocol.TCompactProtocolFactory()
 
 
+class AcceleratedCompactTest(AbstractTest):
+    protocol_factory = TCompactProtocol.TCompactProtocolAcceleratedFactory()
+
+
 class JSONProtocolTest(AbstractTest):
     protocol_factory = TJSONProtocol.TJSONProtocolFactory()
 
@@ -361,6 +380,7 @@
 
     suite.addTest(loader.loadTestsFromTestCase(NormalBinaryTest))
     suite.addTest(loader.loadTestsFromTestCase(AcceleratedBinaryTest))
+    suite.addTest(loader.loadTestsFromTestCase(AcceleratedCompactTest))
     suite.addTest(loader.loadTestsFromTestCase(CompactProtocolTest))
     suite.addTest(loader.loadTestsFromTestCase(JSONProtocolTest))
     suite.addTest(loader.loadTestsFromTestCase(AcceleratedFramedTest))
diff --git a/test/py/TestClient.py b/test/py/TestClient.py
index bc7650d..e83a880 100755
--- a/test/py/TestClient.py
+++ b/test/py/TestClient.py
@@ -271,6 +271,11 @@
         return TBinaryProtocol.TBinaryProtocolAcceleratedFactory().getProtocol(transport)
 
 
+class AcceleratedCompactTest(AbstractTest):
+    def get_protocol(self, transport):
+        return TCompactProtocol.TCompactProtocolAcceleratedFactory().getProtocol(transport)
+
+
 def suite():
     suite = unittest.TestSuite()
     loader = unittest.TestLoader()
@@ -280,6 +285,8 @@
         suite.addTest(loader.loadTestsFromTestCase(AcceleratedBinaryTest))
     elif options.proto == 'compact':
         suite.addTest(loader.loadTestsFromTestCase(CompactTest))
+    elif options.proto == 'accelc':
+        suite.addTest(loader.loadTestsFromTestCase(AcceleratedCompactTest))
     elif options.proto == 'json':
         suite.addTest(loader.loadTestsFromTestCase(JSONTest))
     else:
diff --git a/test/py/TestEof.py b/test/py/TestEof.py
index 0239fc6..8901613 100755
--- a/test/py/TestEof.py
+++ b/test/py/TestEof.py
@@ -84,7 +84,7 @@
         self.fail("Should have gotten EOFError")
 
     def eofTestHelperStress(self, pfactory):
-        """Teest the ability of TBinaryProtocol to deal with the removal of every byte in the file"""
+        """Test the ability of TBinaryProtocol to deal with the removal of every byte in the file"""
         # TODO: we should make sure this covers more of the code paths
 
         data = self.make_data(pfactory)
@@ -105,7 +105,7 @@
         self.eofTestHelper(TBinaryProtocol.TBinaryProtocolFactory())
         self.eofTestHelperStress(TBinaryProtocol.TBinaryProtocolFactory())
 
-    def testBinaryProtocolAcceleratedEof(self):
+    def testBinaryProtocolAcceleratedBinaryEof(self):
         """Test that TBinaryProtocolAccelerated throws an EOFError when it reaches the end of the stream"""
         self.eofTestHelper(TBinaryProtocol.TBinaryProtocolAcceleratedFactory())
         self.eofTestHelperStress(TBinaryProtocol.TBinaryProtocolAcceleratedFactory())
@@ -115,6 +115,11 @@
         self.eofTestHelper(TCompactProtocol.TCompactProtocolFactory())
         self.eofTestHelperStress(TCompactProtocol.TCompactProtocolFactory())
 
+    def testCompactProtocolAcceleratedCompactEof(self):
+        """Test that TCompactProtocolAccelerated throws an EOFError when it reaches the end of the stream"""
+        self.eofTestHelper(TCompactProtocol.TCompactProtocolAcceleratedFactory())
+        self.eofTestHelperStress(TCompactProtocol.TCompactProtocolAcceleratedFactory())
+
 
 def suite():
     suite = unittest.TestSuite()
diff --git a/test/py/TestFrozen.py b/test/py/TestFrozen.py
index 30a6a55..f7c629b 100755
--- a/test/py/TestFrozen.py
+++ b/test/py/TestFrozen.py
@@ -22,7 +22,7 @@
 from DebugProtoTest.ttypes import CompactProtoTestStruct, Empty, Wrapper
 from thrift.Thrift import TFrozenDict
 from thrift.transport import TTransport
-from thrift.protocol import TBinaryProtocol
+from thrift.protocol import TBinaryProtocol, TCompactProtocol
 import collections
 import unittest
 
@@ -100,16 +100,22 @@
         return TBinaryProtocol.TBinaryProtocolFactory().getProtocol(trans)
 
 
-class TestFrozenAccelerated(TestFrozenBase):
+class TestFrozenAcceleratedBinary(TestFrozenBase):
     def protocol(self, trans):
         return TBinaryProtocol.TBinaryProtocolAcceleratedFactory().getProtocol(trans)
 
 
+class TestFrozenAcceleratedCompact(TestFrozenBase):
+    def protocol(self, trans):
+        return TCompactProtocol.TCompactProtocolAcceleratedFactory().getProtocol(trans)
+
+
 def suite():
     suite = unittest.TestSuite()
     loader = unittest.TestLoader()
     suite.addTest(loader.loadTestsFromTestCase(TestFrozen))
-    suite.addTest(loader.loadTestsFromTestCase(TestFrozenAccelerated))
+    suite.addTest(loader.loadTestsFromTestCase(TestFrozenAcceleratedBinary))
+    suite.addTest(loader.loadTestsFromTestCase(TestFrozenAcceleratedCompact))
     return suite
 
 if __name__ == "__main__":
diff --git a/test/py/TestServer.py b/test/py/TestServer.py
index ef93509..8819821 100755
--- a/test/py/TestServer.py
+++ b/test/py/TestServer.py
@@ -184,6 +184,7 @@
         'binary': TBinaryProtocol.TBinaryProtocolFactory,
         'accel': TBinaryProtocol.TBinaryProtocolAcceleratedFactory,
         'compact': TCompactProtocol.TCompactProtocolFactory,
+        'accelc': TCompactProtocol.TCompactProtocolAcceleratedFactory,
         'json': TJSONProtocol.TJSONProtocolFactory,
     }
     pfactory_cls = prot_factories.get(options.proto, None)
diff --git a/test/tests.json b/test/tests.json
index be5992c..3104790 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -200,7 +200,8 @@
       "compact",
       "binary",
       "json",
-      "binary:accel"
+      "binary:accel",
+      "compact:accelc"
     ],
     "workdir": "py"
   },