THRIFT-3612 Add Python C extension for compact protocol
Client: Python
Patch: Nobuaki Sukegawa
This closes #844
diff --git a/lib/py/src/ext/binary.cpp b/lib/py/src/ext/binary.cpp
new file mode 100644
index 0000000..a83899f
--- /dev/null
+++ b/lib/py/src/ext/binary.cpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "ext/binary.h"
+namespace apache {
+namespace thrift {
+namespace py {
+
+// Reads one field header: a type byte, then the 16-bit field id unless the type is T_STOP.
+// Returns false when the underlying read fails; `tag` is left untouched for T_STOP.
+bool BinaryProtocol::readFieldBegin(TType& type, int16_t& tag) {
+  uint8_t b = 0;
+  if (!readByte(b)) {
+    return false;
+  }
+  // No "b == -1" guard: b is unsigned, so that comparison could never be true.
+  type = static_cast<TType>(b);
+  if (type == T_STOP) {
+    return true;
+  }
+  return readI16(tag);
+}
+}
+}
+}
diff --git a/lib/py/src/ext/binary.h b/lib/py/src/ext/binary.h
new file mode 100644
index 0000000..dedeec3
--- /dev/null
+++ b/lib/py/src/ext/binary.h
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_BINARY_H
+#define THRIFT_PY_BINARY_H
+
+#include <Python.h>
+#include "ext/protocol.h"
+#include "ext/endian.h"
+#include <stdint.h>
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+class BinaryProtocol : public ProtocolBase<BinaryProtocol> {
+public:
+ virtual ~BinaryProtocol() {}
+
+ void writeI8(int8_t val) { writeBuffer(reinterpret_cast<char*>(&val), sizeof(int8_t)); }
+
+ void writeI16(int16_t val) {
+ int16_t net = static_cast<int16_t>(htons(val));
+ writeBuffer(reinterpret_cast<char*>(&net), sizeof(int16_t));
+ }
+
+ void writeI32(int32_t val) {
+ int32_t net = static_cast<int32_t>(htonl(val));
+ writeBuffer(reinterpret_cast<char*>(&net), sizeof(int32_t));
+ }
+
+ void writeI64(int64_t val) {
+ int64_t net = static_cast<int64_t>(htonll(val));
+ writeBuffer(reinterpret_cast<char*>(&net), sizeof(int64_t));
+ }
+
+ void writeDouble(double dub) {
+ // Unfortunately, bitwise_cast doesn't work in C. Bad C!
+ union {
+ double f;
+ int64_t t;
+ } transfer;
+ transfer.f = dub;
+ writeI64(transfer.t);
+ }
+
+ void writeBool(int v) { writeByte(static_cast<uint8_t>(v)); }
+
+ void writeString(PyObject* value, int32_t len) {
+ writeI32(len);
+ writeBuffer(PyBytes_AS_STRING(value), len);
+ }
+
+ bool writeListBegin(PyObject* value, const SetListTypeArgs& parsedargs, int32_t len) {
+ writeByte(parsedargs.element_type);
+ writeI32(len);
+ return true;
+ }
+
+ bool writeMapBegin(PyObject* value, const MapTypeArgs& parsedargs, int32_t len) {
+ writeByte(parsedargs.ktag);
+ writeByte(parsedargs.vtag);
+ writeI32(len);
+ return true;
+ }
+
+ bool writeStructBegin() { return true; }
+ bool writeStructEnd() { return true; }
+ bool writeField(PyObject* value, const StructItemSpec& parsedspec) {
+ writeByte(static_cast<uint8_t>(parsedspec.type));
+ writeI16(parsedspec.tag);
+ return encodeValue(value, parsedspec.type, parsedspec.typeargs);
+ }
+
+ void writeFieldStop() { writeByte(static_cast<uint8_t>(T_STOP)); }
+
+ bool readBool(bool& val) {
+ char* buf;
+ if (!readBytes(&buf, 1)) {
+ return false;
+ }
+ val = buf[0] == 1;
+ return true;
+ }
+
+ bool readI8(int8_t& val) {
+ char* buf;
+ if (!readBytes(&buf, 1)) {
+ return false;
+ }
+ val = buf[0];
+ return true;
+ }
+
+ bool readI16(int16_t& val) { // 16-bit integer, network (big-endian) order on the wire
+   char* buf;
+   if (!readBytes(&buf, sizeof(int16_t)))
+     return false;
+   memcpy(&val, buf, sizeof(int16_t)); // buf may be unaligned: copy, never dereference as int16_t*
+   val = static_cast<int16_t>(ntohs(static_cast<uint16_t>(val)));
+   return true;
+ }
+
+ bool readI32(int32_t& val) { // 32-bit integer, network (big-endian) order on the wire
+   char* buf;
+   if (!readBytes(&buf, sizeof(int32_t)))
+     return false;
+   memcpy(&val, buf, sizeof(int32_t)); // buf may be unaligned: copy, never dereference as int32_t*
+   val = static_cast<int32_t>(ntohl(static_cast<uint32_t>(val)));
+   return true;
+ }
+
+ bool readI64(int64_t& val) { // 64-bit integer, network (big-endian) order on the wire
+   char* buf;
+   if (!readBytes(&buf, sizeof(int64_t)))
+     return false;
+   memcpy(&val, buf, sizeof(int64_t)); // buf may be unaligned: copy, never dereference as int64_t*
+   val = static_cast<int64_t>(ntohll(val));
+   return true;
+ }
+
+ bool readDouble(double& val) {
+ union {
+ int64_t f;
+ double t;
+ } transfer;
+
+ if (!readI64(transfer.f)) {
+ return false;
+ }
+ val = transfer.t;
+ return true;
+ }
+
+ int32_t readString(char** buf) {
+ int32_t len = 0;
+ if (!readI32(len) || !checkLengthLimit(len, stringLimit()) || !readBytes(buf, len)) {
+ return -1;
+ }
+ return len;
+ }
+
+ int32_t readListBegin(TType& etype) {
+ int32_t len;
+ uint8_t b = 0;
+ if (!readByte(b) || !readI32(len) || !checkLengthLimit(len, containerLimit())) {
+ return -1;
+ }
+ etype = static_cast<TType>(b);
+ return len;
+ }
+
+ int32_t readMapBegin(TType& ktype, TType& vtype) {
+ int32_t len;
+ uint8_t k, v;
+ if (!readByte(k) || !readByte(v) || !readI32(len) || !checkLengthLimit(len, containerLimit())) {
+ return -1;
+ }
+ ktype = static_cast<TType>(k);
+ vtype = static_cast<TType>(v);
+ return len;
+ }
+
+ bool readStructBegin() { return true; }
+ bool readStructEnd() { return true; }
+
+ bool readFieldBegin(TType& type, int16_t& tag);
+
+#define SKIPBYTES(n) \
+ do { \
+ if (!readBytes(&dummy_buf_, (n))) { \
+ return false; \
+ } \
+ return true; \
+ } while (0)
+
+ bool skipBool() { SKIPBYTES(1); }
+ bool skipByte() { SKIPBYTES(1); }
+ bool skipI16() { SKIPBYTES(2); }
+ bool skipI32() { SKIPBYTES(4); }
+ bool skipI64() { SKIPBYTES(8); }
+ bool skipDouble() { SKIPBYTES(8); }
+ bool skipString() {
+ int32_t len;
+ if (!readI32(len)) {
+ return false;
+ }
+ SKIPBYTES(len);
+ }
+#undef SKIPBYTES
+
+private:
+ char* dummy_buf_;
+};
+}
+}
+}
+#endif // THRIFT_PY_BINARY_H
diff --git a/lib/py/src/ext/compact.cpp b/lib/py/src/ext/compact.cpp
new file mode 100644
index 0000000..15a99a0
--- /dev/null
+++ b/lib/py/src/ext/compact.cpp
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "ext/compact.h"
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+const uint8_t CompactProtocol::TTypeToCType[] = {
+ CT_STOP, // T_STOP
+ 0, // unused
+ CT_BOOLEAN_TRUE, // T_BOOL
+ CT_BYTE, // T_BYTE
+ CT_DOUBLE, // T_DOUBLE
+ 0, // unused
+ CT_I16, // T_I16
+ 0, // unused
+ CT_I32, // T_I32
+ 0, // unused
+ CT_I64, // T_I64
+ CT_BINARY, // T_STRING
+ CT_STRUCT, // T_STRUCT
+ CT_MAP, // T_MAP
+ CT_SET, // T_SET
+ CT_LIST, // T_LIST
+};
+
+bool CompactProtocol::readFieldBegin(TType& type, int16_t& tag) { // decodes one field header into (type, tag)
+ uint8_t b;
+ if (!readByte(b)) {
+ return false;
+ }
+ uint8_t ctype = b & 0xf; // low nibble: compact type code
+ type = getTType(ctype);
+ if (type == -1) { // unknown code: getTType has already set a Python TypeError
+ return false;
+ } else if (type == T_STOP) {
+ tag = 0;
+ return true;
+ }
+ uint8_t diff = (b & 0xf0) >> 4; // high nibble: field-id delta; 0 means an explicit id follows
+ if (diff) {
+ tag = readTags_.top() + diff; // id is relative to the last id recorded for this struct
+ } else if (!readI16(tag)) {
+ readTags_.top() = -1;
+ return false;
+ }
+ if (ctype == CT_BOOLEAN_FALSE || ctype == CT_BOOLEAN_TRUE) {
+ readBool_.exists = true; // the bool value lives in the type code; readBool() consumes this
+ readBool_.value = ctype == CT_BOOLEAN_TRUE;
+ }
+ readTags_.top() = tag; // becomes the base for the next field's delta
+ return true;
+}
+
+TType CompactProtocol::getTType(uint8_t type) { // compact wire code -> TType; -1 with TypeError set if unknown
+  switch (type) {
+  case CT_STOP: // compact-code enumerator (same value as T_STOP), matching the other case labels
+    return T_STOP;
+  case CT_BOOLEAN_FALSE:
+  case CT_BOOLEAN_TRUE:
+    return T_BOOL;
+  case CT_BYTE:
+    return T_BYTE;
+  case CT_I16:
+    return T_I16;
+  case CT_I32:
+    return T_I32;
+  case CT_I64:
+    return T_I64;
+  case CT_DOUBLE:
+    return T_DOUBLE;
+  case CT_BINARY:
+    return T_STRING;
+  case CT_LIST:
+    return T_LIST;
+  case CT_SET:
+    return T_SET;
+  case CT_MAP:
+    return T_MAP;
+  case CT_STRUCT:
+    return T_STRUCT;
+  default:
+    PyErr_Format(PyExc_TypeError, "don't know what type: %d", type);
+    return static_cast<TType>(-1);
+  }
+}
+}
+}
+}
diff --git a/lib/py/src/ext/compact.h b/lib/py/src/ext/compact.h
new file mode 100644
index 0000000..5bba237
--- /dev/null
+++ b/lib/py/src/ext/compact.h
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_COMPACT_H
+#define THRIFT_PY_COMPACT_H
+
+#include <Python.h>
+#include "ext/protocol.h"
+#include "ext/endian.h"
+#include <stdint.h>
+#include <stack>
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+class CompactProtocol : public ProtocolBase<CompactProtocol> {
+public:
+ CompactProtocol() { readBool_.exists = false; }
+
+ virtual ~CompactProtocol() {}
+
+ void writeI8(int8_t val) { writeBuffer(reinterpret_cast<char*>(&val), 1); }
+
+ void writeI16(int16_t val) { writeVarint(toZigZag(val)); }
+
+ int writeI32(int32_t val) { return writeVarint(toZigZag(val)); }
+
+ void writeI64(int64_t val) { writeVarint64(toZigZag64(val)); }
+
+ // Writes the 64-bit pattern of `dub` in little-endian byte order.
+ void writeDouble(double dub) {
+   union { double f; int64_t t; } transfer;
+   transfer.f = dub;
+   // Swap the integer view; htolell(dub) would operate on the double's numeric value instead.
+   transfer.t = htolell(transfer.t);
+   writeBuffer(reinterpret_cast<char*>(&transfer.t), sizeof(int64_t));
+ }
+
+ void writeBool(int v) { writeByte(static_cast<uint8_t>(v ? CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE)); }
+
+ void writeString(PyObject* value, int32_t len) {
+ writeVarint(len);
+ writeBuffer(PyBytes_AS_STRING(value), len);
+ }
+
+ bool writeListBegin(PyObject* value, const SetListTypeArgs& args, int32_t len) { // list/set header
+ int ctype = toCompactType(args.element_type);
+ if (len <= 14) { // short form: size in the high nibble, element type in the low nibble
+ writeByte(static_cast<uint8_t>(len << 4 | ctype));
+ } else { // long form: high nibble 0xf marks that the size follows as a varint
+ writeByte(0xf0 | ctype);
+ writeVarint(len);
+ }
+ return true;
+ }
+
+ bool writeMapBegin(PyObject* value, const MapTypeArgs& args, int32_t len) { // map header
+ if (len == 0) { // an empty map is a single zero byte, with no key/value type byte
+ writeByte(0);
+ return true;
+ }
+ int ctype = toCompactType(args.ktag) << 4 | toCompactType(args.vtag); // key type high, value type low
+ writeVarint(len);
+ writeByte(ctype);
+ return true;
+ }
+
+ bool writeStructBegin() {
+ writeTags_.push(0);
+ return true;
+ }
+ bool writeStructEnd() {
+ writeTags_.pop();
+ return true;
+ }
+
+ bool writeField(PyObject* value, const StructItemSpec& spec) { // bool fields carry their value in the header
+   if (spec.type != T_BOOL) {
+     doWriteFieldBegin(spec, toCompactType(spec.type));
+     return encodeValue(value, spec.type, spec.typeargs);
+   }
+   const int truth = PyObject_IsTrue(value); // -1: the truth test raised, so report failure
+   if (truth >= 0) doWriteFieldBegin(spec, truth ? CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE);
+   return truth >= 0;
+ }
+
+ void writeFieldStop() { writeByte(0); }
+
+ bool readBool(bool& val) {
+ if (readBool_.exists) { // value was already decoded from a field header by readFieldBegin
+ readBool_.exists = false; // consume it so the next call reads from the stream
+ val = readBool_.value;
+ return true;
+ }
+ char* buf;
+ if (!readBytes(&buf, 1)) {
+ return false;
+ }
+ val = buf[0] == CT_BOOLEAN_TRUE; // any byte other than CT_BOOLEAN_TRUE reads as false
+ return true;
+ }
+ bool readI8(int8_t& val) {
+ char* buf;
+ if (!readBytes(&buf, 1)) {
+ return false;
+ }
+ val = buf[0];
+ return true;
+ }
+
+ bool readI16(int16_t& val) {
+ uint16_t uval;
+ if (readVarint<uint16_t, 3>(uval)) {
+ val = fromZigZag<int16_t, uint16_t>(uval);
+ return true;
+ }
+ return false;
+ }
+
+ bool readI32(int32_t& val) {
+ uint32_t uval;
+ if (readVarint<uint32_t, 5>(uval)) {
+ val = fromZigZag<int32_t, uint32_t>(uval);
+ return true;
+ }
+ return false;
+ }
+
+ bool readI64(int64_t& val) {
+ uint64_t uval;
+ if (readVarint<uint64_t, 10>(uval)) {
+ val = fromZigZag<int64_t, uint64_t>(uval);
+ return true;
+ }
+ return false;
+ }
+
+ bool readDouble(double& val) { // 8 little-endian bytes reinterpreted as a double bit pattern
+   union {
+     int64_t f;
+     double t;
+   } transfer;
+   char* buf;
+   if (!readBytes(&buf, 8)) {
+     return false;
+   }
+   memcpy(&transfer.f, buf, sizeof(transfer.f)); // buf may be unaligned: copy, never dereference
+   transfer.f = letohll(transfer.f);
+   val = transfer.t;
+   return true;
+ }
+
+ int32_t readString(char** buf) {
+ uint32_t len;
+ if (!readVarint<uint32_t, 5>(len) || !checkLengthLimit(len, stringLimit())) {
+ return -1;
+ }
+ if (len == 0) {
+ return 0;
+ }
+ if (!readBytes(buf, len)) {
+ return -1;
+ }
+ return len;
+ }
+
+ int32_t readListBegin(TType& etype) {
+ uint8_t b;
+ if (!readByte(b)) {
+ return -1;
+ }
+ etype = getTType(b & 0xf);
+ if (etype == -1) {
+ return -1;
+ }
+ uint32_t len = (b >> 4) & 0xf;
+ if (len == 15 && !readVarint<uint32_t, 5>(len)) {
+ return -1;
+ }
+ if (!checkLengthLimit(len, containerLimit())) {
+ return -1;
+ }
+ return len;
+ }
+
+ int32_t readMapBegin(TType& ktype, TType& vtype) {
+ uint32_t len;
+ if (!readVarint<uint32_t, 5>(len) || !checkLengthLimit(len, containerLimit())) {
+ return -1;
+ }
+ if (len != 0) {
+ uint8_t kvType;
+ if (!readByte(kvType)) {
+ return -1;
+ }
+ ktype = getTType(kvType >> 4);
+ vtype = getTType(kvType & 0xf);
+ if (ktype == -1 || vtype == -1) {
+ return -1;
+ }
+ }
+ return len;
+ }
+
+ bool readStructBegin() {
+ readTags_.push(0);
+ return true;
+ }
+ bool readStructEnd() {
+ readTags_.pop();
+ return true;
+ }
+ bool readFieldBegin(TType& type, int16_t& tag);
+
+ bool skipBool() {
+ bool val;
+ return readBool(val);
+ }
+#define SKIPBYTES(n) \
+ do { \
+ if (!readBytes(&dummy_buf_, (n))) { \
+ return false; \
+ } \
+ return true; \
+ } while (0)
+ bool skipByte() { SKIPBYTES(1); }
+ bool skipDouble() { SKIPBYTES(8); }
+ bool skipI16() {
+ int16_t val;
+ return readI16(val);
+ }
+ bool skipI32() {
+ int32_t val;
+ return readI32(val);
+ }
+ bool skipI64() {
+ int64_t val;
+ return readI64(val);
+ }
+ bool skipString() {
+ uint32_t len;
+ if (!readVarint<uint32_t, 5>(len)) {
+ return false;
+ }
+ SKIPBYTES(len);
+ }
+#undef SKIPBYTES
+
+private:
+ enum Types {
+ CT_STOP = 0x00,
+ CT_BOOLEAN_TRUE = 0x01,
+ CT_BOOLEAN_FALSE = 0x02,
+ CT_BYTE = 0x03,
+ CT_I16 = 0x04,
+ CT_I32 = 0x05,
+ CT_I64 = 0x06,
+ CT_DOUBLE = 0x07,
+ CT_BINARY = 0x08,
+ CT_LIST = 0x09,
+ CT_SET = 0x0A,
+ CT_MAP = 0x0B,
+ CT_STRUCT = 0x0C
+ };
+
+ static const uint8_t TTypeToCType[];
+
+ TType getTType(uint8_t type);
+
+ int toCompactType(TType type) { // compact wire code for `type`, or -1 when outside the 16-entry table
+   const int i = static_cast<int>(type);
+   return (i >= 0 && i < 16) ? TTypeToCType[i] : -1; // a negative index would read before the table
+ }
+
+ uint32_t toZigZag(int32_t val) { return (val >> 31) ^ (val << 1); }
+
+ uint64_t toZigZag64(int64_t val) { return (val >> 63) ^ (val << 1); }
+
+ int writeVarint(uint32_t val) {
+ int cnt = 1;
+ while (val & ~0x7fU) {
+ writeByte(static_cast<char>((val & 0x7fU) | 0x80U));
+ val >>= 7;
+ ++cnt;
+ }
+ writeByte(static_cast<char>(val));
+ return cnt;
+ }
+
+ int writeVarint64(uint64_t val) {
+ int cnt = 1;
+ while (val & ~0x7fULL) {
+ writeByte(static_cast<char>((val & 0x7fULL) | 0x80ULL));
+ val >>= 7;
+ ++cnt;
+ }
+ writeByte(static_cast<char>(val));
+ return cnt;
+ }
+
+ template <typename T, int Max>
+ bool readVarint(T& result) { // decodes a varint of at most Max bytes, 7 payload bits per byte
+ uint8_t b;
+ T val = 0;
+ int shift = 0;
+ for (int i = 0; i < Max; ++i) {
+ if (!readByte(b)) {
+ return false;
+ }
+ if (b & 0x80) { // high bit set: more bytes follow
+ val |= static_cast<T>(b & 0x7f) << shift;
+ } else { // high bit clear: last byte
+ val |= static_cast<T>(b) << shift;
+ result = val; // result is written only on success
+ return true;
+ }
+ shift += 7; // earlier bytes hold the less significant bits
+ }
+ PyErr_Format(PyExc_OverflowError, "varint exceeded %d bytes", Max);
+ return false;
+ }
+
+ template <typename S, typename U>
+ S fromZigZag(U val) { // S: signed result type, U: unsigned encoded type
+ return (val >> 1) ^ static_cast<U>(-static_cast<S>(val & 1)); // low bit picks the mask: 0, or -1 cast to U
+ }
+
+ void doWriteFieldBegin(const StructItemSpec& spec, int ctype) { // writes a field header with type code ctype
+ int diff = spec.tag - writeTags_.top(); // distance from the last field id written in this struct
+ if (diff > 0 && diff <= 15) { // short form: delta in the high nibble, type in the low nibble
+ writeByte(static_cast<uint8_t>(diff << 4 | ctype));
+ } else { // long form: type byte followed by the field id via writeI16
+ writeByte(static_cast<uint8_t>(ctype));
+ writeI16(spec.tag);
+ }
+ writeTags_.top() = spec.tag; // base for the next field's delta
+ }
+
+ std::stack<int> writeTags_;
+ std::stack<int> readTags_;
+ struct {
+ bool exists;
+ bool value;
+ } readBool_;
+ char* dummy_buf_;
+};
+}
+}
+}
+#endif // THRIFT_PY_COMPACT_H
diff --git a/lib/py/src/ext/endian.h b/lib/py/src/ext/endian.h
new file mode 100644
index 0000000..91372a7
--- /dev/null
+++ b/lib/py/src/ext/endian.h
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_ENDIAN_H
+#define THRIFT_PY_ENDIAN_H
+
+#include <Python.h>
+
+#ifndef _WIN32
+#include <netinet/in.h>
+#else
+#include <WinSock2.h>
+#pragma comment(lib, "ws2_32.lib")
+#define BIG_ENDIAN (4321)
+#define LITTLE_ENDIAN (1234)
+#define BYTE_ORDER LITTLE_ENDIAN
+#define inline __inline
+#endif
+
+/* Fix endianness issues on Solaris */
+#if defined(__SVR4) && defined(__sun)
+#if defined(__i386) && !defined(__i386__)
+#define __i386__
+#endif
+
+#ifndef BIG_ENDIAN
+#define BIG_ENDIAN (4321)
+#endif
+#ifndef LITTLE_ENDIAN
+#define LITTLE_ENDIAN (1234)
+#endif
+
+/* I386 is LE, even on Solaris */
+#if !defined(BYTE_ORDER) && defined(__i386__)
+#define BYTE_ORDER LITTLE_ENDIAN
+#endif
+#endif
+
+#ifndef __BYTE_ORDER
+#if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
+#define __BYTE_ORDER BYTE_ORDER
+#define __LITTLE_ENDIAN LITTLE_ENDIAN
+#define __BIG_ENDIAN BIG_ENDIAN
+#else
+#error "Cannot determine endianness"
+#endif
+#endif
+
+// 64-bit byte-order helpers: ntohll/htonll for network (big-endian) order, letohll/htolell for little-endian.
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define ntohll(n) (n)
+#define htonll(n) (n)
+#if defined(__GNUC__) && defined(__GLIBC__)
+#include <byteswap.h>
+#define letohll(n) bswap_64(n)
+#define htolell(n) bswap_64(n)
+#else /* GNUC & GLIBC */
+#define letohll(n) ((((unsigned long long)ntohl(n)) << 32) + ntohl(n >> 32))
+#define htolell(n) ((((unsigned long long)htonl(n)) << 32) + htonl(n >> 32))
+#endif
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+#if defined(__GNUC__) && defined(__GLIBC__)
+#include <byteswap.h>
+#define ntohll(n) bswap_64(n)
+#define htonll(n) bswap_64(n)
+#else /* GNUC & GLIBC */
+#define ntohll(n) ((((unsigned long long)ntohl(n)) << 32) + ntohl(n >> 32))
+#define htonll(n) ((((unsigned long long)htonl(n)) << 32) + htonl(n >> 32))
+#endif /* GNUC & GLIBC */
+#define letohll(n) (n)
+#define htolell(n) (n)
+#else /* __BYTE_ORDER */
+#error "Can't define htonll or ntohll!"
+#endif
+
+#endif // THRIFT_PY_ENDIAN_H
diff --git a/lib/py/src/ext/module.cpp b/lib/py/src/ext/module.cpp
new file mode 100644
index 0000000..82e3fe7
--- /dev/null
+++ b/lib/py/src/ext/module.cpp
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <Python.h>
+#include "types.h"
+#include "binary.h"
+#include "compact.h"
+#include <stdint.h>
+#include <sys/resource.h>
+
+// TODO(dreiss): defval appears to be unused. Look into removing it.
+// TODO(dreiss): Make parse_spec_args recursive, and cache the output
+// permanently in the object. (Malloc and orphan.)
+// TODO(dreiss): Why do we need cStringIO for reading, why not just char*?
+// Can cStringIO let us work with a BufferedTransport?
+// TODO(dreiss): Don't ignore the rv from cwrite (maybe).
+
+// Doing a benchmark shows that interning actually makes a difference, amazingly.
+
+/** Pointer to interned string to speed up attribute lookup. */
+PyObject* INTERN_STRING(TFrozenDict);
+PyObject* INTERN_STRING(cstringio_buf);
+PyObject* INTERN_STRING(cstringio_refill);
+static PyObject* INTERN_STRING(string_length_limit);
+static PyObject* INTERN_STRING(container_length_limit);
+static PyObject* INTERN_STRING(trans);
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+template <typename T>
+static PyObject* encode_impl(PyObject* args) { // args: (object to encode, struct type args); T: protocol class
+ if (!args)
+ return NULL;
+
+ PyObject* enc_obj = NULL;
+ PyObject* type_args = NULL;
+ if (!PyArg_ParseTuple(args, "OO", &enc_obj, &type_args)) {
+ return NULL;
+ }
+ if (!enc_obj || !type_args) {
+ return NULL;
+ }
+
+ T protocol;
+ if (!protocol.prepareEncodeBuffer() || !protocol.encodeValue(enc_obj, T_STRUCT, type_args)) { // top level is always a struct
+ return NULL;
+ }
+
+ return protocol.getEncodedValue();
+}
+
+static inline long as_long_then_delete(PyObject* value, long default_value) { // value -> long, or default_value
+ ScopedPyObject scope(value); // presumably releases the reference at scope exit (type defined elsewhere) - TODO confirm
+ long v = PyInt_AsLong(value);
+ if (INT_CONV_ERROR_OCCURRED(v)) { // macro defined elsewhere; presumably "v == -1 with an exception set" - TODO confirm
+ PyErr_Clear(); // conversion failure is not an error here: fall back to the default
+ return default_value;
+ }
+ return v;
+}
+
+template <typename T>
+static PyObject* decode_impl(PyObject* args) { // args: (output object, protocol object, struct type args)
+ PyObject* output_obj = NULL;
+ PyObject* oprot = NULL;
+ PyObject* typeargs = NULL;
+ if (!PyArg_ParseTuple(args, "OOO", &output_obj, &oprot, &typeargs)) {
+ return NULL;
+ }
+
+ T protocol;
+ protocol.setStringLengthLimit( // limit read from oprot.string_length_limit, INT32_MAX as fallback
+ as_long_then_delete(PyObject_GetAttr(oprot, INTERN_STRING(string_length_limit)), INT32_MAX));
+ protocol.setContainerLengthLimit( // limit read from oprot.container_length_limit, same fallback
+ as_long_then_delete(PyObject_GetAttr(oprot, INTERN_STRING(container_length_limit)),
+ INT32_MAX));
+ ScopedPyObject transport(PyObject_GetAttr(oprot, INTERN_STRING(trans))); // oprot.trans
+ if (!transport) {
+ return NULL;
+ }
+
+ StructTypeArgs parsedargs;
+ if (!parse_struct_args(&parsedargs, typeargs)) {
+ return NULL;
+ }
+
+ if (!protocol.prepareDecodeBufferFromTransport(transport.get())) {
+ return NULL;
+ }
+
+ return protocol.readStruct(output_obj, parsedargs.klass, parsedargs.spec);
+}
+}
+}
+}
+
+using namespace apache::thrift::py;
+
+/* -- PYTHON MODULE SETUP STUFF --- */
+
+extern "C" {
+
+static PyObject* encode_binary(PyObject*, PyObject* args) {
+ return encode_impl<BinaryProtocol>(args);
+}
+
+static PyObject* decode_binary(PyObject*, PyObject* args) {
+ return decode_impl<BinaryProtocol>(args);
+}
+
+static PyObject* encode_compact(PyObject*, PyObject* args) {
+ return encode_impl<CompactProtocol>(args);
+}
+
+static PyObject* decode_compact(PyObject*, PyObject* args) {
+ return decode_impl<CompactProtocol>(args);
+}
+
+static PyMethodDef ThriftFastBinaryMethods[] = {
+ {"encode_binary", encode_binary, METH_VARARGS, ""},
+ {"decode_binary", decode_binary, METH_VARARGS, ""},
+ {"encode_compact", encode_compact, METH_VARARGS, ""},
+ {"decode_compact", decode_compact, METH_VARARGS, ""},
+ {NULL, NULL, 0, NULL} /* Sentinel */
+};
+
+#define INITERROR return;
+
+void initfastbinary() { // module init: imports cStringIO, raises the stack limit, interns names, registers methods
+
+ PycString_IMPORT;
+ if (PycStringIO == NULL)
+ INITERROR
+
+ const rlim_t kStackSize = 16 * 1024 * 1024; // min stack size = 16 MB
+ struct rlimit rl;
+ int result;
+
+ result = getrlimit(RLIMIT_STACK, &rl);
+ if (result == 0) {
+ if (rl.rlim_cur < kStackSize) { // only ever raises the soft limit, never lowers it
+ rl.rlim_cur = kStackSize;
+ result = setrlimit(RLIMIT_STACK, &rl);
+ if (result != 0) { // failure is reported on stderr but does not abort module init
+ fprintf(stderr, "setrlimit returned result = %d\n", result);
+ }
+ }
+ }
+
+#define INIT_INTERN_STRING(value) \
+ do { \
+ INTERN_STRING(value) = PyString_InternFromString(#value); \
+ if (!INTERN_STRING(value)) \
+ INITERROR \
+ } while (0)
+
+ INIT_INTERN_STRING(TFrozenDict);
+ INIT_INTERN_STRING(cstringio_buf);
+ INIT_INTERN_STRING(cstringio_refill);
+ INIT_INTERN_STRING(string_length_limit);
+ INIT_INTERN_STRING(container_length_limit);
+ INIT_INTERN_STRING(trans);
+#undef INIT_INTERN_STRING
+
+ PyObject* module =
+ Py_InitModule("thrift.protocol.fastbinary", ThriftFastBinaryMethods);
+ if (module == NULL)
+ INITERROR;
+
+}
+}
diff --git a/lib/py/src/ext/protocol.h b/lib/py/src/ext/protocol.h
new file mode 100644
index 0000000..bf3a6b8
--- /dev/null
+++ b/lib/py/src/ext/protocol.h
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_PROTOCOL_H
+#define THRIFT_PY_PROTOCOL_H
+
+#include "ext/types.h"
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+template <typename Impl>
+class ProtocolBase { // CRTP base: Impl derives from ProtocolBase<Impl> and is reached through impl()
+
+public:
+ ProtocolBase() : stringLimit_(INT32_MAX), containerLimit_(INT32_MAX), output_(NULL) {}
+ inline virtual ~ProtocolBase();
+
+ bool prepareDecodeBufferFromTransport(PyObject* trans);
+
+ PyObject* readStruct(PyObject* output, PyObject* klass, PyObject* spec_seq);
+
+
+ bool prepareEncodeBuffer();
+
+ bool encodeValue(PyObject* value, TType type, PyObject* typeargs);
+
+ PyObject* getEncodedValue();
+
+ long stringLimit() const { return stringLimit_; } // limit the protocol classes pass to checkLengthLimit for strings
+ void setStringLengthLimit(long limit) { stringLimit_ = limit; }
+
+ long containerLimit() const { return containerLimit_; } // same, for list/set/map sizes
+ void setContainerLengthLimit(long limit) { containerLimit_ = limit; }
+
+protected:
+ bool readBytes(char** output, int len);
+
+ bool readByte(uint8_t& val) { // reads one byte via readBytes; false if that read fails
+ char* buf;
+ if (!readBytes(&buf, 1)) {
+ return false;
+ }
+ val = static_cast<uint8_t>(buf[0]);
+ return true;
+ }
+
+ bool writeBuffer(char* data, size_t len);
+
+ void writeByte(uint8_t val) { writeBuffer(reinterpret_cast<char*>(&val), 1); } // writeBuffer's result is ignored
+
+ PyObject* decodeValue(TType type, PyObject* typeargs);
+
+ bool skip(TType type);
+
+ inline bool checkType(TType got, TType expected);
+ inline bool checkLengthLimit(int32_t len, long limit);
+
+ inline bool isUtf8(PyObject* typeargs);
+
+private:
+ Impl* impl() { return static_cast<Impl*>(this); } // downcast to the concrete protocol
+
+ long stringLimit_;
+ long containerLimit_;
+ EncodeBuffer* output_; // NULL until set elsewhere; cleared in the destructor
+ DecodeBuffer input_;
+};
+}
+}
+}
+
+#include "ext/protocol.tcc"
+
+#endif // THRIFT_PY_PROTOCOL_H
diff --git a/lib/py/src/ext/protocol.tcc b/lib/py/src/ext/protocol.tcc
new file mode 100644
index 0000000..3df83a1
--- /dev/null
+++ b/lib/py/src/ext/protocol.tcc
@@ -0,0 +1,820 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_PROTOCOL_TCC
+#define THRIFT_PY_PROTOCOL_TCC
+
+// True when min <= v <= max. The argument `v` is expanded (and therefore
+// evaluated) twice, so pass expressions without side effects.
+#define CHECK_RANGE(v, min, max) (((v) <= (max)) && ((v) >= (min)))
+// Size handed to the cStringIO NewOutput call when the encode buffer is created.
+#define INIT_OUTBUF_SIZE 128
+
+#include <cStringIO.h>
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+namespace detail {
+
+// Returns true when `input` is a native cStringIO input object.
+//
+// PycStringIO_InputCheck reads through the PycStringIO pointer, so the cStringIO
+// C API is imported lazily first, the same way the other helpers in this
+// namespace do. Returns false when the import fails.
+inline bool input_check(PyObject* input) {
+  if (!PycStringIO) {
+    PycString_IMPORT;
+  }
+  if (!PycStringIO) {
+    return false;
+  }
+  return PycStringIO_InputCheck(input);
+}
+
+// Creates a new cStringIO output object with the requested initial size.
+// The cStringIO C API is imported on first use; NULL is returned when the
+// import fails, otherwise whatever NewOutput returns.
+inline EncodeBuffer* new_encode_buffer(size_t size) {
+  if (PycStringIO == NULL) {
+    PycString_IMPORT;
+    if (PycStringIO == NULL) {
+      return NULL;
+    }
+  }
+  return PycStringIO->NewOutput(size);
+}
+
+// Reads up to `len` bytes from the cStringIO object `buf` via cread and returns
+// cread's result. Returns -1 with ImportError set when the cStringIO C API
+// cannot be imported.
+inline int read_buffer(PyObject* buf, char** output, int len) {
+  if (PycStringIO == NULL) {
+    PycString_IMPORT;
+    if (PycStringIO == NULL) {
+      PyErr_SetString(PyExc_ImportError, "failed to import native cStringIO");
+      return -1;
+    }
+  }
+  return PycStringIO->cread(buf, output, len);
+}
+}
+
+// Drops the reference to the encode buffer, if one was ever created.
+template <typename Impl>
+inline ProtocolBase<Impl>::~ProtocolBase() {
+  // Py_CLEAR is a no-op for NULL, so no separate guard is needed.
+  Py_CLEAR(output_);
+}
+
+// Reports whether `typeargs` is a Python string whose first four characters are
+// "UTF8" (only that prefix is compared).
+template <typename Impl>
+inline bool ProtocolBase<Impl>::isUtf8(PyObject* typeargs) {
+  if (!PyString_Check(typeargs)) {
+    return false;
+  }
+  const char* encoding = PyString_AS_STRING(typeargs);
+  return strncmp(encoding, "UTF8", 4) == 0;
+}
+
+// Returns the result of cgetvalue on the encode buffer, or NULL when the
+// cStringIO C API cannot be imported.
+template <typename Impl>
+PyObject* ProtocolBase<Impl>::getEncodedValue() {
+  if (PycStringIO == NULL) {
+    PycString_IMPORT;
+    if (PycStringIO == NULL) {
+      return NULL;
+    }
+  }
+  return PycStringIO->cgetvalue(output_);
+}
+
+/**
+ * Appends `size` bytes starting at `data` to the encode buffer.
+ *
+ * @return true when cwrite accepted exactly `size` bytes; false with a Python
+ *         exception set otherwise (ImportError, IOError or EOFError).
+ */
+template <typename Impl>
+inline bool ProtocolBase<Impl>::writeBuffer(char* data, size_t size) {
+  if (!PycStringIO) {
+    PycString_IMPORT;
+  }
+  if (!PycStringIO) {
+    PyErr_SetString(PyExc_ImportError, "failed to import native cStringIO");
+    return false;
+  }
+  int len = PycStringIO->cwrite(output_, data, size);
+  if (len < 0) {
+    PyErr_SetString(PyExc_IOError, "failed to write to cStringIO object");
+    return false;
+  }
+  // len is known to be non-negative here, so the unsigned comparison is exact.
+  if (static_cast<size_t>(len) != size) {
+    // size_t must not be passed for %d; format it as unsigned long instead.
+    PyErr_Format(PyExc_EOFError, "write length mismatch: expected %lu got %d",
+                 static_cast<unsigned long>(size), len);
+    return false;
+  }
+  return true;
+}
+
+namespace detail {
+
+// DECLARE_OP_SCOPE(name, op) generates:
+//  * struct name##Scope<Impl>: its constructor calls impl->op##Begin() and stores
+//    the result in `valid`; its destructor calls impl->op##End() only when
+//    `valid` is true. Converting the object to bool yields `valid`.
+//  * function op##Scope(T<Impl>* thiz): builds that struct from a pointer to a
+//    class template instantiated with Impl, downcasting the pointer to Impl*.
+// It is instantiated below for writeStruct and readStruct, and undefined again
+// right afterwards.
+// NOTE(review): the struct is returned by value and has the implicit copy
+// constructor; if a compiler does not elide that copy, op##End() would run once
+// per copy -- confirm the supported toolchains elide it.
+#define DECLARE_OP_SCOPE(name, op) \
+ template <typename Impl> \
+ struct name##Scope { \
+ Impl* impl; \
+ bool valid; \
+ name##Scope(Impl* thiz) : impl(thiz), valid(impl->op##Begin()) {} \
+ ~name##Scope() { \
+ if (valid) \
+ impl->op##End(); \
+ } \
+ operator bool() { return valid; } \
+ }; \
+ template <typename Impl, template <typename> class T> \
+ name##Scope<Impl> op##Scope(T<Impl>* thiz) { \
+ return name##Scope<Impl>(static_cast<Impl*>(thiz)); \
+ }
+DECLARE_OP_SCOPE(WriteStruct, writeStruct)
+DECLARE_OP_SCOPE(ReadStruct, readStruct)
+#undef DECLARE_OP_SCOPE
+
+// Validates a length obtained from a Python size query.
+// Returns false when the query itself failed (-1 with an exception pending) or,
+// with OverflowError set, when the length lies outside [0, INT32_MAX].
+inline bool check_ssize_t_32(Py_ssize_t len) {
+  // error from getting the int
+  if (INT_CONV_ERROR_OCCURRED(len)) {
+    return false;
+  }
+  const bool fits = CHECK_RANGE(len, 0, INT32_MAX);
+  if (fits) {
+    return true;
+  }
+  PyErr_SetString(PyExc_OverflowError, "size out of range: exceeded INT32_MAX");
+  return false;
+}
+}
+
+// Converts the Python integer `o` with PyInt_AsLong and stores it in *ret as T.
+// Returns false when the conversion fails or, with OverflowError set, when the
+// value lies outside [min, max]; *ret is only written on success.
+template <typename T>
+bool parse_pyint(PyObject* o, T* ret, int32_t min, int32_t max) {
+  const long converted = PyInt_AsLong(o);
+  if (INT_CONV_ERROR_OCCURRED(converted)) {
+    return false;
+  }
+
+  const bool in_range = CHECK_RANGE(converted, min, max);
+  if (in_range) {
+    *ret = static_cast<T>(converted);
+    return true;
+  }
+
+  PyErr_SetString(PyExc_OverflowError, "int out of range");
+  return false;
+}
+
+// Returns true when the type read from the wire equals the expected one;
+// otherwise sets TypeError and returns false.
+template <typename Impl>
+inline bool ProtocolBase<Impl>::checkType(TType got, TType expected) {
+  if (got == expected) {
+    return true;
+  }
+  PyErr_SetString(PyExc_TypeError, "got wrong ttype while reading field");
+  return false;
+}
+
+/**
+ * Validates a length read from the wire against a configured limit.
+ *
+ * @param len   length that was read.
+ * @param limit largest accepted length.
+ * @return true when 0 <= len <= limit; otherwise false with OverflowError set.
+ */
+template <typename Impl>
+bool ProtocolBase<Impl>::checkLengthLimit(int32_t len, long limit) {
+  if (len < 0) {
+    // Report the offending length itself, not the limit.
+    PyErr_Format(PyExc_OverflowError, "negative length: %d", static_cast<int>(len));
+    return false;
+  }
+  if (len > limit) {
+    // limit is a long, so it needs %ld.
+    PyErr_Format(PyExc_OverflowError, "size exceeded specified limit: %ld", limit);
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Obtains `len` bytes from the decode buffer.
+ *
+ * When the current buffer cannot supply `len` bytes, the transport's refill
+ * callable is invoked with the partial data, its length and the wanted length;
+ * the object it returns replaces the buffer and the read is retried once.
+ *
+ * @param output receives the address of the bytes that were read.
+ * @param len    number of bytes wanted; a negative value raises ValueError.
+ * @return true when exactly `len` bytes are available at *output, false otherwise.
+ */
+template <typename Impl>
+bool ProtocolBase<Impl>::readBytes(char** output, int len) {
+  if (len < 0) {
+    PyErr_Format(PyExc_ValueError, "attempted to read negative length: %d", len);
+    return false;
+  }
+  // TODO(dreiss): Don't fear the malloc. Think about taking a copy of
+  // the partial read instead of forcing the transport
+  // to prepend it to its buffer.
+
+  int rlen = detail::read_buffer(input_.stringiobuf.get(), output, len);
+
+  if (rlen == len) {
+    return true;
+  } else if (rlen == -1) {
+    return false;
+  }
+
+  // refill_signature holds exactly three format characters and no terminating
+  // NUL, so it is copied into a NUL-terminated array before being used as the
+  // format string of PyObject_CallFunction.
+  char refill_format[] = {refill_signature[0], refill_signature[1], refill_signature[2], '\0'};
+
+  // using building functions as this is a rare codepath
+  ScopedPyObject newiobuf(
+      PyObject_CallFunction(input_.refill_callable.get(), refill_format, *output, rlen, len));
+  if (!newiobuf) {
+    return false;
+  }
+
+  // must do this *AFTER* the call so that we don't deref the io buffer
+  input_.stringiobuf.reset(newiobuf.release());
+
+  rlen = detail::read_buffer(input_.stringiobuf.get(), output, len);
+
+  if (rlen == len) {
+    return true;
+  } else if (rlen == -1) {
+    return false;
+  }
+
+  // TODO(dreiss): This could be a valid code path for big binary blobs.
+  PyErr_SetString(PyExc_TypeError, "refill claimed to have refilled the buffer, but didn't!!");
+  return false;
+}
+
+// Caches the transport's cStringIO buffer and refill callable in input_.
+// Fails with ValueError when a buffer is already cached, with TypeError when
+// either attribute has the wrong kind, and returns false when an attribute
+// lookup fails. input_ is only modified once every check has passed.
+template <typename Impl>
+bool ProtocolBase<Impl>::prepareDecodeBufferFromTransport(PyObject* trans) {
+  if (input_.stringiobuf) {
+    PyErr_SetString(PyExc_ValueError, "decode buffer is already initialized");
+    return false;
+  }
+
+  ScopedPyObject buffer(PyObject_GetAttr(trans, INTERN_STRING(cstringio_buf)));
+  if (!buffer) {
+    return false;
+  }
+  const bool is_stringio = detail::input_check(buffer.get());
+  if (!is_stringio) {
+    PyErr_SetString(PyExc_TypeError, "expecting stringio input_");
+    return false;
+  }
+
+  ScopedPyObject refill(PyObject_GetAttr(trans, INTERN_STRING(cstringio_refill)));
+  if (!refill) {
+    return false;
+  }
+  const bool is_callable = PyCallable_Check(refill.get());
+  if (!is_callable) {
+    PyErr_SetString(PyExc_TypeError, "expecting callable");
+    return false;
+  }
+
+  // Hand both references over to the cache; anything held before is released.
+  input_.stringiobuf.reset(buffer.release());
+  input_.refill_callable.reset(refill.release());
+  return true;
+}
+
+/**
+ * Creates a fresh encode buffer of INIT_OUTBUF_SIZE bytes.
+ *
+ * A buffer left over from an earlier call is released first, so calling this
+ * more than once does not leak the previous buffer.
+ *
+ * @return true when the new buffer was created, false otherwise.
+ */
+template <typename Impl>
+bool ProtocolBase<Impl>::prepareEncodeBuffer() {
+  EncodeBuffer* fresh = detail::new_encode_buffer(INIT_OUTBUF_SIZE);
+  Py_XDECREF(output_);
+  output_ = fresh;
+  return output_ != NULL;
+}
+
+/**
+ * Serializes `value` as Thrift type `type` through the concrete protocol (Impl).
+ *
+ * @param value    Python object to encode; the caller must hold a reference to it.
+ * @param type     Thrift type that selects the encoding.
+ * @param typeargs type-specific spec entry; only consulted for list, set, map
+ *                 and struct types.
+ * @return true on success, false on failure (the failing call is expected to
+ *         have set a Python exception).
+ */
+template <typename Impl>
+bool ProtocolBase<Impl>::encodeValue(PyObject* value, TType type, PyObject* typeargs) {
+  /*
+   * Refcounting Strategy:
+   *
+   * We assume that elements of the thrift_spec tuple are not going to be
+   * mutated, so we don't ref count those at all. Other than that, we try to
+   * keep a reference to all the user-created objects while we work with them.
+   * encodeValue assumes that a reference is already held. The *caller* is
+   * responsible for handling references
+   */
+
+  switch (type) {
+
+  case T_BOOL: {
+    int v = PyObject_IsTrue(value);
+    if (v == -1) {
+      return false;
+    }
+    impl()->writeBool(v);
+    return true;
+  }
+  case T_I08: {
+    int8_t val;
+
+    if (!parse_pyint(value, &val, INT8_MIN, INT8_MAX)) {
+      return false;
+    }
+
+    impl()->writeI8(val);
+    return true;
+  }
+  case T_I16: {
+    int16_t val;
+
+    if (!parse_pyint(value, &val, INT16_MIN, INT16_MAX)) {
+      return false;
+    }
+
+    impl()->writeI16(val);
+    return true;
+  }
+  case T_I32: {
+    int32_t val;
+
+    if (!parse_pyint(value, &val, INT32_MIN, INT32_MAX)) {
+      return false;
+    }
+
+    impl()->writeI32(val);
+    return true;
+  }
+  case T_I64: {
+    // PyLong_AsLongLong reports values that do not fit by itself, so no
+    // additional range check is needed for a 64-bit target.
+    int64_t nval = PyLong_AsLongLong(value);
+
+    if (INT_CONV_ERROR_OCCURRED(nval)) {
+      return false;
+    }
+
+    impl()->writeI64(nval);
+    return true;
+  }
+
+  case T_DOUBLE: {
+    double nval = PyFloat_AsDouble(value);
+    if (nval == -1.0 && PyErr_Occurred()) {
+      return false;
+    }
+
+    impl()->writeDouble(nval);
+    return true;
+  }
+
+  case T_STRING: {
+    // Unicode input is encoded to UTF-8 first. PyUnicode_AsUTF8String returns a
+    // new reference; `encoded` owns it so that it is released on every exit path.
+    ScopedPyObject encoded;
+    PyObject* bytes = value;
+    if (PyUnicode_Check(value)) {
+      encoded.reset(PyUnicode_AsUTF8String(value));
+      if (!encoded) {
+        return false;
+      }
+      bytes = encoded.get();
+    }
+
+    Py_ssize_t len = PyBytes_Size(bytes);
+    if (!detail::check_ssize_t_32(len)) {
+      return false;
+    }
+
+    impl()->writeString(bytes, static_cast<int32_t>(len));
+    return true;
+  }
+
+  case T_LIST:
+  case T_SET: {
+    SetListTypeArgs parsedargs;
+    if (!parse_set_list_args(&parsedargs, typeargs)) {
+      return false;
+    }
+
+    Py_ssize_t len = PyObject_Length(value);
+    if (!detail::check_ssize_t_32(len)) {
+      return false;
+    }
+
+    if (!impl()->writeListBegin(value, parsedargs, static_cast<int32_t>(len)) || PyErr_Occurred()) {
+      return false;
+    }
+    ScopedPyObject iterator(PyObject_GetIter(value));
+    if (!iterator) {
+      return false;
+    }
+
+    while (PyObject* rawItem = PyIter_Next(iterator.get())) {
+      ScopedPyObject item(rawItem);
+      if (!encodeValue(item.get(), parsedargs.element_type, parsedargs.typeargs)) {
+        return false;
+      }
+    }
+    // PyIter_Next returns NULL both at exhaustion and on error; an error must
+    // not be reported as success.
+    if (PyErr_Occurred()) {
+      return false;
+    }
+
+    return true;
+  }
+
+  case T_MAP: {
+    Py_ssize_t len = PyDict_Size(value);
+    if (!detail::check_ssize_t_32(len)) {
+      return false;
+    }
+
+    MapTypeArgs parsedargs;
+    if (!parse_map_args(&parsedargs, typeargs)) {
+      return false;
+    }
+
+    if (!impl()->writeMapBegin(value, parsedargs, static_cast<int32_t>(len)) || PyErr_Occurred()) {
+      return false;
+    }
+    Py_ssize_t pos = 0;
+    PyObject* k = NULL;
+    PyObject* v = NULL;
+    // TODO(bmaurer): should support any mapping, not just dicts
+    while (PyDict_Next(value, &pos, &k, &v)) {
+      if (!encodeValue(k, parsedargs.ktag, parsedargs.ktypeargs)
+          || !encodeValue(v, parsedargs.vtag, parsedargs.vtypeargs)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  case T_STRUCT: {
+    StructTypeArgs parsedargs;
+    if (!parse_struct_args(&parsedargs, typeargs)) {
+      return false;
+    }
+
+    Py_ssize_t nspec = PyTuple_Size(parsedargs.spec);
+    if (nspec == -1) {
+      PyErr_SetString(PyExc_TypeError, "spec is not a tuple");
+      return false;
+    }
+
+    // The guard calls writeStructBegin now and writeStructEnd when it goes out
+    // of scope (only if begin succeeded).
+    detail::WriteStructScope<Impl> scope = detail::writeStructScope(this);
+    if (!scope) {
+      return false;
+    }
+    for (Py_ssize_t i = 0; i < nspec; i++) {
+      PyObject* spec_tuple = PyTuple_GET_ITEM(parsedargs.spec, i);
+      if (spec_tuple == Py_None) {
+        continue;
+      }
+
+      StructItemSpec parsedspec;
+      if (!parse_struct_item_spec(&parsedspec, spec_tuple)) {
+        return false;
+      }
+
+      ScopedPyObject instval(PyObject_GetAttr(value, parsedspec.attrname));
+
+      if (!instval) {
+        return false;
+      }
+
+      // Attributes that are None are not written at all.
+      if (instval.get() == Py_None) {
+        continue;
+      }
+
+      bool res = impl()->writeField(instval.get(), parsedspec);
+      if (!res) {
+        return false;
+      }
+    }
+    impl()->writeFieldStop();
+    return true;
+  }
+
+  case T_STOP:
+  case T_VOID:
+  case T_UTF16:
+  case T_UTF8:
+  case T_U64:
+  default:
+    PyErr_Format(PyExc_TypeError, "Unexpected TType for encodeValue: %d", type);
+    return false;
+  }
+
+  return true;
+}
+
+// Consumes one value of the given Thrift type from the input without building
+// a Python object. Containers and structs are skipped element by element.
+// Returns false on failure; an unsupported type raises TypeError.
+template <typename Impl>
+bool ProtocolBase<Impl>::skip(TType type) {
+  switch (type) {
+  case T_BOOL:
+    return impl()->skipBool();
+  case T_I08:
+    return impl()->skipByte();
+  case T_I16:
+    return impl()->skipI16();
+  case T_I32:
+    return impl()->skipI32();
+  case T_I64:
+    return impl()->skipI64();
+  case T_DOUBLE:
+    return impl()->skipDouble();
+  case T_STRING:
+    return impl()->skipString();
+
+  case T_LIST:
+  case T_SET: {
+    TType elem_type = T_STOP;
+    const int32_t count = impl()->readListBegin(elem_type);
+    if (count < 0) {
+      return false;
+    }
+    int32_t remaining = count;
+    while (remaining > 0) {
+      if (!skip(elem_type)) {
+        return false;
+      }
+      --remaining;
+    }
+    return true;
+  }
+
+  case T_MAP: {
+    TType key_type = T_STOP;
+    TType val_type = T_STOP;
+    const int32_t count = impl()->readMapBegin(key_type, val_type);
+    if (count < 0) {
+      return false;
+    }
+    int32_t remaining = count;
+    while (remaining > 0) {
+      // The value is only skipped once the key has been skipped successfully.
+      if (!skip(key_type)) {
+        return false;
+      }
+      if (!skip(val_type)) {
+        return false;
+      }
+      --remaining;
+    }
+    return true;
+  }
+
+  case T_STRUCT: {
+    detail::ReadStructScope<Impl> scope = detail::readStructScope(this);
+    if (!scope) {
+      return false;
+    }
+    // Skip field after field until the stop marker shows up.
+    for (;;) {
+      TType field_type = T_STOP;
+      int16_t field_id;
+      if (!impl()->readFieldBegin(field_type, field_id)) {
+        return false;
+      }
+      if (field_type == T_STOP) {
+        return true;
+      }
+      if (!skip(field_type)) {
+        return false;
+      }
+    }
+  }
+
+  case T_STOP:
+  case T_VOID:
+  case T_UTF16:
+  case T_UTF8:
+  case T_U64:
+  default:
+    PyErr_Format(PyExc_TypeError, "Unexpected TType for skip: %d", type);
+    return false;
+  }
+}
+
+/**
+ * Decodes one value of Thrift type `type` from the input.
+ *
+ * @param type     Thrift type to decode.
+ * @param typeargs type-specific spec entry; consulted for string, list, set,
+ *                 map and struct types.
+ * @return a new reference to the decoded Python object, or NULL on failure.
+ */
+// Returns a new reference.
+template <typename Impl>
+PyObject* ProtocolBase<Impl>::decodeValue(TType type, PyObject* typeargs) {
+  switch (type) {
+
+  case T_BOOL: {
+    bool v = 0;
+    if (!impl()->readBool(v)) {
+      return NULL;
+    }
+    if (v) {
+      Py_RETURN_TRUE;
+    } else {
+      Py_RETURN_FALSE;
+    }
+  }
+  case T_I08: {
+    int8_t v = 0;
+    if (!impl()->readI8(v)) {
+      return NULL;
+    }
+    return PyInt_FromLong(v);
+  }
+  case T_I16: {
+    int16_t v = 0;
+    if (!impl()->readI16(v)) {
+      return NULL;
+    }
+    return PyInt_FromLong(v);
+  }
+  case T_I32: {
+    int32_t v = 0;
+    if (!impl()->readI32(v)) {
+      return NULL;
+    }
+    return PyInt_FromLong(v);
+  }
+
+  case T_I64: {
+    int64_t v = 0;
+    if (!impl()->readI64(v)) {
+      return NULL;
+    }
+    // TODO(dreiss): Find out if we can take this fastpath always when
+    // sizeof(long) == sizeof(long long).
+    if (CHECK_RANGE(v, LONG_MIN, LONG_MAX)) {
+      return PyInt_FromLong(static_cast<long>(v));
+    }
+    return PyLong_FromLongLong(v);
+  }
+
+  case T_DOUBLE: {
+    double v = 0.0;
+    if (!impl()->readDouble(v)) {
+      return NULL;
+    }
+    return PyFloat_FromDouble(v);
+  }
+
+  case T_STRING: {
+    char* buf = NULL;
+    int len = impl()->readString(&buf);
+    if (len < 0) {
+      return NULL;
+    }
+    if (isUtf8(typeargs)) {
+      return PyUnicode_DecodeUTF8(buf, len, 0);
+    } else {
+      return PyBytes_FromStringAndSize(buf, len);
+    }
+  }
+
+  case T_LIST:
+  case T_SET: {
+    SetListTypeArgs parsedargs;
+    if (!parse_set_list_args(&parsedargs, typeargs)) {
+      return NULL;
+    }
+
+    TType etype = T_STOP;
+    int32_t len = impl()->readListBegin(etype);
+    if (len < 0) {
+      return NULL;
+    }
+    // The element type is only checked when there is at least one element.
+    if (len > 0 && !checkType(etype, parsedargs.element_type)) {
+      return NULL;
+    }
+
+    bool use_tuple = type == T_LIST && parsedargs.immutable;
+    ScopedPyObject ret(use_tuple ? PyTuple_New(len) : PyList_New(len));
+    if (!ret) {
+      return NULL;
+    }
+
+    for (int i = 0; i < len; i++) {
+      PyObject* item = decodeValue(etype, parsedargs.typeargs);
+      if (!item) {
+        return NULL;
+      }
+      // The SET_ITEM macros take over the reference to `item`.
+      if (use_tuple) {
+        PyTuple_SET_ITEM(ret.get(), i, item);
+      } else {
+        PyList_SET_ITEM(ret.get(), i, item);
+      }
+    }
+
+    // TODO(dreiss): Consider biting the bullet and making two separate cases
+    // for list and set, avoiding this post facto conversion.
+    if (type == T_SET) {
+      // The temporary list is released by `ret` once the set has been built.
+      return parsedargs.immutable ? PyFrozenSet_New(ret.get()) : PySet_New(ret.get());
+    }
+    return ret.release();
+  }
+
+  case T_MAP: {
+    MapTypeArgs parsedargs;
+    if (!parse_map_args(&parsedargs, typeargs)) {
+      return NULL;
+    }
+
+    TType ktype = T_STOP;
+    TType vtype = T_STOP;
+    // Keep the length signed: a negative result signals a failed read and must
+    // not be turned into a huge element count.
+    int32_t len = impl()->readMapBegin(ktype, vtype);
+    if (len < 0) {
+      return NULL;
+    }
+    if (len > 0 && (!checkType(ktype, parsedargs.ktag) || !checkType(vtype, parsedargs.vtag))) {
+      return NULL;
+    }
+
+    ScopedPyObject ret(PyDict_New());
+    if (!ret) {
+      return NULL;
+    }
+
+    for (int32_t i = 0; i < len; i++) {
+      ScopedPyObject k(decodeValue(ktype, parsedargs.ktypeargs));
+      if (!k) {
+        return NULL;
+      }
+      ScopedPyObject v(decodeValue(vtype, parsedargs.vtypeargs));
+      if (!v) {
+        return NULL;
+      }
+      if (PyDict_SetItem(ret.get(), k.get(), v.get()) == -1) {
+        return NULL;
+      }
+    }
+
+    if (parsedargs.immutable) {
+      // Wrap the dict in thrift.Thrift.TFrozenDict; the module is imported on
+      // first use and cached in ThriftModule.
+      if (!ThriftModule) {
+        ThriftModule = PyImport_ImportModule("thrift.Thrift");
+      }
+      if (!ThriftModule) {
+        return NULL;
+      }
+
+      ScopedPyObject cls(PyObject_GetAttr(ThriftModule, INTERN_STRING(TFrozenDict)));
+      if (!cls) {
+        return NULL;
+      }
+
+      ScopedPyObject arg(PyTuple_New(1));
+      if (!arg) {
+        // `ret` still owns the dict here, so nothing leaks.
+        return NULL;
+      }
+      PyTuple_SET_ITEM(arg.get(), 0, ret.release());
+      ret.reset(PyObject_CallObject(cls.get(), arg.get()));
+    }
+
+    return ret.release();
+  }
+
+  case T_STRUCT: {
+    StructTypeArgs parsedargs;
+    if (!parse_struct_args(&parsedargs, typeargs)) {
+      return NULL;
+    }
+    // Py_None as the output object makes readStruct construct a new instance.
+    return readStruct(Py_None, parsedargs.klass, parsedargs.spec);
+  }
+
+  case T_STOP:
+  case T_VOID:
+  case T_UTF16:
+  case T_UTF8:
+  case T_U64:
+  default:
+    PyErr_Format(PyExc_TypeError, "Unexpected TType for decodeValue: %d", type);
+    return NULL;
+  }
+}
+
+/**
+ * Decodes one struct from the input.
+ *
+ * When `output` is Py_None the decoded fields are collected as keyword
+ * arguments and `klass` is called with them; otherwise each decoded field is
+ * set as an attribute on `output`.
+ *
+ * @param output   object to populate, or Py_None to construct a new instance.
+ * @param klass    callable used to build the result when `output` is Py_None.
+ * @param spec_seq tuple of per-field specs, indexed by field id; entries may be None.
+ * @return a new reference to the resulting object, or NULL on failure.
+ */
+template <typename Impl>
+PyObject* ProtocolBase<Impl>::readStruct(PyObject* output, PyObject* klass, PyObject* spec_seq) {
+ int spec_seq_len = PyTuple_Size(spec_seq);
+ bool immutable = output == Py_None;
+ ScopedPyObject kwargs;
+ if (spec_seq_len == -1) {
+ return NULL;
+ }
+
+ // Only the construct-a-new-instance path needs keyword argument storage.
+ if (immutable) {
+ kwargs.reset(PyDict_New());
+ if (!kwargs) {
+ PyErr_SetString(PyExc_TypeError, "failed to prepare kwargument storage");
+ return NULL;
+ }
+ }
+
+ // The guard calls readStructBegin now and readStructEnd when it goes out of
+ // scope (only if begin succeeded).
+ detail::ReadStructScope<Impl> scope = detail::readStructScope(this);
+ if (!scope) {
+ return NULL;
+ }
+ while (true) {
+ TType type = T_STOP;
+ int16_t tag;
+ if (!impl()->readFieldBegin(type, tag)) {
+ return NULL;
+ }
+ // T_STOP marks the end of the struct's fields.
+ if (type == T_STOP) {
+ break;
+ }
+ // Field ids outside the spec tuple are skipped.
+ if (tag < 0 || tag >= spec_seq_len) {
+ if (!skip(type)) {
+ PyErr_SetString(PyExc_TypeError, "Error while skipping unknown field");
+ return NULL;
+ }
+ continue;
+ }
+
+ // A None entry in the spec tuple is skipped as well.
+ PyObject* item_spec = PyTuple_GET_ITEM(spec_seq, tag);
+ if (item_spec == Py_None) {
+ if (!skip(type)) {
+ PyErr_SetString(PyExc_TypeError, "Error while skipping unknown field");
+ return NULL;
+ }
+ continue;
+ }
+ StructItemSpec parsedspec;
+ if (!parse_struct_item_spec(&parsedspec, item_spec)) {
+ return NULL;
+ }
+ // A field whose wire type differs from the spec is skipped; the error below
+ // is only raised when that skip fails.
+ if (parsedspec.type != type) {
+ if (!skip(type)) {
+ PyErr_Format(PyExc_TypeError, "struct field had wrong type: expected %d but got %d",
+ parsedspec.type, type);
+ return NULL;
+ }
+ continue;
+ }
+
+ ScopedPyObject fieldval(decodeValue(parsedspec.type, parsedspec.typeargs));
+ if (!fieldval) {
+ return NULL;
+ }
+
+ // Store the value under the attribute name: in kwargs when constructing a
+ // new instance, directly on `output` otherwise.
+ if ((immutable && PyDict_SetItem(kwargs.get(), parsedspec.attrname, fieldval.get()) == -1)
+ || (!immutable && PyObject_SetAttr(output, parsedspec.attrname, fieldval.get()) == -1)) {
+ return NULL;
+ }
+ }
+ if (immutable) {
+ // Call klass with no positional arguments and the collected keyword arguments.
+ ScopedPyObject args(PyTuple_New(0));
+ if (!args) {
+ PyErr_SetString(PyExc_TypeError, "failed to prepare argument storage");
+ return NULL;
+ }
+ return PyObject_Call(klass, args.get(), kwargs.get());
+ }
+ // The caller receives its own reference to `output`.
+ Py_INCREF(output);
+ return output;
+}
+}
+}
+}
+#endif // THRIFT_PY_PROTOCOL_TCC
diff --git a/lib/py/src/ext/types.cpp b/lib/py/src/ext/types.cpp
new file mode 100644
index 0000000..f3a29a2
--- /dev/null
+++ b/lib/py/src/ext/types.cpp
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "ext/types.h"
+#include "ext/protocol.h"
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+// Cached "thrift.Thrift" module object; starts out NULL and is filled in by
+// decodeValue the first time an immutable map has to be built.
+PyObject* ThriftModule = NULL;
+
+// Format characters describing the arguments passed to the transport's refill
+// callable. NOTE: the array holds exactly these three characters and no
+// terminating NUL, so on its own it is not a valid C string.
+char refill_signature[] = {'s', '#', 'i'};
+
+/**
+ * Unpacks a struct field spec tuple (tag, type, attrname, typeargs, defval).
+ *
+ * The PyObject* members of `dest` are borrowed from `spec_tuple`
+ * (PyTuple_GET_ITEM), so they are only valid while the tuple is alive.
+ *
+ * @param dest       receives the parsed fields.
+ * @param spec_tuple tuple with exactly five items.
+ * @return true on success; false with a Python exception set otherwise.
+ */
+bool parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple) {
+  // i'd like to use ParseArgs here, but it seems to be a bottleneck.
+  const Py_ssize_t nitems = PyTuple_Size(spec_tuple);
+  if (nitems != 5) {
+    PyErr_Format(PyExc_TypeError, "expecting 5 arguments for spec tuple but got %d",
+                 static_cast<int>(nitems));
+    return false;
+  }
+
+  // The field id is a plain integer, not a TType; convert it as an int so ids
+  // beyond the enum's values are preserved.
+  const long tag = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 0));
+  if (INT_CONV_ERROR_OCCURRED(tag)) {
+    return false;
+  }
+  dest->tag = static_cast<int>(tag);
+
+  const long type = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 1));
+  if (INT_CONV_ERROR_OCCURRED(type)) {
+    return false;
+  }
+  dest->type = static_cast<TType>(type);
+
+  dest->attrname = PyTuple_GET_ITEM(spec_tuple, 2);
+  dest->typeargs = PyTuple_GET_ITEM(spec_tuple, 3);
+  dest->defval = PyTuple_GET_ITEM(spec_tuple, 4);
+  return true;
+}
+
+// Unpacks the type arguments of a list or set: (element type, element
+// typeargs, immutable flag). Sets TypeError and returns false when `typeargs`
+// is not a tuple of size 3; returns false when the element type cannot be
+// converted to an integer. The typeargs pointer is borrowed from the tuple.
+bool parse_set_list_args(SetListTypeArgs* dest, PyObject* typeargs) {
+  const bool has_three_items = PyTuple_Size(typeargs) == 3;
+  if (!has_three_items) {
+    PyErr_SetString(PyExc_TypeError, "expecting tuple of size 3 for list/set type args");
+    return false;
+  }
+
+  PyObject* element_type_obj = PyTuple_GET_ITEM(typeargs, 0);
+  dest->element_type = static_cast<TType>(PyInt_AsLong(element_type_obj));
+  if (INT_CONV_ERROR_OCCURRED(dest->element_type)) {
+    return false;
+  }
+
+  dest->typeargs = PyTuple_GET_ITEM(typeargs, 1);
+
+  // Only the Py_True singleton itself marks the container as immutable.
+  PyObject* immutable_flag = PyTuple_GET_ITEM(typeargs, 2);
+  dest->immutable = (immutable_flag == Py_True);
+
+  return true;
+}
+
+// Unpacks the type arguments of a map: (key type, key typeargs, value type,
+// value typeargs, immutable flag). Sets TypeError and returns false when
+// `typeargs` is not a tuple of size 5; returns false when either type cannot
+// be converted to an integer. The typeargs pointers are borrowed from the tuple.
+bool parse_map_args(MapTypeArgs* dest, PyObject* typeargs) {
+  const bool has_five_items = PyTuple_Size(typeargs) == 5;
+  if (!has_five_items) {
+    PyErr_SetString(PyExc_TypeError, "expecting 5 arguments for typeargs to map");
+    return false;
+  }
+
+  PyObject* key_type_obj = PyTuple_GET_ITEM(typeargs, 0);
+  dest->ktag = static_cast<TType>(PyInt_AsLong(key_type_obj));
+  if (INT_CONV_ERROR_OCCURRED(dest->ktag)) {
+    return false;
+  }
+
+  PyObject* value_type_obj = PyTuple_GET_ITEM(typeargs, 2);
+  dest->vtag = static_cast<TType>(PyInt_AsLong(value_type_obj));
+  if (INT_CONV_ERROR_OCCURRED(dest->vtag)) {
+    return false;
+  }
+
+  dest->ktypeargs = PyTuple_GET_ITEM(typeargs, 1);
+  dest->vtypeargs = PyTuple_GET_ITEM(typeargs, 3);
+
+  // Only the Py_True singleton itself marks the map as immutable.
+  PyObject* immutable_flag = PyTuple_GET_ITEM(typeargs, 4);
+  dest->immutable = (immutable_flag == Py_True);
+
+  return true;
+}
+
+// Unpacks the type arguments of a struct: (class, field spec). Sets TypeError
+// and returns false when `typeargs` is not a tuple of size 2. Both pointers
+// are borrowed from the tuple; dest->immutable is not assigned here.
+bool parse_struct_args(StructTypeArgs* dest, PyObject* typeargs) {
+  const bool has_two_items = PyTuple_Size(typeargs) == 2;
+  if (!has_two_items) {
+    PyErr_SetString(PyExc_TypeError, "expecting tuple of size 2 for struct args");
+    return false;
+  }
+
+  PyObject* klass = PyTuple_GET_ITEM(typeargs, 0);
+  PyObject* spec = PyTuple_GET_ITEM(typeargs, 1);
+  dest->klass = klass;
+  dest->spec = spec;
+
+  return true;
+}
+}
+}
+}
diff --git a/lib/py/src/ext/types.h b/lib/py/src/ext/types.h
new file mode 100644
index 0000000..749bb68
--- /dev/null
+++ b/lib/py/src/ext/types.h
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_TYPES_H
+#define THRIFT_PY_TYPES_H
+
+#include <Python.h>
+
+// Expands to the identifier _intern_<value>, the name of a global PyObject*
+// declared further down in this header.
+#define INTERN_STRING(value) _intern_##value
+
+// True when `v` equals -1 and a Python exception is pending, i.e. the -1 is an
+// error indicator rather than a real value. `v` is expanded once.
+#define INT_CONV_ERROR_OCCURRED(v) (((v) == -1) && PyErr_Occurred())
+
+// Global Python objects with C linkage, defined in another translation unit.
+// In this extension they are passed to PyObject_GetAttr as attribute names.
+extern "C" {
+extern PyObject* INTERN_STRING(TFrozenDict);
+extern PyObject* INTERN_STRING(cstringio_buf);
+extern PyObject* INTERN_STRING(cstringio_refill);
+}
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+// Cached "thrift.Thrift" module object; defined in types.cpp, initially NULL.
+extern PyObject* ThriftModule;
+
+// Stolen out of TProtocol.h.
+// It would be a huge pain to have both get this from one place.
+// Numeric Thrift type identifiers. Note that two pairs share a value:
+// T_BYTE/T_I08 are both 3 and T_STRING/T_UTF7 are both 11.
+enum TType {
+ T_STOP = 0,
+ T_VOID = 1,
+ T_BOOL = 2,
+ T_BYTE = 3,
+ T_I08 = 3,
+ T_I16 = 6,
+ T_I32 = 8,
+ T_U64 = 9,
+ T_I64 = 10,
+ T_DOUBLE = 4,
+ T_STRING = 11,
+ T_UTF7 = 11,
+ T_STRUCT = 12,
+ T_MAP = 13,
+ T_SET = 14,
+ T_LIST = 15,
+ T_UTF8 = 16,
+ T_UTF16 = 17
+};
+
+// replace with unique_ptr when we're OK with C++11
+/**
+ * Owns at most one reference to a Python object and drops it on destruction.
+ * Copying is disabled by keeping the copy operations private.
+ */
+class ScopedPyObject {
+public:
+  ScopedPyObject() : obj_(NULL) {}
+  // Takes over the reference held by the caller; NULL is allowed.
+  explicit ScopedPyObject(PyObject* py_object) : obj_(py_object) {}
+  ~ScopedPyObject() { Py_XDECREF(obj_); }
+
+  // Returns the held pointer without giving up ownership.
+  PyObject* get() throw() { return obj_; }
+  // True when an object is currently held.
+  operator bool() { return obj_ != NULL; }
+
+  // Drops the current reference (if any) and takes over `py_object`.
+  void reset(PyObject* py_object) throw() {
+    Py_XDECREF(obj_);
+    obj_ = py_object;
+  }
+
+  // Gives up ownership and returns the pointer; the holder becomes empty.
+  PyObject* release() throw() {
+    PyObject* released = obj_;
+    obj_ = NULL;
+    return released;
+  }
+
+  // Exchanges the held pointers; no reference count changes.
+  void swap(ScopedPyObject& other) throw() {
+    PyObject* mine = release();
+    reset(other.release());
+    other.reset(mine);
+  }
+
+private:
+  ScopedPyObject(const ScopedPyObject&) {}
+  ScopedPyObject& operator=(const ScopedPyObject&) { return *this; }
+
+  PyObject* obj_;
+};
+
+/**
+ * A cache of the two key attributes of a CReadableTransport,
+ * so we don't have to keep calling PyObject_GetAttr.
+ */
+struct DecodeBuffer {
+ // Transport attribute looked up with INTERN_STRING(cstringio_buf); the object
+ // bytes are read from.
+ ScopedPyObject stringiobuf;
+ // Transport attribute looked up with INTERN_STRING(cstringio_refill); called
+ // when the buffer cannot satisfy a read.
+ ScopedPyObject refill_callable;
+};
+
+// Format characters for the arguments of the refill callable; defined in
+// types.cpp. The declared size of exactly 3 leaves no room for a terminating NUL.
+extern char refill_signature[3];
+// The encode buffer is handled as a plain Python object.
+typedef PyObject EncodeBuffer;
+
+/**
+ * A cache of the spec_args for a set or list,
+ * so we don't have to keep calling PyTuple_GET_ITEM.
+ */
+struct SetListTypeArgs {
+ // Item 0 of the typeargs tuple: Thrift type of the elements.
+ TType element_type;
+ // Item 1 of the typeargs tuple: type arguments of the elements (borrowed).
+ PyObject* typeargs;
+ // True when item 2 of the typeargs tuple is Py_True.
+ bool immutable;
+};
+
+/**
+ * A cache of the spec_args for a map,
+ * so we don't have to keep calling PyTuple_GET_ITEM.
+ */
+struct MapTypeArgs {
+ // Item 0 of the typeargs tuple: Thrift type of the keys.
+ TType ktag;
+ // Item 2 of the typeargs tuple: Thrift type of the values.
+ TType vtag;
+ // Item 1 of the typeargs tuple: type arguments of the keys (borrowed).
+ PyObject* ktypeargs;
+ // Item 3 of the typeargs tuple: type arguments of the values (borrowed).
+ PyObject* vtypeargs;
+ // True when item 4 of the typeargs tuple is Py_True.
+ bool immutable;
+};
+
+/**
+ * A cache of the spec_args for a struct,
+ * so we don't have to keep calling PyTuple_GET_ITEM.
+ */
+struct StructTypeArgs {
+ // Item 0 of the typeargs tuple: the struct's class (borrowed).
+ PyObject* klass;
+ // Item 1 of the typeargs tuple: the struct's field spec (borrowed).
+ PyObject* spec;
+ // NOTE(review): parse_struct_args does not assign this member -- confirm it
+ // is initialized before any use.
+ bool immutable;
+};
+
+/**
+ * A cache of the item spec from a struct specification,
+ * so we don't have to keep calling PyTuple_GET_ITEM.
+ */
+struct StructItemSpec {
+ // Item 0 of the spec tuple: the field id.
+ int tag;
+ // Item 1 of the spec tuple: Thrift type of the field.
+ TType type;
+ // Item 2 of the spec tuple: attribute name of the field (borrowed).
+ PyObject* attrname;
+ // Item 3 of the spec tuple: type arguments of the field (borrowed).
+ PyObject* typeargs;
+ // Item 4 of the spec tuple: default value of the field (borrowed).
+ PyObject* defval;
+};
+
+// Each parser below fills `dest` from a Python tuple and returns false when the
+// tuple does not have the expected size or an integer item cannot be converted.
+
+// Expects a tuple of size 3: (element type, element typeargs, immutable flag).
+bool parse_set_list_args(SetListTypeArgs* dest, PyObject* typeargs);
+
+// Expects a tuple of size 5: (key type, key typeargs, value type, value
+// typeargs, immutable flag).
+bool parse_map_args(MapTypeArgs* dest, PyObject* typeargs);
+
+// Expects a tuple of size 2: (class, field spec).
+bool parse_struct_args(StructTypeArgs* dest, PyObject* typeargs);
+
+// Expects a tuple of size 5: (tag, type, attribute name, typeargs, default value).
+bool parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple);
+}
+}
+}
+
+#endif // THRIFT_PY_TYPES_H