THRIFT-3612 Add Python C extension for compact protocol
Client: Python
Patch: Nobuaki Sukegawa
This closes #844
diff --git a/lib/py/src/ext/compact.h b/lib/py/src/ext/compact.h
new file mode 100644
index 0000000..5bba237
--- /dev/null
+++ b/lib/py/src/ext/compact.h
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THRIFT_PY_COMPACT_H
+#define THRIFT_PY_COMPACT_H
+
+#include <Python.h>
+#include "ext/protocol.h"
+#include "ext/endian.h"
+#include <stdint.h>
+#include <stack>
+
+namespace apache {
+namespace thrift {
+namespace py {
+
+class CompactProtocol : public ProtocolBase<CompactProtocol> {
+public:
+ CompactProtocol() { readBool_.exists = false; }
+
+ virtual ~CompactProtocol() {}
+
+ void writeI8(int8_t val) { writeBuffer(reinterpret_cast<char*>(&val), 1); }
+
+ void writeI16(int16_t val) { writeVarint(toZigZag(val)); }
+
+ int writeI32(int32_t val) { return writeVarint(toZigZag(val)); }
+
+ void writeI64(int64_t val) { writeVarint64(toZigZag64(val)); }
+
+ void writeDouble(double dub) {
+ union {
+ double f;
+ int64_t t;
+ } transfer;
+ transfer.f = htolell(dub);
+ writeBuffer(reinterpret_cast<char*>(&transfer.t), sizeof(int64_t));
+ }
+
+ void writeBool(int v) { writeByte(static_cast<uint8_t>(v ? CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE)); }
+
+ void writeString(PyObject* value, int32_t len) {
+ writeVarint(len);
+ writeBuffer(PyBytes_AS_STRING(value), len);
+ }
+
+ bool writeListBegin(PyObject* value, const SetListTypeArgs& args, int32_t len) {
+ int ctype = toCompactType(args.element_type);
+ if (len <= 14) {
+ writeByte(static_cast<uint8_t>(len << 4 | ctype));
+ } else {
+ writeByte(0xf0 | ctype);
+ writeVarint(len);
+ }
+ return true;
+ }
+
+ bool writeMapBegin(PyObject* value, const MapTypeArgs& args, int32_t len) {
+ if (len == 0) {
+ writeByte(0);
+ return true;
+ }
+ int ctype = toCompactType(args.ktag) << 4 | toCompactType(args.vtag);
+ writeVarint(len);
+ writeByte(ctype);
+ return true;
+ }
+
+ bool writeStructBegin() {
+ writeTags_.push(0);
+ return true;
+ }
+ bool writeStructEnd() {
+ writeTags_.pop();
+ return true;
+ }
+
+ bool writeField(PyObject* value, const StructItemSpec& spec) {
+ if (spec.type == T_BOOL) {
+ doWriteFieldBegin(spec, PyObject_IsTrue(value) ? CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE);
+ return true;
+ } else {
+ doWriteFieldBegin(spec, toCompactType(spec.type));
+ return encodeValue(value, spec.type, spec.typeargs);
+ }
+ }
+
+ void writeFieldStop() { writeByte(0); }
+
+ bool readBool(bool& val) {
+ if (readBool_.exists) {
+ readBool_.exists = false;
+ val = readBool_.value;
+ return true;
+ }
+ char* buf;
+ if (!readBytes(&buf, 1)) {
+ return false;
+ }
+ val = buf[0] == CT_BOOLEAN_TRUE;
+ return true;
+ }
+ bool readI8(int8_t& val) {
+ char* buf;
+ if (!readBytes(&buf, 1)) {
+ return false;
+ }
+ val = buf[0];
+ return true;
+ }
+
+ bool readI16(int16_t& val) {
+ uint16_t uval;
+ if (readVarint<uint16_t, 3>(uval)) {
+ val = fromZigZag<int16_t, uint16_t>(uval);
+ return true;
+ }
+ return false;
+ }
+
+ bool readI32(int32_t& val) {
+ uint32_t uval;
+ if (readVarint<uint32_t, 5>(uval)) {
+ val = fromZigZag<int32_t, uint32_t>(uval);
+ return true;
+ }
+ return false;
+ }
+
+ bool readI64(int64_t& val) {
+ uint64_t uval;
+ if (readVarint<uint64_t, 10>(uval)) {
+ val = fromZigZag<int64_t, uint64_t>(uval);
+ return true;
+ }
+ return false;
+ }
+
+ bool readDouble(double& val) {
+ union {
+ int64_t f;
+ double t;
+ } transfer;
+
+ char* buf;
+ if (!readBytes(&buf, 8)) {
+ return false;
+ }
+ transfer.f = letohll(*reinterpret_cast<int64_t*>(buf));
+ val = transfer.t;
+ return true;
+ }
+
+ int32_t readString(char** buf) {
+ uint32_t len;
+ if (!readVarint<uint32_t, 5>(len) || !checkLengthLimit(len, stringLimit())) {
+ return -1;
+ }
+ if (len == 0) {
+ return 0;
+ }
+ if (!readBytes(buf, len)) {
+ return -1;
+ }
+ return len;
+ }
+
+ int32_t readListBegin(TType& etype) {
+ uint8_t b;
+ if (!readByte(b)) {
+ return -1;
+ }
+ etype = getTType(b & 0xf);
+ if (etype == -1) {
+ return -1;
+ }
+ uint32_t len = (b >> 4) & 0xf;
+ if (len == 15 && !readVarint<uint32_t, 5>(len)) {
+ return -1;
+ }
+ if (!checkLengthLimit(len, containerLimit())) {
+ return -1;
+ }
+ return len;
+ }
+
+ int32_t readMapBegin(TType& ktype, TType& vtype) {
+ uint32_t len;
+ if (!readVarint<uint32_t, 5>(len) || !checkLengthLimit(len, containerLimit())) {
+ return -1;
+ }
+ if (len != 0) {
+ uint8_t kvType;
+ if (!readByte(kvType)) {
+ return -1;
+ }
+ ktype = getTType(kvType >> 4);
+ vtype = getTType(kvType & 0xf);
+ if (ktype == -1 || vtype == -1) {
+ return -1;
+ }
+ }
+ return len;
+ }
+
+ bool readStructBegin() {
+ readTags_.push(0);
+ return true;
+ }
+ bool readStructEnd() {
+ readTags_.pop();
+ return true;
+ }
+ bool readFieldBegin(TType& type, int16_t& tag);
+
+ bool skipBool() {
+ bool val;
+ return readBool(val);
+ }
+#define SKIPBYTES(n) \
+ do { \
+ if (!readBytes(&dummy_buf_, (n))) { \
+ return false; \
+ } \
+ return true; \
+ } while (0)
+ bool skipByte() { SKIPBYTES(1); }
+ bool skipDouble() { SKIPBYTES(8); }
+ bool skipI16() {
+ int16_t val;
+ return readI16(val);
+ }
+ bool skipI32() {
+ int32_t val;
+ return readI32(val);
+ }
+ bool skipI64() {
+ int64_t val;
+ return readI64(val);
+ }
+ bool skipString() {
+ uint32_t len;
+ if (!readVarint<uint32_t, 5>(len)) {
+ return false;
+ }
+ SKIPBYTES(len);
+ }
+#undef SKIPBYTES
+
+private:
+ enum Types {
+ CT_STOP = 0x00,
+ CT_BOOLEAN_TRUE = 0x01,
+ CT_BOOLEAN_FALSE = 0x02,
+ CT_BYTE = 0x03,
+ CT_I16 = 0x04,
+ CT_I32 = 0x05,
+ CT_I64 = 0x06,
+ CT_DOUBLE = 0x07,
+ CT_BINARY = 0x08,
+ CT_LIST = 0x09,
+ CT_SET = 0x0A,
+ CT_MAP = 0x0B,
+ CT_STRUCT = 0x0C
+ };
+
+ static const uint8_t TTypeToCType[];
+
+ TType getTType(uint8_t type);
+
+ int toCompactType(TType type) {
+ int i = static_cast<int>(type);
+ return i < 16 ? TTypeToCType[i] : -1;
+ }
+
+ uint32_t toZigZag(int32_t val) { return (val >> 31) ^ (val << 1); }
+
+ uint64_t toZigZag64(int64_t val) { return (val >> 63) ^ (val << 1); }
+
+ int writeVarint(uint32_t val) {
+ int cnt = 1;
+ while (val & ~0x7fU) {
+ writeByte(static_cast<char>((val & 0x7fU) | 0x80U));
+ val >>= 7;
+ ++cnt;
+ }
+ writeByte(static_cast<char>(val));
+ return cnt;
+ }
+
+ int writeVarint64(uint64_t val) {
+ int cnt = 1;
+ while (val & ~0x7fULL) {
+ writeByte(static_cast<char>((val & 0x7fULL) | 0x80ULL));
+ val >>= 7;
+ ++cnt;
+ }
+ writeByte(static_cast<char>(val));
+ return cnt;
+ }
+
+ template <typename T, int Max>
+ bool readVarint(T& result) {
+ uint8_t b;
+ T val = 0;
+ int shift = 0;
+ for (int i = 0; i < Max; ++i) {
+ if (!readByte(b)) {
+ return false;
+ }
+ if (b & 0x80) {
+ val |= static_cast<T>(b & 0x7f) << shift;
+ } else {
+ val |= static_cast<T>(b) << shift;
+ result = val;
+ return true;
+ }
+ shift += 7;
+ }
+ PyErr_Format(PyExc_OverflowError, "varint exceeded %d bytes", Max);
+ return false;
+ }
+
+ template <typename S, typename U>
+ S fromZigZag(U val) {
+ return (val >> 1) ^ static_cast<U>(-static_cast<S>(val & 1));
+ }
+
+ void doWriteFieldBegin(const StructItemSpec& spec, int ctype) {
+ int diff = spec.tag - writeTags_.top();
+ if (diff > 0 && diff <= 15) {
+ writeByte(static_cast<uint8_t>(diff << 4 | ctype));
+ } else {
+ writeByte(static_cast<uint8_t>(ctype));
+ writeI16(spec.tag);
+ }
+ writeTags_.top() = spec.tag;
+ }
+
+ std::stack<int> writeTags_;
+ std::stack<int> readTags_;
+ struct {
+ bool exists;
+ bool value;
+ } readBool_;
+ char* dummy_buf_;
+};
+}
+}
+}
+#endif // THRIFT_PY_COMPACT_H