THRIFT-3612 Add Python C extension for compact protocol
Client: Python
Patch: Nobuaki Sukegawa
This closes #844
diff --git a/lib/py/src/protocol/TBase.py b/lib/py/src/protocol/TBase.py
index 87caf0d..55da19e 100644
--- a/lib/py/src/protocol/TBase.py
+++ b/lib/py/src/protocol/TBase.py
@@ -17,14 +17,8 @@
# under the License.
#
-from thrift.protocol import TBinaryProtocol
from thrift.transport import TTransport
-try:
- from thrift.protocol import fastbinary
-except:
- fastbinary = None
-
class TBase(object):
__slots__ = ()
@@ -47,27 +41,19 @@
return not (self == other)
def read(self, iprot):
- if (iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and
+ if (iprot._fast_decode is not None and
isinstance(iprot.trans, TTransport.CReadableTransport) and
- self.thrift_spec is not None and
- fastbinary is not None):
- fastbinary.decode_binary(self,
- iprot.trans,
- (self.__class__, self.thrift_spec),
- iprot.string_length_limit,
- iprot.container_length_limit)
- return
- iprot.readStruct(self, self.thrift_spec)
+ self.thrift_spec is not None):
+ iprot._fast_decode(self, iprot, (self.__class__, self.thrift_spec))
+ else:
+ iprot.readStruct(self, self.thrift_spec)
def write(self, oprot):
- if (oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and
- self.thrift_spec is not None and
- fastbinary is not None):
+ if (oprot._fast_encode is not None and self.thrift_spec is not None):
oprot.trans.write(
- fastbinary.encode_binary(
- self, (self.__class__, self.thrift_spec)))
- return
- oprot.writeStruct(self, self.thrift_spec)
+ oprot._fast_encode(self, (self.__class__, self.thrift_spec)))
+ else:
+ oprot.writeStruct(self, self.thrift_spec)
class TExceptionBase(TBase, Exception):
@@ -86,14 +72,11 @@
@classmethod
def read(cls, iprot):
- if (iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and
+ if (iprot._fast_decode is not None and
isinstance(iprot.trans, TTransport.CReadableTransport) and
- cls.thrift_spec is not None and
- fastbinary is not None):
+ cls.thrift_spec is not None):
self = cls()
- return fastbinary.decode_binary(None,
- iprot.trans,
- (self.__class__, self.thrift_spec),
- iprot.string_length_limit,
- iprot.container_length_limit)
- return iprot.readStruct(cls, cls.thrift_spec, True)
+ return iprot._fast_decode(None, iprot,
+ (self.__class__, self.thrift_spec))
+ else:
+ return iprot.readStruct(cls, cls.thrift_spec, True)
diff --git a/lib/py/src/protocol/TBinaryProtocol.py b/lib/py/src/protocol/TBinaryProtocol.py
index 7fce12f..f6be772 100644
--- a/lib/py/src/protocol/TBinaryProtocol.py
+++ b/lib/py/src/protocol/TBinaryProtocol.py
@@ -258,6 +258,7 @@
We inherit from TBinaryProtocol so that the normal TBinaryProtocol
encoding can happen if the fastbinary module doesn't work for some
reason. (TODO(dreiss): Make this happen sanely in more cases.)
+ To disable this behavior, pass fallback=False constructor argument.
In order to take advantage of the C module, just use
TBinaryProtocolAccelerated instead of TBinaryProtocol.
@@ -270,16 +271,31 @@
"""
pass
+ def __init__(self, *args, **kwargs):
+ fallback = kwargs.pop('fallback', True)
+ super(TBinaryProtocolAccelerated, self).__init__(*args, **kwargs)
+ try:
+ from thrift.protocol import fastbinary
+ except ImportError:
+ if not fallback:
+ raise
+ else:
+ self._fast_decode = fastbinary.decode_binary
+ self._fast_encode = fastbinary.encode_binary
+
class TBinaryProtocolAcceleratedFactory(object):
def __init__(self,
string_length_limit=None,
- container_length_limit=None):
+ container_length_limit=None,
+ fallback=True):
self.string_length_limit = string_length_limit
self.container_length_limit = container_length_limit
+ self._fallback = fallback
def getProtocol(self, trans):
return TBinaryProtocolAccelerated(
trans,
string_length_limit=self.string_length_limit,
- container_length_limit=self.container_length_limit)
+ container_length_limit=self.container_length_limit,
+ fallback=self._fallback)
diff --git a/lib/py/src/protocol/TCompactProtocol.py b/lib/py/src/protocol/TCompactProtocol.py
index 8d3db1a..16fd9be 100644
--- a/lib/py/src/protocol/TCompactProtocol.py
+++ b/lib/py/src/protocol/TCompactProtocol.py
@@ -424,3 +424,49 @@
return TCompactProtocol(trans,
self.string_length_limit,
self.container_length_limit)
+
+
+class TCompactProtocolAccelerated(TCompactProtocol):
+ """C-Accelerated version of TCompactProtocol.
+
+ This class does not override any of TCompactProtocol's methods,
+ but the generated code recognizes it directly and will call into
+ our C module to do the encoding, bypassing this object entirely.
+ We inherit from TCompactProtocol so that the normal TCompactProtocol
+ encoding can happen if the fastbinary module doesn't work for some
+ reason.
+ To disable this behavior, pass fallback=False constructor argument.
+
+ In order to take advantage of the C module, just use
+ TCompactProtocolAccelerated instead of TCompactProtocol.
+ """
+ pass
+
+ def __init__(self, *args, **kwargs):
+ fallback = kwargs.pop('fallback', True)
+ super(TCompactProtocolAccelerated, self).__init__(*args, **kwargs)
+ try:
+ from thrift.protocol import fastbinary
+ except ImportError:
+ if not fallback:
+ raise
+ else:
+ self._fast_decode = fastbinary.decode_compact
+ self._fast_encode = fastbinary.encode_compact
+
+
+class TCompactProtocolAcceleratedFactory(object):
+ def __init__(self,
+ string_length_limit=None,
+ container_length_limit=None,
+ fallback=True):
+ self.string_length_limit = string_length_limit
+ self.container_length_limit = container_length_limit
+ self._fallback = fallback
+
+ def getProtocol(self, trans):
+ return TCompactProtocolAccelerated(
+ trans,
+ string_length_limit=self.string_length_limit,
+ container_length_limit=self.container_length_limit,
+ fallback=self._fallback)
diff --git a/lib/py/src/protocol/TProtocol.py b/lib/py/src/protocol/TProtocol.py
index ed6938b..f29c58d 100644
--- a/lib/py/src/protocol/TProtocol.py
+++ b/lib/py/src/protocol/TProtocol.py
@@ -48,6 +48,8 @@
def __init__(self, trans):
self.trans = trans
+ self._fast_decode = None
+ self._fast_encode = None
@staticmethod
def _check_length(limit, length):
@@ -276,7 +278,7 @@
yield read()
def readFieldByTType(self, ttype, spec):
- return self._read_by_ttype(ttype, spec, spec).next()
+ return next(self._read_by_ttype(ttype, spec, spec))
def readContainerList(self, spec):
ttype, tspec, is_immutable = spec
@@ -394,7 +396,7 @@
yield write(v)
def writeFieldByTType(self, ttype, val, spec):
- self._write_by_ttype(ttype, [val], spec, spec).next()
+ next(self._write_by_ttype(ttype, [val], spec, spec))
def checkIntegerLimits(i, bits):
diff --git a/lib/py/src/protocol/fastbinary.c b/lib/py/src/protocol/fastbinary.c
deleted file mode 100644
index da57c85..0000000
--- a/lib/py/src/protocol/fastbinary.c
+++ /dev/null
@@ -1,1289 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#include <Python.h>
-#include "cStringIO.h"
-#include <stdint.h>
-#ifndef _WIN32
-# include <stdbool.h>
-# include <netinet/in.h>
-#else
-# include <WinSock2.h>
-# pragma comment (lib, "ws2_32.lib")
-# define BIG_ENDIAN (4321)
-# define LITTLE_ENDIAN (1234)
-# define BYTE_ORDER LITTLE_ENDIAN
-# if defined(_MSC_VER) && _MSC_VER < 1600
- typedef int _Bool;
-# define bool _Bool
-# define false 0
-# define true 1
-# endif
-# define inline __inline
-#endif
-
-/* Fix endianness issues on Solaris */
-#if defined (__SVR4) && defined (__sun)
- #if defined(__i386) && !defined(__i386__)
- #define __i386__
- #endif
-
- #ifndef BIG_ENDIAN
- #define BIG_ENDIAN (4321)
- #endif
- #ifndef LITTLE_ENDIAN
- #define LITTLE_ENDIAN (1234)
- #endif
-
- /* I386 is LE, even on Solaris */
- #if !defined(BYTE_ORDER) && defined(__i386__)
- #define BYTE_ORDER LITTLE_ENDIAN
- #endif
-#endif
-
-// TODO(dreiss): defval appears to be unused. Look into removing it.
-// TODO(dreiss): Make parse_spec_args recursive, and cache the output
-// permanently in the object. (Malloc and orphan.)
-// TODO(dreiss): Why do we need cStringIO for reading, why not just char*?
-// Can cStringIO let us work with a BufferedTransport?
-// TODO(dreiss): Don't ignore the rv from cwrite (maybe).
-
-/* ====== BEGIN UTILITIES ====== */
-
-#define INIT_OUTBUF_SIZE 128
-
-// Stolen out of TProtocol.h.
-// It would be a huge pain to have both get this from one place.
-typedef enum TType {
- T_STOP = 0,
- T_VOID = 1,
- T_BOOL = 2,
- T_BYTE = 3,
- T_I08 = 3,
- T_I16 = 6,
- T_I32 = 8,
- T_U64 = 9,
- T_I64 = 10,
- T_DOUBLE = 4,
- T_STRING = 11,
- T_UTF7 = 11,
- T_STRUCT = 12,
- T_MAP = 13,
- T_SET = 14,
- T_LIST = 15,
- T_UTF8 = 16,
- T_UTF16 = 17
-} TType;
-
-#ifndef __BYTE_ORDER
-# if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
-# define __BYTE_ORDER BYTE_ORDER
-# define __LITTLE_ENDIAN LITTLE_ENDIAN
-# define __BIG_ENDIAN BIG_ENDIAN
-# else
-# error "Cannot determine endianness"
-# endif
-#endif
-
-// Same comment as the enum. Sorry.
-#if __BYTE_ORDER == __BIG_ENDIAN
-# define ntohll(n) (n)
-# define htonll(n) (n)
-#elif __BYTE_ORDER == __LITTLE_ENDIAN
-# if defined(__GNUC__) && defined(__GLIBC__)
-# include <byteswap.h>
-# define ntohll(n) bswap_64(n)
-# define htonll(n) bswap_64(n)
-# else /* GNUC & GLIBC */
-# define ntohll(n) ( (((unsigned long long)ntohl(n)) << 32) + ntohl(n >> 32) )
-# define htonll(n) ( (((unsigned long long)htonl(n)) << 32) + htonl(n >> 32) )
-# endif /* GNUC & GLIBC */
-#else /* __BYTE_ORDER */
-# error "Can't define htonll or ntohll!"
-#endif
-
-// Doing a benchmark shows that interning actually makes a difference, amazingly.
-#define INTERN_STRING(value) _intern_ ## value
-
-#define INT_CONV_ERROR_OCCURRED(v) ( ((v) == -1) && PyErr_Occurred() )
-#define CHECK_RANGE(v, min, max) ( ((v) <= (max)) && ((v) >= (min)) )
-
-/**
- * A cache of the spec_args for a set or list,
- * so we don't have to keep calling PyTuple_GET_ITEM.
- */
-typedef struct {
- TType element_type;
- PyObject* typeargs;
- bool immutable;
-} SetListTypeArgs;
-
-/**
- * A cache of the spec_args for a map,
- * so we don't have to keep calling PyTuple_GET_ITEM.
- */
-typedef struct {
- TType ktag;
- TType vtag;
- PyObject* ktypeargs;
- PyObject* vtypeargs;
- bool immutable;
-} MapTypeArgs;
-
-/**
- * A cache of the spec_args for a struct,
- * so we don't have to keep calling PyTuple_GET_ITEM.
- */
-typedef struct {
- PyObject* klass;
- PyObject* spec;
- bool immutable;
-} StructTypeArgs;
-
-/**
- * A cache of the item spec from a struct specification,
- * so we don't have to keep calling PyTuple_GET_ITEM.
- */
-typedef struct {
- int tag;
- TType type;
- PyObject* attrname;
- PyObject* typeargs;
- PyObject* defval;
-} StructItemSpec;
-
-/**
- * A cache of the two key attributes of a CReadableTransport,
- * so we don't have to keep calling PyObject_GetAttr.
- */
-typedef struct {
- PyObject* stringiobuf;
- PyObject* refill_callable;
-} DecodeBuffer;
-
-/** Pointer to interned string to speed up attribute lookup. */
-static PyObject* INTERN_STRING(cstringio_buf);
-/** Pointer to interned string to speed up attribute lookup. */
-static PyObject* INTERN_STRING(cstringio_refill);
-
-static inline bool
-check_ssize_t_32(Py_ssize_t len) {
- // error from getting the int
- if (INT_CONV_ERROR_OCCURRED(len)) {
- return false;
- }
- if (!CHECK_RANGE(len, 0, INT32_MAX)) {
- PyErr_SetString(PyExc_OverflowError, "size out of range: exceeded INT32_MAX");
- return false;
- }
- return true;
-}
-
-static inline bool
-check_length_limit(Py_ssize_t len, long limit) {
- if (!check_ssize_t_32(len)) {
- return false;
- }
- if (len > limit) {
- PyErr_Format(PyExc_OverflowError, "size exceeded specified limit: %d", limit);
- return false;
- }
- return true;
-}
-
-static inline bool
-parse_pyint(PyObject* o, int32_t* ret, int32_t min, int32_t max) {
- long val = PyInt_AsLong(o);
-
- if (INT_CONV_ERROR_OCCURRED(val)) {
- return false;
- }
- if (!CHECK_RANGE(val, min, max)) {
- PyErr_SetString(PyExc_OverflowError, "int out of range");
- return false;
- }
-
- *ret = (int32_t) val;
- return true;
-}
-
-static bool
-is_utf8(PyObject* typeargs) {
- return PyString_Check(typeargs) && !strncmp(PyString_AS_STRING(typeargs), "UTF8", 4);
-}
-
-/* --- FUNCTIONS TO PARSE STRUCT SPECIFICATOINS --- */
-
-static bool
-parse_set_list_args(SetListTypeArgs* dest, PyObject* typeargs) {
- if (PyTuple_Size(typeargs) != 3) {
- PyErr_SetString(PyExc_TypeError, "expecting tuple of size 3 for list/set type args");
- return false;
- }
-
- dest->element_type = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0));
- if (INT_CONV_ERROR_OCCURRED(dest->element_type)) {
- return false;
- }
-
- dest->typeargs = PyTuple_GET_ITEM(typeargs, 1);
-
- dest->immutable = Py_True == PyTuple_GET_ITEM(typeargs, 2);
-
- return true;
-}
-
-static bool
-parse_map_args(MapTypeArgs* dest, PyObject* typeargs) {
- if (PyTuple_Size(typeargs) != 5) {
- PyErr_SetString(PyExc_TypeError, "expecting 5 arguments for typeargs to map");
- return false;
- }
-
- dest->ktag = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0));
- if (INT_CONV_ERROR_OCCURRED(dest->ktag)) {
- return false;
- }
-
- dest->vtag = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 2));
- if (INT_CONV_ERROR_OCCURRED(dest->vtag)) {
- return false;
- }
-
- dest->ktypeargs = PyTuple_GET_ITEM(typeargs, 1);
- dest->vtypeargs = PyTuple_GET_ITEM(typeargs, 3);
- dest->immutable = Py_True == PyTuple_GET_ITEM(typeargs, 4);
-
- return true;
-}
-
-static bool
-parse_struct_args(StructTypeArgs* dest, PyObject* typeargs) {
- if (PyTuple_Size(typeargs) != 2) {
- PyErr_SetString(PyExc_TypeError, "expecting tuple of size 2 for struct args");
- return false;
- }
-
- dest->klass = PyTuple_GET_ITEM(typeargs, 0);
- dest->spec = PyTuple_GET_ITEM(typeargs, 1);
-
- return true;
-}
-
-static int
-parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple) {
-
- // i'd like to use ParseArgs here, but it seems to be a bottleneck.
- if (PyTuple_Size(spec_tuple) != 5) {
- PyErr_Format(PyExc_TypeError, "expecting 5 arguments for spec tuple but got %d", (int)PyTuple_Size(spec_tuple));
- return false;
- }
-
- dest->tag = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 0));
- if (INT_CONV_ERROR_OCCURRED(dest->tag)) {
- return false;
- }
-
- dest->type = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 1));
- if (INT_CONV_ERROR_OCCURRED(dest->type)) {
- return false;
- }
-
- dest->attrname = PyTuple_GET_ITEM(spec_tuple, 2);
- dest->typeargs = PyTuple_GET_ITEM(spec_tuple, 3);
- dest->defval = PyTuple_GET_ITEM(spec_tuple, 4);
- return true;
-}
-
-/* ====== END UTILITIES ====== */
-
-
-/* ====== BEGIN WRITING FUNCTIONS ====== */
-
-/* --- LOW-LEVEL WRITING FUNCTIONS --- */
-
-static void writeByte(PyObject* outbuf, int8_t val) {
- int8_t net = val;
- PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int8_t));
-}
-
-static void writeI16(PyObject* outbuf, int16_t val) {
- int16_t net = (int16_t)htons(val);
- PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int16_t));
-}
-
-static void writeI32(PyObject* outbuf, int32_t val) {
- int32_t net = (int32_t)htonl(val);
- PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int32_t));
-}
-
-static void writeI64(PyObject* outbuf, int64_t val) {
- int64_t net = (int64_t)htonll(val);
- PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int64_t));
-}
-
-static void writeDouble(PyObject* outbuf, double dub) {
- // Unfortunately, bitwise_cast doesn't work in C. Bad C!
- union {
- double f;
- int64_t t;
- } transfer;
- transfer.f = dub;
- writeI64(outbuf, transfer.t);
-}
-
-
-/* --- MAIN RECURSIVE OUTPUT FUNCTION -- */
-
-static bool
-output_val(PyObject* output, PyObject* value, TType type, PyObject* typeargs) {
- /*
- * Refcounting Strategy:
- *
- * We assume that elements of the thrift_spec tuple are not going to be
- * mutated, so we don't ref count those at all. Other than that, we try to
- * keep a reference to all the user-created objects while we work with them.
- * output_val assumes that a reference is already held. The *caller* is
- * responsible for handling references
- */
-
- switch (type) {
-
- case T_BOOL: {
- int v = PyObject_IsTrue(value);
- if (v == -1) {
- return false;
- }
-
- writeByte(output, (int8_t) v);
- break;
- }
- case T_I08: {
- int32_t val;
-
- if (!parse_pyint(value, &val, INT8_MIN, INT8_MAX)) {
- return false;
- }
-
- writeByte(output, (int8_t) val);
- break;
- }
- case T_I16: {
- int32_t val;
-
- if (!parse_pyint(value, &val, INT16_MIN, INT16_MAX)) {
- return false;
- }
-
- writeI16(output, (int16_t) val);
- break;
- }
- case T_I32: {
- int32_t val;
-
- if (!parse_pyint(value, &val, INT32_MIN, INT32_MAX)) {
- return false;
- }
-
- writeI32(output, val);
- break;
- }
- case T_I64: {
- int64_t nval = PyLong_AsLongLong(value);
-
- if (INT_CONV_ERROR_OCCURRED(nval)) {
- return false;
- }
-
- if (!CHECK_RANGE(nval, INT64_MIN, INT64_MAX)) {
- PyErr_SetString(PyExc_OverflowError, "int out of range");
- return false;
- }
-
- writeI64(output, nval);
- break;
- }
-
- case T_DOUBLE: {
- double nval = PyFloat_AsDouble(value);
- if (nval == -1.0 && PyErr_Occurred()) {
- return false;
- }
-
- writeDouble(output, nval);
- break;
- }
-
- case T_STRING: {
- Py_ssize_t len = 0;
- if (is_utf8(typeargs) && PyUnicode_Check(value))
- value = PyUnicode_AsUTF8String(value);
- len = PyString_Size(value);
-
- if (!check_ssize_t_32(len)) {
- return false;
- }
-
- writeI32(output, (int32_t) len);
- PycStringIO->cwrite(output, PyString_AsString(value), (int32_t) len);
- break;
- }
-
- case T_LIST:
- case T_SET: {
- Py_ssize_t len;
- SetListTypeArgs parsedargs;
- PyObject *item;
- PyObject *iterator;
-
- if (!parse_set_list_args(&parsedargs, typeargs)) {
- return false;
- }
-
- len = PyObject_Length(value);
-
- if (!check_ssize_t_32(len)) {
- return false;
- }
-
- writeByte(output, parsedargs.element_type);
- writeI32(output, (int32_t) len);
-
- iterator = PyObject_GetIter(value);
- if (iterator == NULL) {
- return false;
- }
-
- while ((item = PyIter_Next(iterator))) {
- if (!output_val(output, item, parsedargs.element_type, parsedargs.typeargs)) {
- Py_DECREF(item);
- Py_DECREF(iterator);
- return false;
- }
- Py_DECREF(item);
- }
-
- Py_DECREF(iterator);
-
- if (PyErr_Occurred()) {
- return false;
- }
-
- break;
- }
-
- case T_MAP: {
- PyObject *k, *v;
- Py_ssize_t pos = 0;
- Py_ssize_t len;
-
- MapTypeArgs parsedargs;
-
- len = PyDict_Size(value);
- if (!check_ssize_t_32(len)) {
- return false;
- }
-
- if (!parse_map_args(&parsedargs, typeargs)) {
- return false;
- }
-
- writeByte(output, parsedargs.ktag);
- writeByte(output, parsedargs.vtag);
- writeI32(output, len);
-
- // TODO(bmaurer): should support any mapping, not just dicts
- while (PyDict_Next(value, &pos, &k, &v)) {
- // TODO(dreiss): Think hard about whether these INCREFs actually
- // turn any unsafe scenarios into safe scenarios.
- Py_INCREF(k);
- Py_INCREF(v);
-
- if (!output_val(output, k, parsedargs.ktag, parsedargs.ktypeargs)
- || !output_val(output, v, parsedargs.vtag, parsedargs.vtypeargs)) {
- Py_DECREF(k);
- Py_DECREF(v);
- return false;
- }
- Py_DECREF(k);
- Py_DECREF(v);
- }
- break;
- }
-
- // TODO(dreiss): Consider breaking this out as a function
- // the way we did for decode_struct.
- case T_STRUCT: {
- StructTypeArgs parsedargs;
- Py_ssize_t nspec;
- Py_ssize_t i;
-
- if (!parse_struct_args(&parsedargs, typeargs)) {
- return false;
- }
-
- nspec = PyTuple_Size(parsedargs.spec);
-
- if (nspec == -1) {
- return false;
- }
-
- for (i = 0; i < nspec; i++) {
- StructItemSpec parsedspec;
- PyObject* spec_tuple;
- PyObject* instval = NULL;
-
- spec_tuple = PyTuple_GET_ITEM(parsedargs.spec, i);
- if (spec_tuple == Py_None) {
- continue;
- }
-
- if (!parse_struct_item_spec (&parsedspec, spec_tuple)) {
- return false;
- }
-
- instval = PyObject_GetAttr(value, parsedspec.attrname);
-
- if (!instval) {
- return false;
- }
-
- if (instval == Py_None) {
- Py_DECREF(instval);
- continue;
- }
-
- writeByte(output, (int8_t) parsedspec.type);
- writeI16(output, parsedspec.tag);
-
- if (!output_val(output, instval, parsedspec.type, parsedspec.typeargs)) {
- Py_DECREF(instval);
- return false;
- }
-
- Py_DECREF(instval);
- }
-
- writeByte(output, (int8_t)T_STOP);
- break;
- }
-
- case T_STOP:
- case T_VOID:
- case T_UTF16:
- case T_UTF8:
- case T_U64:
- default:
- PyErr_SetString(PyExc_TypeError, "Unexpected TType");
- return false;
-
- }
-
- return true;
-}
-
-
-/* --- TOP-LEVEL WRAPPER FOR OUTPUT -- */
-
-static PyObject *
-encode_binary(PyObject *self, PyObject *args) {
- PyObject* enc_obj;
- PyObject* type_args;
- PyObject* buf;
- PyObject* ret = NULL;
-
- if (!PyArg_ParseTuple(args, "OO", &enc_obj, &type_args)) {
- return NULL;
- }
-
- buf = PycStringIO->NewOutput(INIT_OUTBUF_SIZE);
- if (output_val(buf, enc_obj, T_STRUCT, type_args)) {
- ret = PycStringIO->cgetvalue(buf);
- }
-
- Py_DECREF(buf);
- return ret;
-}
-
-/* ====== END WRITING FUNCTIONS ====== */
-
-
-/* ====== BEGIN READING FUNCTIONS ====== */
-
-/* --- LOW-LEVEL READING FUNCTIONS --- */
-
-static void
-free_decodebuf(DecodeBuffer* d) {
- Py_XDECREF(d->stringiobuf);
- Py_XDECREF(d->refill_callable);
-}
-
-static bool
-decode_buffer_from_obj(DecodeBuffer* dest, PyObject* obj) {
- dest->stringiobuf = PyObject_GetAttr(obj, INTERN_STRING(cstringio_buf));
- if (!dest->stringiobuf) {
- return false;
- }
-
- if (!PycStringIO_InputCheck(dest->stringiobuf)) {
- free_decodebuf(dest);
- PyErr_SetString(PyExc_TypeError, "expecting stringio input");
- return false;
- }
-
- dest->refill_callable = PyObject_GetAttr(obj, INTERN_STRING(cstringio_refill));
-
- if(!dest->refill_callable) {
- free_decodebuf(dest);
- return false;
- }
-
- if (!PyCallable_Check(dest->refill_callable)) {
- free_decodebuf(dest);
- PyErr_SetString(PyExc_TypeError, "expecting callable");
- return false;
- }
-
- return true;
-}
-
-static bool readBytes(DecodeBuffer* input, char** output, int len) {
- int read;
-
- // TODO(dreiss): Don't fear the malloc. Think about taking a copy of
- // the partial read instead of forcing the transport
- // to prepend it to its buffer.
-
- read = PycStringIO->cread(input->stringiobuf, output, len);
-
- if (read == len) {
- return true;
- } else if (read == -1) {
- return false;
- } else {
- PyObject* newiobuf;
-
- // using building functions as this is a rare codepath
- newiobuf = PyObject_CallFunction(
- input->refill_callable, "s#i", *output, read, len, NULL);
- if (newiobuf == NULL) {
- return false;
- }
-
- // must do this *AFTER* the call so that we don't deref the io buffer
- Py_CLEAR(input->stringiobuf);
- input->stringiobuf = newiobuf;
-
- read = PycStringIO->cread(input->stringiobuf, output, len);
-
- if (read == len) {
- return true;
- } else if (read == -1) {
- return false;
- } else {
- // TODO(dreiss): This could be a valid code path for big binary blobs.
- PyErr_SetString(PyExc_TypeError,
- "refill claimed to have refilled the buffer, but didn't!!");
- return false;
- }
- }
-}
-
-static int8_t readByte(DecodeBuffer* input) {
- char* buf;
- if (!readBytes(input, &buf, sizeof(int8_t))) {
- return -1;
- }
-
- return *(int8_t*) buf;
-}
-
-static int16_t readI16(DecodeBuffer* input) {
- char* buf;
- if (!readBytes(input, &buf, sizeof(int16_t))) {
- return -1;
- }
-
- return (int16_t) ntohs(*(int16_t*) buf);
-}
-
-static int32_t readI32(DecodeBuffer* input) {
- char* buf;
- if (!readBytes(input, &buf, sizeof(int32_t))) {
- return -1;
- }
- return (int32_t) ntohl(*(int32_t*) buf);
-}
-
-
-static int64_t readI64(DecodeBuffer* input) {
- char* buf;
- if (!readBytes(input, &buf, sizeof(int64_t))) {
- return -1;
- }
-
- return (int64_t) ntohll(*(int64_t*) buf);
-}
-
-static double readDouble(DecodeBuffer* input) {
- union {
- int64_t f;
- double t;
- } transfer;
-
- transfer.f = readI64(input);
- if (transfer.f == -1) {
- return -1;
- }
- return transfer.t;
-}
-
-static bool
-checkTypeByte(DecodeBuffer* input, TType expected) {
- TType got = readByte(input);
- if (INT_CONV_ERROR_OCCURRED(got)) {
- return false;
- }
-
- if (expected != got) {
- PyErr_SetString(PyExc_TypeError, "got wrong ttype while reading field");
- return false;
- }
- return true;
-}
-
-static bool
-skip(DecodeBuffer* input, TType type) {
-#define SKIPBYTES(n) \
- do { \
- if (!readBytes(input, &dummy_buf, (n))) { \
- return false; \
- } \
- } while(0)
-
- char* dummy_buf;
-
- switch (type) {
-
- case T_BOOL:
- case T_I08: SKIPBYTES(1); break;
- case T_I16: SKIPBYTES(2); break;
- case T_I32: SKIPBYTES(4); break;
- case T_I64:
- case T_DOUBLE: SKIPBYTES(8); break;
-
- case T_STRING: {
- // TODO(dreiss): Find out if these check_ssize_t32s are really necessary.
- int len = readI32(input);
- if (!check_ssize_t_32(len)) {
- return false;
- }
- SKIPBYTES(len);
- break;
- }
-
- case T_LIST:
- case T_SET: {
- TType etype;
- int len, i;
-
- etype = readByte(input);
- if (etype == -1) {
- return false;
- }
-
- len = readI32(input);
- if (!check_ssize_t_32(len)) {
- return false;
- }
-
- for (i = 0; i < len; i++) {
- if (!skip(input, etype)) {
- return false;
- }
- }
- break;
- }
-
- case T_MAP: {
- TType ktype, vtype;
- int len, i;
-
- ktype = readByte(input);
- if (ktype == -1) {
- return false;
- }
-
- vtype = readByte(input);
- if (vtype == -1) {
- return false;
- }
-
- len = readI32(input);
- if (!check_ssize_t_32(len)) {
- return false;
- }
-
- for (i = 0; i < len; i++) {
- if (!(skip(input, ktype) && skip(input, vtype))) {
- return false;
- }
- }
- break;
- }
-
- case T_STRUCT: {
- while (true) {
- TType type;
-
- type = readByte(input);
- if (type == -1) {
- return false;
- }
-
- if (type == T_STOP)
- break;
-
- SKIPBYTES(2); // tag
- if (!skip(input, type)) {
- return false;
- }
- }
- break;
- }
-
- case T_STOP:
- case T_VOID:
- case T_UTF16:
- case T_UTF8:
- case T_U64:
- default:
- PyErr_SetString(PyExc_TypeError, "Unexpected TType");
- return false;
-
- }
-
- return true;
-
-#undef SKIPBYTES
-}
-
-
-/* --- HELPER FUNCTION FOR DECODE_VAL --- */
-
-static PyObject*
-decode_val(DecodeBuffer* input, TType type, PyObject* typeargs, long string_limit, long container_limit);
-
-static PyObject*
-decode_struct(DecodeBuffer* input, PyObject* output, PyObject* klass, PyObject* spec_seq, long string_limit, long container_limit) {
- int spec_seq_len = PyTuple_Size(spec_seq);
- bool immutable = output == Py_None;
- PyObject* kwargs = NULL;
- if (spec_seq_len == -1) {
- return NULL;
- }
-
- if (immutable) {
- kwargs = PyDict_New();
- if (!kwargs) {
- PyErr_SetString(PyExc_TypeError, "failed to prepare kwargument storage");
- return NULL;
- }
- }
-
- while (true) {
- TType type;
- int16_t tag;
- PyObject* item_spec;
- PyObject* fieldval = NULL;
- StructItemSpec parsedspec;
-
- type = readByte(input);
- if (type == -1) {
- goto error;
- }
- if (type == T_STOP) {
- break;
- }
- tag = readI16(input);
- if (INT_CONV_ERROR_OCCURRED(tag)) {
- goto error;
- }
- if (tag >= 0 && tag < spec_seq_len) {
- item_spec = PyTuple_GET_ITEM(spec_seq, tag);
- } else {
- item_spec = Py_None;
- }
-
- if (item_spec == Py_None) {
- if (!skip(input, type)) {
- goto error;
- } else {
- continue;
- }
- }
-
- if (!parse_struct_item_spec(&parsedspec, item_spec)) {
- goto error;
- }
- if (parsedspec.type != type) {
- if (!skip(input, type)) {
- PyErr_Format(PyExc_TypeError, "struct field had wrong type: expected %d but got %d", parsedspec.type, type);
- goto error;
- } else {
- continue;
- }
- }
-
- fieldval = decode_val(input, parsedspec.type, parsedspec.typeargs, string_limit, container_limit);
- if (fieldval == NULL) {
- goto error;
- }
-
- if ((immutable && PyDict_SetItem(kwargs, parsedspec.attrname, fieldval) == -1)
- || (!immutable && PyObject_SetAttr(output, parsedspec.attrname, fieldval) == -1)) {
- Py_DECREF(fieldval);
- goto error;
- }
- Py_DECREF(fieldval);
- }
- if (immutable) {
- PyObject* args = PyTuple_New(0);
- PyObject* ret = NULL;
- if (!args) {
- PyErr_SetString(PyExc_TypeError, "failed to prepare argument storage");
- goto error;
- }
- ret = PyObject_Call(klass, args, kwargs);
- Py_DECREF(kwargs);
- Py_DECREF(args);
- return ret;
- }
- Py_INCREF(output);
- return output;
-
- error:
- Py_XDECREF(kwargs);
- return NULL;
-}
-
-
-/* --- MAIN RECURSIVE INPUT FUNCTION --- */
-
-// Returns a new reference.
-static PyObject*
-decode_val(DecodeBuffer* input, TType type, PyObject* typeargs, long string_limit, long container_limit) {
- switch (type) {
-
- case T_BOOL: {
- int8_t v = readByte(input);
- if (INT_CONV_ERROR_OCCURRED(v)) {
- return NULL;
- }
-
- switch (v) {
- case 0: Py_RETURN_FALSE;
- case 1: Py_RETURN_TRUE;
- // Don't laugh. This is a potentially serious issue.
- default: PyErr_SetString(PyExc_TypeError, "boolean out of range"); return NULL;
- }
- break;
- }
- case T_I08: {
- int8_t v = readByte(input);
- if (INT_CONV_ERROR_OCCURRED(v)) {
- return NULL;
- }
-
- return PyInt_FromLong(v);
- }
- case T_I16: {
- int16_t v = readI16(input);
- if (INT_CONV_ERROR_OCCURRED(v)) {
- return NULL;
- }
- return PyInt_FromLong(v);
- }
- case T_I32: {
- int32_t v = readI32(input);
- if (INT_CONV_ERROR_OCCURRED(v)) {
- return NULL;
- }
- return PyInt_FromLong(v);
- }
-
- case T_I64: {
- int64_t v = readI64(input);
- if (INT_CONV_ERROR_OCCURRED(v)) {
- return NULL;
- }
- // TODO(dreiss): Find out if we can take this fastpath always when
- // sizeof(long) == sizeof(long long).
- if (CHECK_RANGE(v, LONG_MIN, LONG_MAX)) {
- return PyInt_FromLong((long) v);
- }
-
- return PyLong_FromLongLong(v);
- }
-
- case T_DOUBLE: {
- double v = readDouble(input);
- if (v == -1.0 && PyErr_Occurred()) {
- return false;
- }
- return PyFloat_FromDouble(v);
- }
-
- case T_STRING: {
- Py_ssize_t len = readI32(input);
- char* buf;
- if (!readBytes(input, &buf, len)) {
- return NULL;
- }
- if (!check_length_limit(len, string_limit)) {
- return NULL;
- }
-
- if (is_utf8(typeargs))
- return PyUnicode_DecodeUTF8(buf, len, 0);
- else
- return PyString_FromStringAndSize(buf, len);
- }
-
- case T_LIST:
- case T_SET: {
- SetListTypeArgs parsedargs;
- int32_t len;
- PyObject* ret = NULL;
- int i;
- bool use_tuple = false;
-
- if (!parse_set_list_args(&parsedargs, typeargs)) {
- return NULL;
- }
-
- if (!checkTypeByte(input, parsedargs.element_type)) {
- return NULL;
- }
-
- len = readI32(input);
- if (!check_length_limit(len, container_limit)) {
- return NULL;
- }
-
- use_tuple = type == T_LIST && parsedargs.immutable;
- ret = use_tuple ? PyTuple_New(len) : PyList_New(len);
- if (!ret) {
- return NULL;
- }
-
- for (i = 0; i < len; i++) {
- PyObject* item = decode_val(input, parsedargs.element_type, parsedargs.typeargs, string_limit, container_limit);
- if (!item) {
- Py_DECREF(ret);
- return NULL;
- }
- if (use_tuple) {
- PyTuple_SET_ITEM(ret, i, item);
- } else {
- PyList_SET_ITEM(ret, i, item);
- }
- }
-
- // TODO(dreiss): Consider biting the bullet and making two separate cases
- // for list and set, avoiding this post facto conversion.
- if (type == T_SET) {
- PyObject* setret;
- setret = parsedargs.immutable ? PyFrozenSet_New(ret) : PySet_New(ret);
- Py_DECREF(ret);
- return setret;
- }
- return ret;
- }
-
- case T_MAP: {
- int32_t len;
- int i;
- MapTypeArgs parsedargs;
- PyObject* ret = NULL;
-
- if (!parse_map_args(&parsedargs, typeargs)) {
- return NULL;
- }
-
- if (!checkTypeByte(input, parsedargs.ktag)) {
- return NULL;
- }
- if (!checkTypeByte(input, parsedargs.vtag)) {
- return NULL;
- }
-
- len = readI32(input);
- if (!check_length_limit(len, container_limit)) {
- return NULL;
- }
-
- ret = PyDict_New();
- if (!ret) {
- goto error;
- }
-
- for (i = 0; i < len; i++) {
- PyObject* k = NULL;
- PyObject* v = NULL;
- k = decode_val(input, parsedargs.ktag, parsedargs.ktypeargs, string_limit, container_limit);
- if (k == NULL) {
- goto loop_error;
- }
- v = decode_val(input, parsedargs.vtag, parsedargs.vtypeargs, string_limit, container_limit);
- if (v == NULL) {
- goto loop_error;
- }
- if (PyDict_SetItem(ret, k, v) == -1) {
- goto loop_error;
- }
-
- Py_DECREF(k);
- Py_DECREF(v);
- continue;
-
- // Yuck! Destructors, anyone?
- loop_error:
- Py_XDECREF(k);
- Py_XDECREF(v);
- goto error;
- }
-
- if (parsedargs.immutable) {
- PyObject* thrift = PyImport_ImportModule("thrift.Thrift");
- PyObject* cls = NULL;
- PyObject* arg = NULL;
- if (!thrift) {
- goto error;
- }
- cls = PyObject_GetAttrString(thrift, "TFrozenDict");
- if (!cls) {
- goto error;
- }
- arg = PyTuple_New(1);
- PyTuple_SET_ITEM(arg, 0, ret);
- return PyObject_CallObject(cls, arg);
- }
-
- return ret;
-
- error:
- Py_XDECREF(ret);
- return NULL;
- }
-
- case T_STRUCT: {
- StructTypeArgs parsedargs;
- if (!parse_struct_args(&parsedargs, typeargs)) {
- return NULL;
- }
-
- return decode_struct(input, Py_None, parsedargs.klass, parsedargs.spec, string_limit, container_limit);
- }
-
- case T_STOP:
- case T_VOID:
- case T_UTF16:
- case T_UTF8:
- case T_U64:
- default:
- PyErr_SetString(PyExc_TypeError, "Unexpected TType");
- return NULL;
- }
-}
-
-static long as_long_or(PyObject* value, long default_value) {
- long v = PyInt_AsLong(value);
- if (INT_CONV_ERROR_OCCURRED(v)) {
- PyErr_Clear();
- return default_value;
- }
- return v;
-}
-
-
-/* --- TOP-LEVEL WRAPPER FOR INPUT -- */
-
-static PyObject*
-decode_binary(PyObject *self, PyObject *args) {
- PyObject* output_obj = NULL;
- PyObject* transport = NULL;
- PyObject* typeargs = NULL;
- StructTypeArgs parsedargs;
- PyObject* string_limit_obj = NULL;
- PyObject* container_limit_obj = NULL;
- long string_limit = 0;
- long container_limit = 0;
- DecodeBuffer input = {0, 0};
- PyObject* ret = NULL;
-
- if (!PyArg_ParseTuple(args, "OOOOO", &output_obj, &transport, &typeargs, &string_limit_obj, &container_limit_obj)) {
- return NULL;
- }
- string_limit = as_long_or(string_limit_obj, INT32_MAX);
- container_limit = as_long_or(container_limit_obj, INT32_MAX);
-
- if (!parse_struct_args(&parsedargs, typeargs)) {
- return NULL;
- }
-
- if (!decode_buffer_from_obj(&input, transport)) {
- return NULL;
- }
-
- ret = decode_struct(&input, output_obj, parsedargs.klass, parsedargs.spec, string_limit, container_limit);
- free_decodebuf(&input);
- return ret;
-}
-
-/* ====== END READING FUNCTIONS ====== */
-
-
-/* -- PYTHON MODULE SETUP STUFF --- */
-
-static PyMethodDef ThriftFastBinaryMethods[] = {
-
- {"encode_binary", encode_binary, METH_VARARGS, ""},
- {"decode_binary", decode_binary, METH_VARARGS, ""},
-
- {NULL, NULL, 0, NULL} /* Sentinel */
-};
-
-PyMODINIT_FUNC
-initfastbinary(void) {
-#define INIT_INTERN_STRING(value) \
- do { \
- INTERN_STRING(value) = PyString_InternFromString(#value); \
- if(!INTERN_STRING(value)) return; \
- } while(0)
-
- INIT_INTERN_STRING(cstringio_buf);
- INIT_INTERN_STRING(cstringio_refill);
-#undef INIT_INTERN_STRING
-
- PycString_IMPORT;
- if (PycStringIO == NULL) return;
-
- (void) Py_InitModule("thrift.protocol.fastbinary", ThriftFastBinaryMethods);
-}