blob: da57c8559334caec9cd7c93362881fdf148b1e48 [file] [log] [blame]
David Reissea2cba82009-03-30 21:35:00 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
David Reiss382fc302007-08-25 18:01:30 +000019
20#include <Python.h>
21#include "cStringIO.h"
David Reiss382fc302007-08-25 18:01:30 +000022#include <stdint.h>
Roger Meierc3f033f2011-09-13 13:54:05 +000023#ifndef _WIN32
24# include <stdbool.h>
25# include <netinet/in.h>
26#else
27# include <WinSock2.h>
28# pragma comment (lib, "ws2_32.lib")
29# define BIG_ENDIAN (4321)
30# define LITTLE_ENDIAN (1234)
31# define BYTE_ORDER LITTLE_ENDIAN
32# if defined(_MSC_VER) && _MSC_VER < 1600
33 typedef int _Bool;
34# define bool _Bool
Roger Meier7daf00c2015-06-03 11:45:35 +020035# define false 0
Roger Meierc3f033f2011-09-13 13:54:05 +000036# define true 1
37# endif
38# define inline __inline
39#endif
David Reiss382fc302007-08-25 18:01:30 +000040
David Reiss49809102009-03-18 23:14:13 +000041/* Fix endianness issues on Solaris */
42#if defined (__SVR4) && defined (__sun)
43 #if defined(__i386) && !defined(__i386__)
44 #define __i386__
45 #endif
46
47 #ifndef BIG_ENDIAN
48 #define BIG_ENDIAN (4321)
49 #endif
50 #ifndef LITTLE_ENDIAN
51 #define LITTLE_ENDIAN (1234)
52 #endif
53
54 /* I386 is LE, even on Solaris */
55 #if !defined(BYTE_ORDER) && defined(__i386__)
56 #define BYTE_ORDER LITTLE_ENDIAN
57 #endif
58#endif
59
David Reiss382fc302007-08-25 18:01:30 +000060// TODO(dreiss): defval appears to be unused. Look into removing it.
61// TODO(dreiss): Make parse_spec_args recursive, and cache the output
62// permanently in the object. (Malloc and orphan.)
63// TODO(dreiss): Why do we need cStringIO for reading, why not just char*?
64// Can cStringIO let us work with a BufferedTransport?
65// TODO(dreiss): Don't ignore the rv from cwrite (maybe).
66
67/* ====== BEGIN UTILITIES ====== */
68
69#define INIT_OUTBUF_SIZE 128
70
/*
 * Thrift wire type identifiers, copied from TProtocol.h because sharing a
 * single definition between the two places would be a huge pain.
 * Enumerators are listed in numeric order; some names alias the same value.
 */
typedef enum TType {
  T_STOP   = 0,
  T_VOID   = 1,
  T_BOOL   = 2,
  T_BYTE   = 3,
  T_I08    = 3,   /* same value as T_BYTE */
  T_DOUBLE = 4,
  T_I16    = 6,
  T_I32    = 8,
  T_U64    = 9,
  T_I64    = 10,
  T_STRING = 11,
  T_UTF7   = 11,  /* same value as T_STRING */
  T_STRUCT = 12,
  T_MAP    = 13,
  T_SET    = 14,
  T_LIST   = 15,
  T_UTF8   = 16,
  T_UTF16  = 17
} TType;
93
/*
 * If the platform headers did not define the double-underscore endianness
 * macros, derive them from the unprefixed BYTE_ORDER family.
 */
#ifndef __BYTE_ORDER
# if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
#  define __BYTE_ORDER BYTE_ORDER
#  define __LITTLE_ENDIAN LITTLE_ENDIAN
#  define __BIG_ENDIAN BIG_ENDIAN
# else
#  error "Cannot determine endianness"
# endif
#endif

/*
 * 64-bit host <-> network byte order conversion.
 *
 * Big-endian hosts need no work.  On little-endian hosts glibc's bswap_64 is
 * used when available; otherwise the value is assembled from two 32-bit
 * swaps.  The fallback parenthesizes its argument and converts it to an
 * unsigned 64-bit value before shifting, so it is safe for compound
 * expressions and for negative signed inputs.  Note the fallback evaluates
 * its argument twice.
 */
#if __BYTE_ORDER == __BIG_ENDIAN
# define ntohll(n) (n)
# define htonll(n) (n)
#elif __BYTE_ORDER == __LITTLE_ENDIAN
# if defined(__GNUC__) && defined(__GLIBC__)
#  include <byteswap.h>
#  define ntohll(n) bswap_64(n)
#  define htonll(n) bswap_64(n)
# else /* GNUC & GLIBC */
#  define ntohll(n) ( (((unsigned long long)ntohl((uint32_t)(n))) << 32) \
                      + ntohl((uint32_t)(((unsigned long long)(n)) >> 32)) )
#  define htonll(n) ( (((unsigned long long)htonl((uint32_t)(n))) << 32) \
                      + htonl((uint32_t)(((unsigned long long)(n)) >> 32)) )
# endif /* GNUC & GLIBC */
#else /* __BYTE_ORDER */
# error "Can't define htonll or ntohll!"
#endif
120
/*
 * Name of the file-level variable that holds an interned attribute-name
 * string.  Benchmarking showed that interning makes a measurable difference.
 */
#define INTERN_STRING(value) _intern_ ## value

/* True when v is the -1 error sentinel and a Python exception is pending. */
#define INT_CONV_ERROR_OCCURRED(v) ( ((v) == -1) && PyErr_Occurred() )

/* True when min <= v <= max (both bounds inclusive). */
#define CHECK_RANGE(v, min, max) ( !( ((v) > (max)) || ((v) < (min)) ) )
126
David Reiss382fc302007-08-25 18:01:30 +0000127/**
128 * A cache of the spec_args for a set or list,
129 * so we don't have to keep calling PyTuple_GET_ITEM.
130 */
131typedef struct {
132 TType element_type;
133 PyObject* typeargs;
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900134 bool immutable;
David Reiss382fc302007-08-25 18:01:30 +0000135} SetListTypeArgs;
136
137/**
138 * A cache of the spec_args for a map,
139 * so we don't have to keep calling PyTuple_GET_ITEM.
140 */
141typedef struct {
142 TType ktag;
143 TType vtag;
144 PyObject* ktypeargs;
145 PyObject* vtypeargs;
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900146 bool immutable;
David Reiss382fc302007-08-25 18:01:30 +0000147} MapTypeArgs;
148
149/**
150 * A cache of the spec_args for a struct,
151 * so we don't have to keep calling PyTuple_GET_ITEM.
152 */
153typedef struct {
154 PyObject* klass;
155 PyObject* spec;
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900156 bool immutable;
David Reiss382fc302007-08-25 18:01:30 +0000157} StructTypeArgs;
158
159/**
160 * A cache of the item spec from a struct specification,
161 * so we don't have to keep calling PyTuple_GET_ITEM.
162 */
163typedef struct {
164 int tag;
165 TType type;
166 PyObject* attrname;
167 PyObject* typeargs;
168 PyObject* defval;
169} StructItemSpec;
170
171/**
172 * A cache of the two key attributes of a CReadableTransport,
173 * so we don't have to keep calling PyObject_GetAttr.
174 */
175typedef struct {
176 PyObject* stringiobuf;
177 PyObject* refill_callable;
178} DecodeBuffer;
179
180/** Pointer to interned string to speed up attribute lookup. */
181static PyObject* INTERN_STRING(cstringio_buf);
182/** Pointer to interned string to speed up attribute lookup. */
183static PyObject* INTERN_STRING(cstringio_refill);
184
185static inline bool
186check_ssize_t_32(Py_ssize_t len) {
187 // error from getting the int
188 if (INT_CONV_ERROR_OCCURRED(len)) {
189 return false;
190 }
191 if (!CHECK_RANGE(len, 0, INT32_MAX)) {
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +0900192 PyErr_SetString(PyExc_OverflowError, "size out of range: exceeded INT32_MAX");
David Reiss382fc302007-08-25 18:01:30 +0000193 return false;
194 }
195 return true;
196}
197
Roger Meier7daf00c2015-06-03 11:45:35 +0200198static inline bool
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +0900199check_length_limit(Py_ssize_t len, long limit) {
200 if (!check_ssize_t_32(len)) {
Roger Meier7daf00c2015-06-03 11:45:35 +0200201 return false;
202 }
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +0900203 if (len > limit) {
204 PyErr_Format(PyExc_OverflowError, "size exceeded specified limit: %d", limit);
Roger Meier7daf00c2015-06-03 11:45:35 +0200205 return false;
206 }
207 return true;
208}
209
David Reiss382fc302007-08-25 18:01:30 +0000210static inline bool
211parse_pyint(PyObject* o, int32_t* ret, int32_t min, int32_t max) {
212 long val = PyInt_AsLong(o);
213
214 if (INT_CONV_ERROR_OCCURRED(val)) {
215 return false;
216 }
217 if (!CHECK_RANGE(val, min, max)) {
218 PyErr_SetString(PyExc_OverflowError, "int out of range");
219 return false;
220 }
221
222 *ret = (int32_t) val;
223 return true;
224}
225
Nobuaki Sukegawa4733db42016-01-05 02:50:57 +0900226static bool
227is_utf8(PyObject* typeargs) {
Nobuaki Sukegawaa3b88a02016-01-06 20:44:17 +0900228 return PyString_Check(typeargs) && !strncmp(PyString_AS_STRING(typeargs), "UTF8", 4);
Nobuaki Sukegawa4733db42016-01-05 02:50:57 +0900229}
David Reiss382fc302007-08-25 18:01:30 +0000230
/* --- FUNCTIONS TO PARSE STRUCT SPECIFICATIONS --- */
232
233static bool
234parse_set_list_args(SetListTypeArgs* dest, PyObject* typeargs) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900235 if (PyTuple_Size(typeargs) != 3) {
236 PyErr_SetString(PyExc_TypeError, "expecting tuple of size 3 for list/set type args");
David Reiss382fc302007-08-25 18:01:30 +0000237 return false;
238 }
239
240 dest->element_type = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0));
241 if (INT_CONV_ERROR_OCCURRED(dest->element_type)) {
242 return false;
243 }
244
245 dest->typeargs = PyTuple_GET_ITEM(typeargs, 1);
246
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900247 dest->immutable = Py_True == PyTuple_GET_ITEM(typeargs, 2);
248
David Reiss382fc302007-08-25 18:01:30 +0000249 return true;
250}
251
252static bool
253parse_map_args(MapTypeArgs* dest, PyObject* typeargs) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900254 if (PyTuple_Size(typeargs) != 5) {
255 PyErr_SetString(PyExc_TypeError, "expecting 5 arguments for typeargs to map");
David Reiss382fc302007-08-25 18:01:30 +0000256 return false;
257 }
258
259 dest->ktag = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0));
260 if (INT_CONV_ERROR_OCCURRED(dest->ktag)) {
261 return false;
262 }
263
264 dest->vtag = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 2));
265 if (INT_CONV_ERROR_OCCURRED(dest->vtag)) {
266 return false;
267 }
268
269 dest->ktypeargs = PyTuple_GET_ITEM(typeargs, 1);
270 dest->vtypeargs = PyTuple_GET_ITEM(typeargs, 3);
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900271 dest->immutable = Py_True == PyTuple_GET_ITEM(typeargs, 4);
David Reiss382fc302007-08-25 18:01:30 +0000272
273 return true;
274}
275
276static bool
277parse_struct_args(StructTypeArgs* dest, PyObject* typeargs) {
278 if (PyTuple_Size(typeargs) != 2) {
279 PyErr_SetString(PyExc_TypeError, "expecting tuple of size 2 for struct args");
280 return false;
281 }
282
283 dest->klass = PyTuple_GET_ITEM(typeargs, 0);
284 dest->spec = PyTuple_GET_ITEM(typeargs, 1);
285
286 return true;
287}
288
289static int
290parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple) {
291
292 // i'd like to use ParseArgs here, but it seems to be a bottleneck.
293 if (PyTuple_Size(spec_tuple) != 5) {
Nobuaki Sukegawa7b894692015-12-23 21:45:06 +0900294 PyErr_Format(PyExc_TypeError, "expecting 5 arguments for spec tuple but got %d", (int)PyTuple_Size(spec_tuple));
David Reiss382fc302007-08-25 18:01:30 +0000295 return false;
296 }
297
298 dest->tag = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 0));
299 if (INT_CONV_ERROR_OCCURRED(dest->tag)) {
300 return false;
301 }
302
303 dest->type = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 1));
304 if (INT_CONV_ERROR_OCCURRED(dest->type)) {
305 return false;
306 }
307
308 dest->attrname = PyTuple_GET_ITEM(spec_tuple, 2);
309 dest->typeargs = PyTuple_GET_ITEM(spec_tuple, 3);
310 dest->defval = PyTuple_GET_ITEM(spec_tuple, 4);
311 return true;
312}
313
314/* ====== END UTILITIES ====== */
315
316
317/* ====== BEGIN WRITING FUNCTIONS ====== */
318
319/* --- LOW-LEVEL WRITING FUNCTIONS --- */
320
321static void writeByte(PyObject* outbuf, int8_t val) {
322 int8_t net = val;
323 PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int8_t));
324}
325
326static void writeI16(PyObject* outbuf, int16_t val) {
327 int16_t net = (int16_t)htons(val);
328 PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int16_t));
329}
330
331static void writeI32(PyObject* outbuf, int32_t val) {
332 int32_t net = (int32_t)htonl(val);
333 PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int32_t));
334}
335
336static void writeI64(PyObject* outbuf, int64_t val) {
337 int64_t net = (int64_t)htonll(val);
338 PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int64_t));
339}
340
341static void writeDouble(PyObject* outbuf, double dub) {
342 // Unfortunately, bitwise_cast doesn't work in C. Bad C!
343 union {
344 double f;
345 int64_t t;
346 } transfer;
347 transfer.f = dub;
348 writeI64(outbuf, transfer.t);
349}
350
351
Konrad Grochowski3b5dacb2014-11-24 10:55:31 +0100352/* --- MAIN RECURSIVE OUTPUT FUNCTION -- */
David Reiss382fc302007-08-25 18:01:30 +0000353
Nobuaki Sukegawa7b894692015-12-23 21:45:06 +0900354static bool
David Reiss382fc302007-08-25 18:01:30 +0000355output_val(PyObject* output, PyObject* value, TType type, PyObject* typeargs) {
356 /*
357 * Refcounting Strategy:
358 *
359 * We assume that elements of the thrift_spec tuple are not going to be
360 * mutated, so we don't ref count those at all. Other than that, we try to
361 * keep a reference to all the user-created objects while we work with them.
362 * output_val assumes that a reference is already held. The *caller* is
363 * responsible for handling references
364 */
365
366 switch (type) {
367
368 case T_BOOL: {
369 int v = PyObject_IsTrue(value);
370 if (v == -1) {
371 return false;
372 }
373
374 writeByte(output, (int8_t) v);
375 break;
376 }
377 case T_I08: {
378 int32_t val;
379
380 if (!parse_pyint(value, &val, INT8_MIN, INT8_MAX)) {
381 return false;
382 }
383
384 writeByte(output, (int8_t) val);
385 break;
386 }
387 case T_I16: {
388 int32_t val;
389
390 if (!parse_pyint(value, &val, INT16_MIN, INT16_MAX)) {
391 return false;
392 }
393
394 writeI16(output, (int16_t) val);
395 break;
396 }
397 case T_I32: {
398 int32_t val;
399
400 if (!parse_pyint(value, &val, INT32_MIN, INT32_MAX)) {
401 return false;
402 }
403
404 writeI32(output, val);
405 break;
406 }
407 case T_I64: {
408 int64_t nval = PyLong_AsLongLong(value);
409
410 if (INT_CONV_ERROR_OCCURRED(nval)) {
411 return false;
412 }
413
414 if (!CHECK_RANGE(nval, INT64_MIN, INT64_MAX)) {
415 PyErr_SetString(PyExc_OverflowError, "int out of range");
416 return false;
417 }
418
419 writeI64(output, nval);
420 break;
421 }
422
423 case T_DOUBLE: {
424 double nval = PyFloat_AsDouble(value);
425 if (nval == -1.0 && PyErr_Occurred()) {
426 return false;
427 }
428
429 writeDouble(output, nval);
430 break;
431 }
432
433 case T_STRING: {
Nobuaki Sukegawa4733db42016-01-05 02:50:57 +0900434 Py_ssize_t len = 0;
435 if (is_utf8(typeargs) && PyUnicode_Check(value))
436 value = PyUnicode_AsUTF8String(value);
437 len = PyString_Size(value);
David Reiss382fc302007-08-25 18:01:30 +0000438
439 if (!check_ssize_t_32(len)) {
440 return false;
441 }
442
443 writeI32(output, (int32_t) len);
444 PycStringIO->cwrite(output, PyString_AsString(value), (int32_t) len);
445 break;
446 }
447
448 case T_LIST:
449 case T_SET: {
450 Py_ssize_t len;
451 SetListTypeArgs parsedargs;
452 PyObject *item;
453 PyObject *iterator;
454
455 if (!parse_set_list_args(&parsedargs, typeargs)) {
456 return false;
457 }
458
459 len = PyObject_Length(value);
460
461 if (!check_ssize_t_32(len)) {
462 return false;
463 }
464
465 writeByte(output, parsedargs.element_type);
466 writeI32(output, (int32_t) len);
467
468 iterator = PyObject_GetIter(value);
469 if (iterator == NULL) {
470 return false;
471 }
472
473 while ((item = PyIter_Next(iterator))) {
474 if (!output_val(output, item, parsedargs.element_type, parsedargs.typeargs)) {
475 Py_DECREF(item);
476 Py_DECREF(iterator);
477 return false;
478 }
479 Py_DECREF(item);
480 }
481
482 Py_DECREF(iterator);
483
484 if (PyErr_Occurred()) {
485 return false;
486 }
487
488 break;
489 }
490
491 case T_MAP: {
492 PyObject *k, *v;
David Reiss58434e62008-10-07 21:08:10 +0000493 Py_ssize_t pos = 0;
David Reiss382fc302007-08-25 18:01:30 +0000494 Py_ssize_t len;
495
496 MapTypeArgs parsedargs;
497
498 len = PyDict_Size(value);
499 if (!check_ssize_t_32(len)) {
500 return false;
501 }
502
503 if (!parse_map_args(&parsedargs, typeargs)) {
504 return false;
505 }
506
507 writeByte(output, parsedargs.ktag);
508 writeByte(output, parsedargs.vtag);
509 writeI32(output, len);
510
511 // TODO(bmaurer): should support any mapping, not just dicts
512 while (PyDict_Next(value, &pos, &k, &v)) {
513 // TODO(dreiss): Think hard about whether these INCREFs actually
514 // turn any unsafe scenarios into safe scenarios.
515 Py_INCREF(k);
516 Py_INCREF(v);
517
518 if (!output_val(output, k, parsedargs.ktag, parsedargs.ktypeargs)
519 || !output_val(output, v, parsedargs.vtag, parsedargs.vtypeargs)) {
520 Py_DECREF(k);
521 Py_DECREF(v);
522 return false;
523 }
Kevin Clark127d01c2009-03-24 01:40:56 +0000524 Py_DECREF(k);
525 Py_DECREF(v);
David Reiss382fc302007-08-25 18:01:30 +0000526 }
527 break;
528 }
529
530 // TODO(dreiss): Consider breaking this out as a function
531 // the way we did for decode_struct.
532 case T_STRUCT: {
533 StructTypeArgs parsedargs;
534 Py_ssize_t nspec;
535 Py_ssize_t i;
536
537 if (!parse_struct_args(&parsedargs, typeargs)) {
538 return false;
539 }
540
541 nspec = PyTuple_Size(parsedargs.spec);
542
543 if (nspec == -1) {
544 return false;
545 }
546
547 for (i = 0; i < nspec; i++) {
548 StructItemSpec parsedspec;
549 PyObject* spec_tuple;
550 PyObject* instval = NULL;
551
552 spec_tuple = PyTuple_GET_ITEM(parsedargs.spec, i);
553 if (spec_tuple == Py_None) {
554 continue;
555 }
556
557 if (!parse_struct_item_spec (&parsedspec, spec_tuple)) {
558 return false;
559 }
560
561 instval = PyObject_GetAttr(value, parsedspec.attrname);
562
563 if (!instval) {
564 return false;
565 }
566
567 if (instval == Py_None) {
568 Py_DECREF(instval);
569 continue;
570 }
571
572 writeByte(output, (int8_t) parsedspec.type);
573 writeI16(output, parsedspec.tag);
574
575 if (!output_val(output, instval, parsedspec.type, parsedspec.typeargs)) {
576 Py_DECREF(instval);
577 return false;
578 }
579
580 Py_DECREF(instval);
581 }
582
583 writeByte(output, (int8_t)T_STOP);
584 break;
585 }
586
587 case T_STOP:
588 case T_VOID:
589 case T_UTF16:
590 case T_UTF8:
591 case T_U64:
592 default:
593 PyErr_SetString(PyExc_TypeError, "Unexpected TType");
594 return false;
595
596 }
597
598 return true;
599}
600
601
602/* --- TOP-LEVEL WRAPPER FOR OUTPUT -- */
603
604static PyObject *
605encode_binary(PyObject *self, PyObject *args) {
606 PyObject* enc_obj;
607 PyObject* type_args;
608 PyObject* buf;
609 PyObject* ret = NULL;
610
611 if (!PyArg_ParseTuple(args, "OO", &enc_obj, &type_args)) {
612 return NULL;
613 }
614
615 buf = PycStringIO->NewOutput(INIT_OUTBUF_SIZE);
616 if (output_val(buf, enc_obj, T_STRUCT, type_args)) {
617 ret = PycStringIO->cgetvalue(buf);
618 }
619
620 Py_DECREF(buf);
621 return ret;
622}
623
624/* ====== END WRITING FUNCTIONS ====== */
625
626
627/* ====== BEGIN READING FUNCTIONS ====== */
628
629/* --- LOW-LEVEL READING FUNCTIONS --- */
630
631static void
632free_decodebuf(DecodeBuffer* d) {
633 Py_XDECREF(d->stringiobuf);
634 Py_XDECREF(d->refill_callable);
635}
636
637static bool
638decode_buffer_from_obj(DecodeBuffer* dest, PyObject* obj) {
639 dest->stringiobuf = PyObject_GetAttr(obj, INTERN_STRING(cstringio_buf));
640 if (!dest->stringiobuf) {
641 return false;
642 }
643
644 if (!PycStringIO_InputCheck(dest->stringiobuf)) {
645 free_decodebuf(dest);
646 PyErr_SetString(PyExc_TypeError, "expecting stringio input");
647 return false;
648 }
649
650 dest->refill_callable = PyObject_GetAttr(obj, INTERN_STRING(cstringio_refill));
651
652 if(!dest->refill_callable) {
653 free_decodebuf(dest);
654 return false;
655 }
656
657 if (!PyCallable_Check(dest->refill_callable)) {
658 free_decodebuf(dest);
659 PyErr_SetString(PyExc_TypeError, "expecting callable");
660 return false;
661 }
662
663 return true;
664}
665
666static bool readBytes(DecodeBuffer* input, char** output, int len) {
667 int read;
668
669 // TODO(dreiss): Don't fear the malloc. Think about taking a copy of
670 // the partial read instead of forcing the transport
671 // to prepend it to its buffer.
672
673 read = PycStringIO->cread(input->stringiobuf, output, len);
674
675 if (read == len) {
676 return true;
677 } else if (read == -1) {
678 return false;
679 } else {
680 PyObject* newiobuf;
681
682 // using building functions as this is a rare codepath
683 newiobuf = PyObject_CallFunction(
David Reiss2c2e6d22007-09-05 01:14:09 +0000684 input->refill_callable, "s#i", *output, read, len, NULL);
David Reiss382fc302007-08-25 18:01:30 +0000685 if (newiobuf == NULL) {
686 return false;
687 }
688
689 // must do this *AFTER* the call so that we don't deref the io buffer
690 Py_CLEAR(input->stringiobuf);
691 input->stringiobuf = newiobuf;
692
693 read = PycStringIO->cread(input->stringiobuf, output, len);
694
695 if (read == len) {
696 return true;
697 } else if (read == -1) {
698 return false;
699 } else {
700 // TODO(dreiss): This could be a valid code path for big binary blobs.
701 PyErr_SetString(PyExc_TypeError,
702 "refill claimed to have refilled the buffer, but didn't!!");
703 return false;
704 }
705 }
706}
707
708static int8_t readByte(DecodeBuffer* input) {
709 char* buf;
710 if (!readBytes(input, &buf, sizeof(int8_t))) {
711 return -1;
712 }
713
714 return *(int8_t*) buf;
715}
716
717static int16_t readI16(DecodeBuffer* input) {
718 char* buf;
719 if (!readBytes(input, &buf, sizeof(int16_t))) {
720 return -1;
721 }
722
723 return (int16_t) ntohs(*(int16_t*) buf);
724}
725
726static int32_t readI32(DecodeBuffer* input) {
727 char* buf;
728 if (!readBytes(input, &buf, sizeof(int32_t))) {
729 return -1;
730 }
731 return (int32_t) ntohl(*(int32_t*) buf);
732}
733
734
735static int64_t readI64(DecodeBuffer* input) {
736 char* buf;
737 if (!readBytes(input, &buf, sizeof(int64_t))) {
738 return -1;
739 }
740
741 return (int64_t) ntohll(*(int64_t*) buf);
742}
743
744static double readDouble(DecodeBuffer* input) {
745 union {
746 int64_t f;
747 double t;
748 } transfer;
749
750 transfer.f = readI64(input);
751 if (transfer.f == -1) {
752 return -1;
753 }
754 return transfer.t;
755}
756
757static bool
758checkTypeByte(DecodeBuffer* input, TType expected) {
759 TType got = readByte(input);
Mark Slee53d9c0c2007-11-26 21:15:40 +0000760 if (INT_CONV_ERROR_OCCURRED(got)) {
761 return false;
762 }
David Reiss382fc302007-08-25 18:01:30 +0000763
764 if (expected != got) {
765 PyErr_SetString(PyExc_TypeError, "got wrong ttype while reading field");
766 return false;
767 }
768 return true;
769}
770
771static bool
772skip(DecodeBuffer* input, TType type) {
773#define SKIPBYTES(n) \
774 do { \
775 if (!readBytes(input, &dummy_buf, (n))) { \
776 return false; \
777 } \
778 } while(0)
779
780 char* dummy_buf;
781
782 switch (type) {
783
784 case T_BOOL:
785 case T_I08: SKIPBYTES(1); break;
786 case T_I16: SKIPBYTES(2); break;
787 case T_I32: SKIPBYTES(4); break;
788 case T_I64:
789 case T_DOUBLE: SKIPBYTES(8); break;
790
791 case T_STRING: {
792 // TODO(dreiss): Find out if these check_ssize_t32s are really necessary.
793 int len = readI32(input);
794 if (!check_ssize_t_32(len)) {
795 return false;
796 }
797 SKIPBYTES(len);
798 break;
799 }
800
801 case T_LIST:
802 case T_SET: {
803 TType etype;
804 int len, i;
805
806 etype = readByte(input);
807 if (etype == -1) {
808 return false;
809 }
810
811 len = readI32(input);
812 if (!check_ssize_t_32(len)) {
813 return false;
814 }
815
816 for (i = 0; i < len; i++) {
817 if (!skip(input, etype)) {
818 return false;
819 }
820 }
821 break;
822 }
823
824 case T_MAP: {
825 TType ktype, vtype;
826 int len, i;
827
828 ktype = readByte(input);
829 if (ktype == -1) {
830 return false;
831 }
832
833 vtype = readByte(input);
834 if (vtype == -1) {
835 return false;
836 }
837
838 len = readI32(input);
839 if (!check_ssize_t_32(len)) {
840 return false;
841 }
842
843 for (i = 0; i < len; i++) {
844 if (!(skip(input, ktype) && skip(input, vtype))) {
845 return false;
846 }
847 }
848 break;
849 }
850
851 case T_STRUCT: {
852 while (true) {
853 TType type;
854
855 type = readByte(input);
856 if (type == -1) {
857 return false;
858 }
859
860 if (type == T_STOP)
861 break;
862
863 SKIPBYTES(2); // tag
864 if (!skip(input, type)) {
865 return false;
866 }
867 }
868 break;
869 }
870
871 case T_STOP:
872 case T_VOID:
873 case T_UTF16:
874 case T_UTF8:
875 case T_U64:
876 default:
877 PyErr_SetString(PyExc_TypeError, "Unexpected TType");
878 return false;
879
880 }
881
David Reissbc444b02008-02-14 20:20:08 +0000882 return true;
David Reiss382fc302007-08-25 18:01:30 +0000883
884#undef SKIPBYTES
885}
886
887
888/* --- HELPER FUNCTION FOR DECODE_VAL --- */
889
890static PyObject*
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +0900891decode_val(DecodeBuffer* input, TType type, PyObject* typeargs, long string_limit, long container_limit);
David Reiss382fc302007-08-25 18:01:30 +0000892
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900893static PyObject*
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +0900894decode_struct(DecodeBuffer* input, PyObject* output, PyObject* klass, PyObject* spec_seq, long string_limit, long container_limit) {
David Reiss382fc302007-08-25 18:01:30 +0000895 int spec_seq_len = PyTuple_Size(spec_seq);
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900896 bool immutable = output == Py_None;
897 PyObject* kwargs = NULL;
David Reiss382fc302007-08-25 18:01:30 +0000898 if (spec_seq_len == -1) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900899 return NULL;
900 }
901
902 if (immutable) {
903 kwargs = PyDict_New();
904 if (!kwargs) {
905 PyErr_SetString(PyExc_TypeError, "failed to prepare kwargument storage");
906 return NULL;
907 }
David Reiss382fc302007-08-25 18:01:30 +0000908 }
909
910 while (true) {
911 TType type;
912 int16_t tag;
913 PyObject* item_spec;
914 PyObject* fieldval = NULL;
915 StructItemSpec parsedspec;
916
917 type = readByte(input);
Mark Slee53d9c0c2007-11-26 21:15:40 +0000918 if (type == -1) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900919 goto error;
Mark Slee53d9c0c2007-11-26 21:15:40 +0000920 }
David Reiss382fc302007-08-25 18:01:30 +0000921 if (type == T_STOP) {
922 break;
923 }
924 tag = readI16(input);
Mark Slee53d9c0c2007-11-26 21:15:40 +0000925 if (INT_CONV_ERROR_OCCURRED(tag)) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900926 goto error;
Mark Slee53d9c0c2007-11-26 21:15:40 +0000927 }
David Reiss382fc302007-08-25 18:01:30 +0000928 if (tag >= 0 && tag < spec_seq_len) {
929 item_spec = PyTuple_GET_ITEM(spec_seq, tag);
930 } else {
931 item_spec = Py_None;
932 }
933
934 if (item_spec == Py_None) {
935 if (!skip(input, type)) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900936 goto error;
David Reissbc444b02008-02-14 20:20:08 +0000937 } else {
938 continue;
David Reiss382fc302007-08-25 18:01:30 +0000939 }
940 }
941
942 if (!parse_struct_item_spec(&parsedspec, item_spec)) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900943 goto error;
David Reiss382fc302007-08-25 18:01:30 +0000944 }
945 if (parsedspec.type != type) {
David Reissa528f542009-03-24 22:48:40 +0000946 if (!skip(input, type)) {
Nobuaki Sukegawa299255a2016-01-06 14:52:50 +0900947 PyErr_Format(PyExc_TypeError, "struct field had wrong type: expected %d but got %d", parsedspec.type, type);
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900948 goto error;
David Reissa528f542009-03-24 22:48:40 +0000949 } else {
950 continue;
951 }
David Reiss382fc302007-08-25 18:01:30 +0000952 }
953
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +0900954 fieldval = decode_val(input, parsedspec.type, parsedspec.typeargs, string_limit, container_limit);
David Reiss382fc302007-08-25 18:01:30 +0000955 if (fieldval == NULL) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900956 goto error;
David Reiss382fc302007-08-25 18:01:30 +0000957 }
958
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900959 if ((immutable && PyDict_SetItem(kwargs, parsedspec.attrname, fieldval) == -1)
960 || (!immutable && PyObject_SetAttr(output, parsedspec.attrname, fieldval) == -1)) {
David Reiss382fc302007-08-25 18:01:30 +0000961 Py_DECREF(fieldval);
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900962 goto error;
David Reiss382fc302007-08-25 18:01:30 +0000963 }
964 Py_DECREF(fieldval);
965 }
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900966 if (immutable) {
967 PyObject* args = PyTuple_New(0);
968 PyObject* ret = NULL;
969 if (!args) {
970 PyErr_SetString(PyExc_TypeError, "failed to prepare argument storage");
971 goto error;
972 }
973 ret = PyObject_Call(klass, args, kwargs);
974 Py_DECREF(kwargs);
975 Py_DECREF(args);
976 return ret;
977 }
978 Py_INCREF(output);
979 return output;
980
981 error:
982 Py_XDECREF(kwargs);
983 return NULL;
David Reiss382fc302007-08-25 18:01:30 +0000984}
985
986
Konrad Grochowski3b5dacb2014-11-24 10:55:31 +0100987/* --- MAIN RECURSIVE INPUT FUNCTION --- */
David Reiss382fc302007-08-25 18:01:30 +0000988
989// Returns a new reference.
990static PyObject*
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +0900991decode_val(DecodeBuffer* input, TType type, PyObject* typeargs, long string_limit, long container_limit) {
David Reiss382fc302007-08-25 18:01:30 +0000992 switch (type) {
993
994 case T_BOOL: {
995 int8_t v = readByte(input);
996 if (INT_CONV_ERROR_OCCURRED(v)) {
997 return NULL;
998 }
999
1000 switch (v) {
1001 case 0: Py_RETURN_FALSE;
1002 case 1: Py_RETURN_TRUE;
1003 // Don't laugh. This is a potentially serious issue.
1004 default: PyErr_SetString(PyExc_TypeError, "boolean out of range"); return NULL;
1005 }
1006 break;
1007 }
1008 case T_I08: {
1009 int8_t v = readByte(input);
1010 if (INT_CONV_ERROR_OCCURRED(v)) {
1011 return NULL;
1012 }
1013
1014 return PyInt_FromLong(v);
1015 }
1016 case T_I16: {
1017 int16_t v = readI16(input);
1018 if (INT_CONV_ERROR_OCCURRED(v)) {
1019 return NULL;
1020 }
1021 return PyInt_FromLong(v);
1022 }
1023 case T_I32: {
1024 int32_t v = readI32(input);
1025 if (INT_CONV_ERROR_OCCURRED(v)) {
1026 return NULL;
1027 }
1028 return PyInt_FromLong(v);
1029 }
1030
1031 case T_I64: {
1032 int64_t v = readI64(input);
1033 if (INT_CONV_ERROR_OCCURRED(v)) {
1034 return NULL;
1035 }
1036 // TODO(dreiss): Find out if we can take this fastpath always when
1037 // sizeof(long) == sizeof(long long).
1038 if (CHECK_RANGE(v, LONG_MIN, LONG_MAX)) {
1039 return PyInt_FromLong((long) v);
1040 }
1041
1042 return PyLong_FromLongLong(v);
1043 }
1044
1045 case T_DOUBLE: {
1046 double v = readDouble(input);
1047 if (v == -1.0 && PyErr_Occurred()) {
1048 return false;
1049 }
1050 return PyFloat_FromDouble(v);
1051 }
1052
1053 case T_STRING: {
1054 Py_ssize_t len = readI32(input);
1055 char* buf;
1056 if (!readBytes(input, &buf, len)) {
1057 return NULL;
1058 }
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001059 if (!check_length_limit(len, string_limit)) {
1060 return NULL;
1061 }
David Reiss382fc302007-08-25 18:01:30 +00001062
Nobuaki Sukegawa4733db42016-01-05 02:50:57 +09001063 if (is_utf8(typeargs))
1064 return PyUnicode_DecodeUTF8(buf, len, 0);
1065 else
1066 return PyString_FromStringAndSize(buf, len);
David Reiss382fc302007-08-25 18:01:30 +00001067 }
1068
1069 case T_LIST:
1070 case T_SET: {
1071 SetListTypeArgs parsedargs;
1072 int32_t len;
1073 PyObject* ret = NULL;
1074 int i;
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001075 bool use_tuple = false;
David Reiss382fc302007-08-25 18:01:30 +00001076
1077 if (!parse_set_list_args(&parsedargs, typeargs)) {
1078 return NULL;
1079 }
1080
1081 if (!checkTypeByte(input, parsedargs.element_type)) {
1082 return NULL;
1083 }
1084
1085 len = readI32(input);
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001086 if (!check_length_limit(len, container_limit)) {
David Reiss382fc302007-08-25 18:01:30 +00001087 return NULL;
1088 }
1089
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001090 use_tuple = type == T_LIST && parsedargs.immutable;
1091 ret = use_tuple ? PyTuple_New(len) : PyList_New(len);
David Reiss382fc302007-08-25 18:01:30 +00001092 if (!ret) {
1093 return NULL;
1094 }
1095
1096 for (i = 0; i < len; i++) {
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001097 PyObject* item = decode_val(input, parsedargs.element_type, parsedargs.typeargs, string_limit, container_limit);
David Reiss382fc302007-08-25 18:01:30 +00001098 if (!item) {
1099 Py_DECREF(ret);
1100 return NULL;
1101 }
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001102 if (use_tuple) {
1103 PyTuple_SET_ITEM(ret, i, item);
1104 } else {
1105 PyList_SET_ITEM(ret, i, item);
1106 }
David Reiss382fc302007-08-25 18:01:30 +00001107 }
1108
1109 // TODO(dreiss): Consider biting the bullet and making two separate cases
1110 // for list and set, avoiding this post facto conversion.
1111 if (type == T_SET) {
1112 PyObject* setret;
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001113 setret = parsedargs.immutable ? PyFrozenSet_New(ret) : PySet_New(ret);
David Reiss382fc302007-08-25 18:01:30 +00001114 Py_DECREF(ret);
1115 return setret;
1116 }
1117 return ret;
1118 }
1119
1120 case T_MAP: {
1121 int32_t len;
1122 int i;
1123 MapTypeArgs parsedargs;
1124 PyObject* ret = NULL;
1125
1126 if (!parse_map_args(&parsedargs, typeargs)) {
1127 return NULL;
1128 }
1129
1130 if (!checkTypeByte(input, parsedargs.ktag)) {
1131 return NULL;
1132 }
1133 if (!checkTypeByte(input, parsedargs.vtag)) {
1134 return NULL;
1135 }
1136
1137 len = readI32(input);
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001138 if (!check_length_limit(len, container_limit)) {
1139 return NULL;
David Reiss382fc302007-08-25 18:01:30 +00001140 }
1141
1142 ret = PyDict_New();
1143 if (!ret) {
1144 goto error;
1145 }
1146
1147 for (i = 0; i < len; i++) {
1148 PyObject* k = NULL;
1149 PyObject* v = NULL;
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001150 k = decode_val(input, parsedargs.ktag, parsedargs.ktypeargs, string_limit, container_limit);
David Reiss382fc302007-08-25 18:01:30 +00001151 if (k == NULL) {
1152 goto loop_error;
1153 }
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001154 v = decode_val(input, parsedargs.vtag, parsedargs.vtypeargs, string_limit, container_limit);
David Reiss382fc302007-08-25 18:01:30 +00001155 if (v == NULL) {
1156 goto loop_error;
1157 }
1158 if (PyDict_SetItem(ret, k, v) == -1) {
1159 goto loop_error;
1160 }
1161
1162 Py_DECREF(k);
1163 Py_DECREF(v);
1164 continue;
1165
1166 // Yuck! Destructors, anyone?
1167 loop_error:
1168 Py_XDECREF(k);
1169 Py_XDECREF(v);
1170 goto error;
1171 }
1172
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001173 if (parsedargs.immutable) {
1174 PyObject* thrift = PyImport_ImportModule("thrift.Thrift");
1175 PyObject* cls = NULL;
1176 PyObject* arg = NULL;
1177 if (!thrift) {
1178 goto error;
1179 }
1180 cls = PyObject_GetAttrString(thrift, "TFrozenDict");
1181 if (!cls) {
1182 goto error;
1183 }
1184 arg = PyTuple_New(1);
1185 PyTuple_SET_ITEM(arg, 0, ret);
1186 return PyObject_CallObject(cls, arg);
1187 }
1188
David Reiss382fc302007-08-25 18:01:30 +00001189 return ret;
1190
1191 error:
1192 Py_XDECREF(ret);
1193 return NULL;
1194 }
1195
1196 case T_STRUCT: {
1197 StructTypeArgs parsedargs;
1198 if (!parse_struct_args(&parsedargs, typeargs)) {
1199 return NULL;
1200 }
1201
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001202 return decode_struct(input, Py_None, parsedargs.klass, parsedargs.spec, string_limit, container_limit);
David Reiss382fc302007-08-25 18:01:30 +00001203 }
1204
1205 case T_STOP:
1206 case T_VOID:
1207 case T_UTF16:
1208 case T_UTF8:
1209 case T_U64:
1210 default:
1211 PyErr_SetString(PyExc_TypeError, "Unexpected TType");
1212 return NULL;
1213 }
1214}
1215
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001216static long as_long_or(PyObject* value, long default_value) {
1217 long v = PyInt_AsLong(value);
1218 if (INT_CONV_ERROR_OCCURRED(v)) {
1219 PyErr_Clear();
1220 return default_value;
1221 }
1222 return v;
1223}
1224
David Reiss382fc302007-08-25 18:01:30 +00001225
1226/* --- TOP-LEVEL WRAPPER FOR INPUT -- */
1227
1228static PyObject*
1229decode_binary(PyObject *self, PyObject *args) {
1230 PyObject* output_obj = NULL;
1231 PyObject* transport = NULL;
1232 PyObject* typeargs = NULL;
1233 StructTypeArgs parsedargs;
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001234 PyObject* string_limit_obj = NULL;
1235 PyObject* container_limit_obj = NULL;
1236 long string_limit = 0;
1237 long container_limit = 0;
Roger Meierc3f033f2011-09-13 13:54:05 +00001238 DecodeBuffer input = {0, 0};
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001239 PyObject* ret = NULL;
Roger Meier7daf00c2015-06-03 11:45:35 +02001240
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001241 if (!PyArg_ParseTuple(args, "OOOOO", &output_obj, &transport, &typeargs, &string_limit_obj, &container_limit_obj)) {
David Reiss382fc302007-08-25 18:01:30 +00001242 return NULL;
1243 }
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001244 string_limit = as_long_or(string_limit_obj, INT32_MAX);
1245 container_limit = as_long_or(container_limit_obj, INT32_MAX);
David Reiss382fc302007-08-25 18:01:30 +00001246
1247 if (!parse_struct_args(&parsedargs, typeargs)) {
1248 return NULL;
1249 }
1250
1251 if (!decode_buffer_from_obj(&input, transport)) {
1252 return NULL;
1253 }
1254
Nobuaki Sukegawa7b545b52016-01-11 13:46:04 +09001255 ret = decode_struct(&input, output_obj, parsedargs.klass, parsedargs.spec, string_limit, container_limit);
David Reiss382fc302007-08-25 18:01:30 +00001256 free_decodebuf(&input);
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001257 return ret;
David Reiss382fc302007-08-25 18:01:30 +00001258}
1259
1260/* ====== END READING FUNCTIONS ====== */
1261
1262
1263/* -- PYTHON MODULE SETUP STUFF --- */
1264
1265static PyMethodDef ThriftFastBinaryMethods[] = {
1266
1267 {"encode_binary", encode_binary, METH_VARARGS, ""},
1268 {"decode_binary", decode_binary, METH_VARARGS, ""},
1269
1270 {NULL, NULL, 0, NULL} /* Sentinel */
1271};
1272
1273PyMODINIT_FUNC
1274initfastbinary(void) {
1275#define INIT_INTERN_STRING(value) \
1276 do { \
1277 INTERN_STRING(value) = PyString_InternFromString(#value); \
1278 if(!INTERN_STRING(value)) return; \
1279 } while(0)
1280
1281 INIT_INTERN_STRING(cstringio_buf);
1282 INIT_INTERN_STRING(cstringio_refill);
1283#undef INIT_INTERN_STRING
1284
1285 PycString_IMPORT;
1286 if (PycStringIO == NULL) return;
1287
1288 (void) Py_InitModule("thrift.protocol.fastbinary", ThriftFastBinaryMethods);
1289}