blob: 091a6170be49fa406c6234cb08e5faeeee779fac [file] [log] [blame]
David Reissea2cba82009-03-30 21:35:00 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
David Reiss382fc302007-08-25 18:01:30 +000019
20#include <Python.h>
21#include "cStringIO.h"
David Reiss382fc302007-08-25 18:01:30 +000022#include <stdint.h>
Roger Meierc3f033f2011-09-13 13:54:05 +000023#ifndef _WIN32
24# include <stdbool.h>
25# include <netinet/in.h>
26#else
27# include <WinSock2.h>
28# pragma comment (lib, "ws2_32.lib")
29# define BIG_ENDIAN (4321)
30# define LITTLE_ENDIAN (1234)
31# define BYTE_ORDER LITTLE_ENDIAN
32# if defined(_MSC_VER) && _MSC_VER < 1600
33 typedef int _Bool;
34# define bool _Bool
Roger Meier7daf00c2015-06-03 11:45:35 +020035# define false 0
Roger Meierc3f033f2011-09-13 13:54:05 +000036# define true 1
37# endif
38# define inline __inline
39#endif
David Reiss382fc302007-08-25 18:01:30 +000040
David Reiss49809102009-03-18 23:14:13 +000041/* Fix endianness issues on Solaris */
42#if defined (__SVR4) && defined (__sun)
43 #if defined(__i386) && !defined(__i386__)
44 #define __i386__
45 #endif
46
47 #ifndef BIG_ENDIAN
48 #define BIG_ENDIAN (4321)
49 #endif
50 #ifndef LITTLE_ENDIAN
51 #define LITTLE_ENDIAN (1234)
52 #endif
53
54 /* I386 is LE, even on Solaris */
55 #if !defined(BYTE_ORDER) && defined(__i386__)
56 #define BYTE_ORDER LITTLE_ENDIAN
57 #endif
58#endif
59
David Reiss382fc302007-08-25 18:01:30 +000060// TODO(dreiss): defval appears to be unused. Look into removing it.
61// TODO(dreiss): Make parse_spec_args recursive, and cache the output
62// permanently in the object. (Malloc and orphan.)
63// TODO(dreiss): Why do we need cStringIO for reading, why not just char*?
64// Can cStringIO let us work with a BufferedTransport?
65// TODO(dreiss): Don't ignore the rv from cwrite (maybe).
66
67/* ====== BEGIN UTILITIES ====== */
68
69#define INIT_OUTBUF_SIZE 128
70
/*
 * Thrift wire type identifiers.
 * Copied from TProtocol.h, since sharing one definition between the two
 * would be a huge pain. Listed in numeric order; several names alias
 * the same value.
 */
typedef enum TType {
  T_STOP   = 0,
  T_VOID   = 1,
  T_BOOL   = 2,
  T_BYTE   = 3,
  T_I08    = 3,   /* alias of T_BYTE */
  T_DOUBLE = 4,
  T_I16    = 6,
  T_I32    = 8,
  T_U64    = 9,
  T_I64    = 10,
  T_STRING = 11,
  T_UTF7   = 11,  /* alias of T_STRING */
  T_STRUCT = 12,
  T_MAP    = 13,
  T_SET    = 14,
  T_LIST   = 15,
  T_UTF8   = 16,
  T_UTF16  = 17
} TType;
93
/*
 * Derive the double-underscore endianness macros from the BSD-style
 * ones when the platform headers only provide the latter.
 */
#ifndef __BYTE_ORDER
# if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
#  define __BYTE_ORDER BYTE_ORDER
#  define __LITTLE_ENDIAN LITTLE_ENDIAN
#  define __BIG_ENDIAN BIG_ENDIAN
# else
#  error "Cannot determine endianness"
# endif
#endif

/*
 * 64-bit counterparts of ntohl()/htonl().
 *
 * Big-endian hosts need no conversion. Little-endian hosts use glibc's
 * bswap_64 when available; otherwise the value is assembled from two
 * 32-bit swaps. In the fallback the argument is fully parenthesised and
 * shifted as an unsigned quantity, so expression arguments and negative
 * values expand correctly.
 */
#if __BYTE_ORDER == __BIG_ENDIAN
# define ntohll(n) (n)
# define htonll(n) (n)
#elif __BYTE_ORDER == __LITTLE_ENDIAN
# if defined(__GNUC__) && defined(__GLIBC__)
#  include <byteswap.h>
#  define ntohll(n) bswap_64(n)
#  define htonll(n) bswap_64(n)
# else /* GNUC & GLIBC */
#  define ntohll(n) ( (((unsigned long long)ntohl((uint32_t)(n))) << 32) \
                    | ntohl((uint32_t)(((unsigned long long)(n)) >> 32)) )
#  define htonll(n) ( (((unsigned long long)htonl((uint32_t)(n))) << 32) \
                    | htonl((uint32_t)(((unsigned long long)(n)) >> 32)) )
# endif /* GNUC & GLIBC */
#else /* __BYTE_ORDER */
# error "Can't define htonll or ntohll!"
#endif
120
/*
 * Name of the file-level variable holding an interned attribute name.
 * (Benchmarking showed that interning makes a measurable difference.)
 */
#define INTERN_STRING(value) _intern_ ## value

/* True when v is the -1 error sentinel AND a Python exception is pending. */
#define INT_CONV_ERROR_OCCURRED(v) ( ((v) == -1) && PyErr_Occurred() )

/* True when min <= v <= max (both bounds inclusive). */
#define CHECK_RANGE(v, min, max) ( ((v) >= (min)) && ((v) <= (max)) )
126
David Reiss382fc302007-08-25 18:01:30 +0000127/**
128 * A cache of the spec_args for a set or list,
129 * so we don't have to keep calling PyTuple_GET_ITEM.
130 */
131typedef struct {
132 TType element_type;
133 PyObject* typeargs;
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900134 bool immutable;
David Reiss382fc302007-08-25 18:01:30 +0000135} SetListTypeArgs;
136
137/**
138 * A cache of the spec_args for a map,
139 * so we don't have to keep calling PyTuple_GET_ITEM.
140 */
141typedef struct {
142 TType ktag;
143 TType vtag;
144 PyObject* ktypeargs;
145 PyObject* vtypeargs;
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900146 bool immutable;
David Reiss382fc302007-08-25 18:01:30 +0000147} MapTypeArgs;
148
149/**
150 * A cache of the spec_args for a struct,
151 * so we don't have to keep calling PyTuple_GET_ITEM.
152 */
153typedef struct {
154 PyObject* klass;
155 PyObject* spec;
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900156 bool immutable;
David Reiss382fc302007-08-25 18:01:30 +0000157} StructTypeArgs;
158
159/**
160 * A cache of the item spec from a struct specification,
161 * so we don't have to keep calling PyTuple_GET_ITEM.
162 */
163typedef struct {
164 int tag;
165 TType type;
166 PyObject* attrname;
167 PyObject* typeargs;
168 PyObject* defval;
169} StructItemSpec;
170
171/**
172 * A cache of the two key attributes of a CReadableTransport,
173 * so we don't have to keep calling PyObject_GetAttr.
174 */
175typedef struct {
176 PyObject* stringiobuf;
177 PyObject* refill_callable;
178} DecodeBuffer;
179
180/** Pointer to interned string to speed up attribute lookup. */
181static PyObject* INTERN_STRING(cstringio_buf);
182/** Pointer to interned string to speed up attribute lookup. */
183static PyObject* INTERN_STRING(cstringio_refill);
184
185static inline bool
186check_ssize_t_32(Py_ssize_t len) {
187 // error from getting the int
188 if (INT_CONV_ERROR_OCCURRED(len)) {
189 return false;
190 }
191 if (!CHECK_RANGE(len, 0, INT32_MAX)) {
192 PyErr_SetString(PyExc_OverflowError, "string size out of range");
193 return false;
194 }
195 return true;
196}
197
Roger Meier7daf00c2015-06-03 11:45:35 +0200198#define MAX_LIST_SIZE (10000)
199
200static inline bool
201check_list_length(Py_ssize_t len) {
202 // error from getting the int
203 if (INT_CONV_ERROR_OCCURRED(len)) {
204 return false;
205 }
206 if (!CHECK_RANGE(len, 0, MAX_LIST_SIZE)) {
207 PyErr_SetString(PyExc_OverflowError, "list size out of the sanity limit (10000 items max)");
208 return false;
209 }
210 return true;
211}
212
David Reiss382fc302007-08-25 18:01:30 +0000213static inline bool
214parse_pyint(PyObject* o, int32_t* ret, int32_t min, int32_t max) {
215 long val = PyInt_AsLong(o);
216
217 if (INT_CONV_ERROR_OCCURRED(val)) {
218 return false;
219 }
220 if (!CHECK_RANGE(val, min, max)) {
221 PyErr_SetString(PyExc_OverflowError, "int out of range");
222 return false;
223 }
224
225 *ret = (int32_t) val;
226 return true;
227}
228
229
/* --- FUNCTIONS TO PARSE STRUCT SPECIFICATIONS --- */
231
232static bool
233parse_set_list_args(SetListTypeArgs* dest, PyObject* typeargs) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900234 if (PyTuple_Size(typeargs) != 3) {
235 PyErr_SetString(PyExc_TypeError, "expecting tuple of size 3 for list/set type args");
David Reiss382fc302007-08-25 18:01:30 +0000236 return false;
237 }
238
239 dest->element_type = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0));
240 if (INT_CONV_ERROR_OCCURRED(dest->element_type)) {
241 return false;
242 }
243
244 dest->typeargs = PyTuple_GET_ITEM(typeargs, 1);
245
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900246 dest->immutable = Py_True == PyTuple_GET_ITEM(typeargs, 2);
247
David Reiss382fc302007-08-25 18:01:30 +0000248 return true;
249}
250
251static bool
252parse_map_args(MapTypeArgs* dest, PyObject* typeargs) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900253 if (PyTuple_Size(typeargs) != 5) {
254 PyErr_SetString(PyExc_TypeError, "expecting 5 arguments for typeargs to map");
David Reiss382fc302007-08-25 18:01:30 +0000255 return false;
256 }
257
258 dest->ktag = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 0));
259 if (INT_CONV_ERROR_OCCURRED(dest->ktag)) {
260 return false;
261 }
262
263 dest->vtag = PyInt_AsLong(PyTuple_GET_ITEM(typeargs, 2));
264 if (INT_CONV_ERROR_OCCURRED(dest->vtag)) {
265 return false;
266 }
267
268 dest->ktypeargs = PyTuple_GET_ITEM(typeargs, 1);
269 dest->vtypeargs = PyTuple_GET_ITEM(typeargs, 3);
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900270 dest->immutable = Py_True == PyTuple_GET_ITEM(typeargs, 4);
David Reiss382fc302007-08-25 18:01:30 +0000271
272 return true;
273}
274
275static bool
276parse_struct_args(StructTypeArgs* dest, PyObject* typeargs) {
277 if (PyTuple_Size(typeargs) != 2) {
278 PyErr_SetString(PyExc_TypeError, "expecting tuple of size 2 for struct args");
279 return false;
280 }
281
282 dest->klass = PyTuple_GET_ITEM(typeargs, 0);
283 dest->spec = PyTuple_GET_ITEM(typeargs, 1);
284
285 return true;
286}
287
288static int
289parse_struct_item_spec(StructItemSpec* dest, PyObject* spec_tuple) {
290
291 // i'd like to use ParseArgs here, but it seems to be a bottleneck.
292 if (PyTuple_Size(spec_tuple) != 5) {
Nobuaki Sukegawa7b894692015-12-23 21:45:06 +0900293 PyErr_Format(PyExc_TypeError, "expecting 5 arguments for spec tuple but got %d", (int)PyTuple_Size(spec_tuple));
David Reiss382fc302007-08-25 18:01:30 +0000294 return false;
295 }
296
297 dest->tag = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 0));
298 if (INT_CONV_ERROR_OCCURRED(dest->tag)) {
299 return false;
300 }
301
302 dest->type = PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple, 1));
303 if (INT_CONV_ERROR_OCCURRED(dest->type)) {
304 return false;
305 }
306
307 dest->attrname = PyTuple_GET_ITEM(spec_tuple, 2);
308 dest->typeargs = PyTuple_GET_ITEM(spec_tuple, 3);
309 dest->defval = PyTuple_GET_ITEM(spec_tuple, 4);
310 return true;
311}
312
313/* ====== END UTILITIES ====== */
314
315
316/* ====== BEGIN WRITING FUNCTIONS ====== */
317
318/* --- LOW-LEVEL WRITING FUNCTIONS --- */
319
320static void writeByte(PyObject* outbuf, int8_t val) {
321 int8_t net = val;
322 PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int8_t));
323}
324
325static void writeI16(PyObject* outbuf, int16_t val) {
326 int16_t net = (int16_t)htons(val);
327 PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int16_t));
328}
329
330static void writeI32(PyObject* outbuf, int32_t val) {
331 int32_t net = (int32_t)htonl(val);
332 PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int32_t));
333}
334
335static void writeI64(PyObject* outbuf, int64_t val) {
336 int64_t net = (int64_t)htonll(val);
337 PycStringIO->cwrite(outbuf, (char*)&net, sizeof(int64_t));
338}
339
340static void writeDouble(PyObject* outbuf, double dub) {
341 // Unfortunately, bitwise_cast doesn't work in C. Bad C!
342 union {
343 double f;
344 int64_t t;
345 } transfer;
346 transfer.f = dub;
347 writeI64(outbuf, transfer.t);
348}
349
350
Konrad Grochowski3b5dacb2014-11-24 10:55:31 +0100351/* --- MAIN RECURSIVE OUTPUT FUNCTION -- */
David Reiss382fc302007-08-25 18:01:30 +0000352
Nobuaki Sukegawa7b894692015-12-23 21:45:06 +0900353static bool
David Reiss382fc302007-08-25 18:01:30 +0000354output_val(PyObject* output, PyObject* value, TType type, PyObject* typeargs) {
355 /*
356 * Refcounting Strategy:
357 *
358 * We assume that elements of the thrift_spec tuple are not going to be
359 * mutated, so we don't ref count those at all. Other than that, we try to
360 * keep a reference to all the user-created objects while we work with them.
361 * output_val assumes that a reference is already held. The *caller* is
362 * responsible for handling references
363 */
364
365 switch (type) {
366
367 case T_BOOL: {
368 int v = PyObject_IsTrue(value);
369 if (v == -1) {
370 return false;
371 }
372
373 writeByte(output, (int8_t) v);
374 break;
375 }
376 case T_I08: {
377 int32_t val;
378
379 if (!parse_pyint(value, &val, INT8_MIN, INT8_MAX)) {
380 return false;
381 }
382
383 writeByte(output, (int8_t) val);
384 break;
385 }
386 case T_I16: {
387 int32_t val;
388
389 if (!parse_pyint(value, &val, INT16_MIN, INT16_MAX)) {
390 return false;
391 }
392
393 writeI16(output, (int16_t) val);
394 break;
395 }
396 case T_I32: {
397 int32_t val;
398
399 if (!parse_pyint(value, &val, INT32_MIN, INT32_MAX)) {
400 return false;
401 }
402
403 writeI32(output, val);
404 break;
405 }
406 case T_I64: {
407 int64_t nval = PyLong_AsLongLong(value);
408
409 if (INT_CONV_ERROR_OCCURRED(nval)) {
410 return false;
411 }
412
413 if (!CHECK_RANGE(nval, INT64_MIN, INT64_MAX)) {
414 PyErr_SetString(PyExc_OverflowError, "int out of range");
415 return false;
416 }
417
418 writeI64(output, nval);
419 break;
420 }
421
422 case T_DOUBLE: {
423 double nval = PyFloat_AsDouble(value);
424 if (nval == -1.0 && PyErr_Occurred()) {
425 return false;
426 }
427
428 writeDouble(output, nval);
429 break;
430 }
431
432 case T_STRING: {
433 Py_ssize_t len = PyString_Size(value);
434
435 if (!check_ssize_t_32(len)) {
436 return false;
437 }
438
439 writeI32(output, (int32_t) len);
440 PycStringIO->cwrite(output, PyString_AsString(value), (int32_t) len);
441 break;
442 }
443
444 case T_LIST:
445 case T_SET: {
446 Py_ssize_t len;
447 SetListTypeArgs parsedargs;
448 PyObject *item;
449 PyObject *iterator;
450
451 if (!parse_set_list_args(&parsedargs, typeargs)) {
452 return false;
453 }
454
455 len = PyObject_Length(value);
456
457 if (!check_ssize_t_32(len)) {
458 return false;
459 }
460
461 writeByte(output, parsedargs.element_type);
462 writeI32(output, (int32_t) len);
463
464 iterator = PyObject_GetIter(value);
465 if (iterator == NULL) {
466 return false;
467 }
468
469 while ((item = PyIter_Next(iterator))) {
470 if (!output_val(output, item, parsedargs.element_type, parsedargs.typeargs)) {
471 Py_DECREF(item);
472 Py_DECREF(iterator);
473 return false;
474 }
475 Py_DECREF(item);
476 }
477
478 Py_DECREF(iterator);
479
480 if (PyErr_Occurred()) {
481 return false;
482 }
483
484 break;
485 }
486
487 case T_MAP: {
488 PyObject *k, *v;
David Reiss58434e62008-10-07 21:08:10 +0000489 Py_ssize_t pos = 0;
David Reiss382fc302007-08-25 18:01:30 +0000490 Py_ssize_t len;
491
492 MapTypeArgs parsedargs;
493
494 len = PyDict_Size(value);
495 if (!check_ssize_t_32(len)) {
496 return false;
497 }
498
499 if (!parse_map_args(&parsedargs, typeargs)) {
500 return false;
501 }
502
503 writeByte(output, parsedargs.ktag);
504 writeByte(output, parsedargs.vtag);
505 writeI32(output, len);
506
507 // TODO(bmaurer): should support any mapping, not just dicts
508 while (PyDict_Next(value, &pos, &k, &v)) {
509 // TODO(dreiss): Think hard about whether these INCREFs actually
510 // turn any unsafe scenarios into safe scenarios.
511 Py_INCREF(k);
512 Py_INCREF(v);
513
514 if (!output_val(output, k, parsedargs.ktag, parsedargs.ktypeargs)
515 || !output_val(output, v, parsedargs.vtag, parsedargs.vtypeargs)) {
516 Py_DECREF(k);
517 Py_DECREF(v);
518 return false;
519 }
Kevin Clark127d01c2009-03-24 01:40:56 +0000520 Py_DECREF(k);
521 Py_DECREF(v);
David Reiss382fc302007-08-25 18:01:30 +0000522 }
523 break;
524 }
525
526 // TODO(dreiss): Consider breaking this out as a function
527 // the way we did for decode_struct.
528 case T_STRUCT: {
529 StructTypeArgs parsedargs;
530 Py_ssize_t nspec;
531 Py_ssize_t i;
532
533 if (!parse_struct_args(&parsedargs, typeargs)) {
534 return false;
535 }
536
537 nspec = PyTuple_Size(parsedargs.spec);
538
539 if (nspec == -1) {
540 return false;
541 }
542
543 for (i = 0; i < nspec; i++) {
544 StructItemSpec parsedspec;
545 PyObject* spec_tuple;
546 PyObject* instval = NULL;
547
548 spec_tuple = PyTuple_GET_ITEM(parsedargs.spec, i);
549 if (spec_tuple == Py_None) {
550 continue;
551 }
552
553 if (!parse_struct_item_spec (&parsedspec, spec_tuple)) {
554 return false;
555 }
556
557 instval = PyObject_GetAttr(value, parsedspec.attrname);
558
559 if (!instval) {
560 return false;
561 }
562
563 if (instval == Py_None) {
564 Py_DECREF(instval);
565 continue;
566 }
567
568 writeByte(output, (int8_t) parsedspec.type);
569 writeI16(output, parsedspec.tag);
570
571 if (!output_val(output, instval, parsedspec.type, parsedspec.typeargs)) {
572 Py_DECREF(instval);
573 return false;
574 }
575
576 Py_DECREF(instval);
577 }
578
579 writeByte(output, (int8_t)T_STOP);
580 break;
581 }
582
583 case T_STOP:
584 case T_VOID:
585 case T_UTF16:
586 case T_UTF8:
587 case T_U64:
588 default:
589 PyErr_SetString(PyExc_TypeError, "Unexpected TType");
590 return false;
591
592 }
593
594 return true;
595}
596
597
598/* --- TOP-LEVEL WRAPPER FOR OUTPUT -- */
599
600static PyObject *
601encode_binary(PyObject *self, PyObject *args) {
602 PyObject* enc_obj;
603 PyObject* type_args;
604 PyObject* buf;
605 PyObject* ret = NULL;
606
607 if (!PyArg_ParseTuple(args, "OO", &enc_obj, &type_args)) {
608 return NULL;
609 }
610
611 buf = PycStringIO->NewOutput(INIT_OUTBUF_SIZE);
612 if (output_val(buf, enc_obj, T_STRUCT, type_args)) {
613 ret = PycStringIO->cgetvalue(buf);
614 }
615
616 Py_DECREF(buf);
617 return ret;
618}
619
620/* ====== END WRITING FUNCTIONS ====== */
621
622
623/* ====== BEGIN READING FUNCTIONS ====== */
624
625/* --- LOW-LEVEL READING FUNCTIONS --- */
626
627static void
628free_decodebuf(DecodeBuffer* d) {
629 Py_XDECREF(d->stringiobuf);
630 Py_XDECREF(d->refill_callable);
631}
632
633static bool
634decode_buffer_from_obj(DecodeBuffer* dest, PyObject* obj) {
635 dest->stringiobuf = PyObject_GetAttr(obj, INTERN_STRING(cstringio_buf));
636 if (!dest->stringiobuf) {
637 return false;
638 }
639
640 if (!PycStringIO_InputCheck(dest->stringiobuf)) {
641 free_decodebuf(dest);
642 PyErr_SetString(PyExc_TypeError, "expecting stringio input");
643 return false;
644 }
645
646 dest->refill_callable = PyObject_GetAttr(obj, INTERN_STRING(cstringio_refill));
647
648 if(!dest->refill_callable) {
649 free_decodebuf(dest);
650 return false;
651 }
652
653 if (!PyCallable_Check(dest->refill_callable)) {
654 free_decodebuf(dest);
655 PyErr_SetString(PyExc_TypeError, "expecting callable");
656 return false;
657 }
658
659 return true;
660}
661
662static bool readBytes(DecodeBuffer* input, char** output, int len) {
663 int read;
664
665 // TODO(dreiss): Don't fear the malloc. Think about taking a copy of
666 // the partial read instead of forcing the transport
667 // to prepend it to its buffer.
668
669 read = PycStringIO->cread(input->stringiobuf, output, len);
670
671 if (read == len) {
672 return true;
673 } else if (read == -1) {
674 return false;
675 } else {
676 PyObject* newiobuf;
677
678 // using building functions as this is a rare codepath
679 newiobuf = PyObject_CallFunction(
David Reiss2c2e6d22007-09-05 01:14:09 +0000680 input->refill_callable, "s#i", *output, read, len, NULL);
David Reiss382fc302007-08-25 18:01:30 +0000681 if (newiobuf == NULL) {
682 return false;
683 }
684
685 // must do this *AFTER* the call so that we don't deref the io buffer
686 Py_CLEAR(input->stringiobuf);
687 input->stringiobuf = newiobuf;
688
689 read = PycStringIO->cread(input->stringiobuf, output, len);
690
691 if (read == len) {
692 return true;
693 } else if (read == -1) {
694 return false;
695 } else {
696 // TODO(dreiss): This could be a valid code path for big binary blobs.
697 PyErr_SetString(PyExc_TypeError,
698 "refill claimed to have refilled the buffer, but didn't!!");
699 return false;
700 }
701 }
702}
703
704static int8_t readByte(DecodeBuffer* input) {
705 char* buf;
706 if (!readBytes(input, &buf, sizeof(int8_t))) {
707 return -1;
708 }
709
710 return *(int8_t*) buf;
711}
712
713static int16_t readI16(DecodeBuffer* input) {
714 char* buf;
715 if (!readBytes(input, &buf, sizeof(int16_t))) {
716 return -1;
717 }
718
719 return (int16_t) ntohs(*(int16_t*) buf);
720}
721
722static int32_t readI32(DecodeBuffer* input) {
723 char* buf;
724 if (!readBytes(input, &buf, sizeof(int32_t))) {
725 return -1;
726 }
727 return (int32_t) ntohl(*(int32_t*) buf);
728}
729
730
731static int64_t readI64(DecodeBuffer* input) {
732 char* buf;
733 if (!readBytes(input, &buf, sizeof(int64_t))) {
734 return -1;
735 }
736
737 return (int64_t) ntohll(*(int64_t*) buf);
738}
739
740static double readDouble(DecodeBuffer* input) {
741 union {
742 int64_t f;
743 double t;
744 } transfer;
745
746 transfer.f = readI64(input);
747 if (transfer.f == -1) {
748 return -1;
749 }
750 return transfer.t;
751}
752
753static bool
754checkTypeByte(DecodeBuffer* input, TType expected) {
755 TType got = readByte(input);
Mark Slee53d9c0c2007-11-26 21:15:40 +0000756 if (INT_CONV_ERROR_OCCURRED(got)) {
757 return false;
758 }
David Reiss382fc302007-08-25 18:01:30 +0000759
760 if (expected != got) {
761 PyErr_SetString(PyExc_TypeError, "got wrong ttype while reading field");
762 return false;
763 }
764 return true;
765}
766
767static bool
768skip(DecodeBuffer* input, TType type) {
769#define SKIPBYTES(n) \
770 do { \
771 if (!readBytes(input, &dummy_buf, (n))) { \
772 return false; \
773 } \
774 } while(0)
775
776 char* dummy_buf;
777
778 switch (type) {
779
780 case T_BOOL:
781 case T_I08: SKIPBYTES(1); break;
782 case T_I16: SKIPBYTES(2); break;
783 case T_I32: SKIPBYTES(4); break;
784 case T_I64:
785 case T_DOUBLE: SKIPBYTES(8); break;
786
787 case T_STRING: {
788 // TODO(dreiss): Find out if these check_ssize_t32s are really necessary.
789 int len = readI32(input);
790 if (!check_ssize_t_32(len)) {
791 return false;
792 }
793 SKIPBYTES(len);
794 break;
795 }
796
797 case T_LIST:
798 case T_SET: {
799 TType etype;
800 int len, i;
801
802 etype = readByte(input);
803 if (etype == -1) {
804 return false;
805 }
806
807 len = readI32(input);
808 if (!check_ssize_t_32(len)) {
809 return false;
810 }
811
812 for (i = 0; i < len; i++) {
813 if (!skip(input, etype)) {
814 return false;
815 }
816 }
817 break;
818 }
819
820 case T_MAP: {
821 TType ktype, vtype;
822 int len, i;
823
824 ktype = readByte(input);
825 if (ktype == -1) {
826 return false;
827 }
828
829 vtype = readByte(input);
830 if (vtype == -1) {
831 return false;
832 }
833
834 len = readI32(input);
835 if (!check_ssize_t_32(len)) {
836 return false;
837 }
838
839 for (i = 0; i < len; i++) {
840 if (!(skip(input, ktype) && skip(input, vtype))) {
841 return false;
842 }
843 }
844 break;
845 }
846
847 case T_STRUCT: {
848 while (true) {
849 TType type;
850
851 type = readByte(input);
852 if (type == -1) {
853 return false;
854 }
855
856 if (type == T_STOP)
857 break;
858
859 SKIPBYTES(2); // tag
860 if (!skip(input, type)) {
861 return false;
862 }
863 }
864 break;
865 }
866
867 case T_STOP:
868 case T_VOID:
869 case T_UTF16:
870 case T_UTF8:
871 case T_U64:
872 default:
873 PyErr_SetString(PyExc_TypeError, "Unexpected TType");
874 return false;
875
876 }
877
David Reissbc444b02008-02-14 20:20:08 +0000878 return true;
David Reiss382fc302007-08-25 18:01:30 +0000879
880#undef SKIPBYTES
881}
882
883
884/* --- HELPER FUNCTION FOR DECODE_VAL --- */
885
886static PyObject*
887decode_val(DecodeBuffer* input, TType type, PyObject* typeargs);
888
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900889static PyObject*
890decode_struct(DecodeBuffer* input, PyObject* output, PyObject* klass, PyObject* spec_seq) {
David Reiss382fc302007-08-25 18:01:30 +0000891 int spec_seq_len = PyTuple_Size(spec_seq);
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900892 bool immutable = output == Py_None;
893 PyObject* kwargs = NULL;
David Reiss382fc302007-08-25 18:01:30 +0000894 if (spec_seq_len == -1) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900895 return NULL;
896 }
897
898 if (immutable) {
899 kwargs = PyDict_New();
900 if (!kwargs) {
901 PyErr_SetString(PyExc_TypeError, "failed to prepare kwargument storage");
902 return NULL;
903 }
David Reiss382fc302007-08-25 18:01:30 +0000904 }
905
906 while (true) {
907 TType type;
908 int16_t tag;
909 PyObject* item_spec;
910 PyObject* fieldval = NULL;
911 StructItemSpec parsedspec;
912
913 type = readByte(input);
Mark Slee53d9c0c2007-11-26 21:15:40 +0000914 if (type == -1) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900915 goto error;
Mark Slee53d9c0c2007-11-26 21:15:40 +0000916 }
David Reiss382fc302007-08-25 18:01:30 +0000917 if (type == T_STOP) {
918 break;
919 }
920 tag = readI16(input);
Mark Slee53d9c0c2007-11-26 21:15:40 +0000921 if (INT_CONV_ERROR_OCCURRED(tag)) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900922 goto error;
Mark Slee53d9c0c2007-11-26 21:15:40 +0000923 }
David Reiss382fc302007-08-25 18:01:30 +0000924 if (tag >= 0 && tag < spec_seq_len) {
925 item_spec = PyTuple_GET_ITEM(spec_seq, tag);
926 } else {
927 item_spec = Py_None;
928 }
929
930 if (item_spec == Py_None) {
931 if (!skip(input, type)) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900932 goto error;
David Reissbc444b02008-02-14 20:20:08 +0000933 } else {
934 continue;
David Reiss382fc302007-08-25 18:01:30 +0000935 }
936 }
937
938 if (!parse_struct_item_spec(&parsedspec, item_spec)) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900939 goto error;
David Reiss382fc302007-08-25 18:01:30 +0000940 }
941 if (parsedspec.type != type) {
David Reissa528f542009-03-24 22:48:40 +0000942 if (!skip(input, type)) {
943 PyErr_SetString(PyExc_TypeError, "struct field had wrong type while reading and can't be skipped");
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900944 goto error;
David Reissa528f542009-03-24 22:48:40 +0000945 } else {
946 continue;
947 }
David Reiss382fc302007-08-25 18:01:30 +0000948 }
949
950 fieldval = decode_val(input, parsedspec.type, parsedspec.typeargs);
951 if (fieldval == NULL) {
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900952 goto error;
David Reiss382fc302007-08-25 18:01:30 +0000953 }
954
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900955 if ((immutable && PyDict_SetItem(kwargs, parsedspec.attrname, fieldval) == -1)
956 || (!immutable && PyObject_SetAttr(output, parsedspec.attrname, fieldval) == -1)) {
David Reiss382fc302007-08-25 18:01:30 +0000957 Py_DECREF(fieldval);
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900958 goto error;
David Reiss382fc302007-08-25 18:01:30 +0000959 }
960 Py_DECREF(fieldval);
961 }
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +0900962 if (immutable) {
963 PyObject* args = PyTuple_New(0);
964 PyObject* ret = NULL;
965 if (!args) {
966 PyErr_SetString(PyExc_TypeError, "failed to prepare argument storage");
967 goto error;
968 }
969 ret = PyObject_Call(klass, args, kwargs);
970 Py_DECREF(kwargs);
971 Py_DECREF(args);
972 return ret;
973 }
974 Py_INCREF(output);
975 return output;
976
977 error:
978 Py_XDECREF(kwargs);
979 return NULL;
David Reiss382fc302007-08-25 18:01:30 +0000980}
981
982
Konrad Grochowski3b5dacb2014-11-24 10:55:31 +0100983/* --- MAIN RECURSIVE INPUT FUNCTION --- */
David Reiss382fc302007-08-25 18:01:30 +0000984
985// Returns a new reference.
986static PyObject*
987decode_val(DecodeBuffer* input, TType type, PyObject* typeargs) {
988 switch (type) {
989
990 case T_BOOL: {
991 int8_t v = readByte(input);
992 if (INT_CONV_ERROR_OCCURRED(v)) {
993 return NULL;
994 }
995
996 switch (v) {
997 case 0: Py_RETURN_FALSE;
998 case 1: Py_RETURN_TRUE;
999 // Don't laugh. This is a potentially serious issue.
1000 default: PyErr_SetString(PyExc_TypeError, "boolean out of range"); return NULL;
1001 }
1002 break;
1003 }
1004 case T_I08: {
1005 int8_t v = readByte(input);
1006 if (INT_CONV_ERROR_OCCURRED(v)) {
1007 return NULL;
1008 }
1009
1010 return PyInt_FromLong(v);
1011 }
1012 case T_I16: {
1013 int16_t v = readI16(input);
1014 if (INT_CONV_ERROR_OCCURRED(v)) {
1015 return NULL;
1016 }
1017 return PyInt_FromLong(v);
1018 }
1019 case T_I32: {
1020 int32_t v = readI32(input);
1021 if (INT_CONV_ERROR_OCCURRED(v)) {
1022 return NULL;
1023 }
1024 return PyInt_FromLong(v);
1025 }
1026
1027 case T_I64: {
1028 int64_t v = readI64(input);
1029 if (INT_CONV_ERROR_OCCURRED(v)) {
1030 return NULL;
1031 }
1032 // TODO(dreiss): Find out if we can take this fastpath always when
1033 // sizeof(long) == sizeof(long long).
1034 if (CHECK_RANGE(v, LONG_MIN, LONG_MAX)) {
1035 return PyInt_FromLong((long) v);
1036 }
1037
1038 return PyLong_FromLongLong(v);
1039 }
1040
1041 case T_DOUBLE: {
1042 double v = readDouble(input);
1043 if (v == -1.0 && PyErr_Occurred()) {
1044 return false;
1045 }
1046 return PyFloat_FromDouble(v);
1047 }
1048
1049 case T_STRING: {
1050 Py_ssize_t len = readI32(input);
1051 char* buf;
1052 if (!readBytes(input, &buf, len)) {
1053 return NULL;
1054 }
1055
1056 return PyString_FromStringAndSize(buf, len);
1057 }
1058
1059 case T_LIST:
1060 case T_SET: {
1061 SetListTypeArgs parsedargs;
1062 int32_t len;
1063 PyObject* ret = NULL;
1064 int i;
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001065 bool use_tuple = false;
David Reiss382fc302007-08-25 18:01:30 +00001066
1067 if (!parse_set_list_args(&parsedargs, typeargs)) {
1068 return NULL;
1069 }
1070
1071 if (!checkTypeByte(input, parsedargs.element_type)) {
1072 return NULL;
1073 }
1074
1075 len = readI32(input);
Roger Meier7daf00c2015-06-03 11:45:35 +02001076 if (!check_list_length(len)) {
David Reiss382fc302007-08-25 18:01:30 +00001077 return NULL;
1078 }
1079
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001080 use_tuple = type == T_LIST && parsedargs.immutable;
1081 ret = use_tuple ? PyTuple_New(len) : PyList_New(len);
David Reiss382fc302007-08-25 18:01:30 +00001082 if (!ret) {
1083 return NULL;
1084 }
1085
1086 for (i = 0; i < len; i++) {
1087 PyObject* item = decode_val(input, parsedargs.element_type, parsedargs.typeargs);
1088 if (!item) {
1089 Py_DECREF(ret);
1090 return NULL;
1091 }
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001092 if (use_tuple) {
1093 PyTuple_SET_ITEM(ret, i, item);
1094 } else {
1095 PyList_SET_ITEM(ret, i, item);
1096 }
David Reiss382fc302007-08-25 18:01:30 +00001097 }
1098
1099 // TODO(dreiss): Consider biting the bullet and making two separate cases
1100 // for list and set, avoiding this post facto conversion.
1101 if (type == T_SET) {
1102 PyObject* setret;
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001103 setret = parsedargs.immutable ? PyFrozenSet_New(ret) : PySet_New(ret);
David Reiss382fc302007-08-25 18:01:30 +00001104 Py_DECREF(ret);
1105 return setret;
1106 }
1107 return ret;
1108 }
1109
1110 case T_MAP: {
1111 int32_t len;
1112 int i;
1113 MapTypeArgs parsedargs;
1114 PyObject* ret = NULL;
1115
1116 if (!parse_map_args(&parsedargs, typeargs)) {
1117 return NULL;
1118 }
1119
1120 if (!checkTypeByte(input, parsedargs.ktag)) {
1121 return NULL;
1122 }
1123 if (!checkTypeByte(input, parsedargs.vtag)) {
1124 return NULL;
1125 }
1126
1127 len = readI32(input);
1128 if (!check_ssize_t_32(len)) {
1129 return false;
1130 }
1131
1132 ret = PyDict_New();
1133 if (!ret) {
1134 goto error;
1135 }
1136
1137 for (i = 0; i < len; i++) {
1138 PyObject* k = NULL;
1139 PyObject* v = NULL;
1140 k = decode_val(input, parsedargs.ktag, parsedargs.ktypeargs);
1141 if (k == NULL) {
1142 goto loop_error;
1143 }
1144 v = decode_val(input, parsedargs.vtag, parsedargs.vtypeargs);
1145 if (v == NULL) {
1146 goto loop_error;
1147 }
1148 if (PyDict_SetItem(ret, k, v) == -1) {
1149 goto loop_error;
1150 }
1151
1152 Py_DECREF(k);
1153 Py_DECREF(v);
1154 continue;
1155
1156 // Yuck! Destructors, anyone?
1157 loop_error:
1158 Py_XDECREF(k);
1159 Py_XDECREF(v);
1160 goto error;
1161 }
1162
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001163 if (parsedargs.immutable) {
1164 PyObject* thrift = PyImport_ImportModule("thrift.Thrift");
1165 PyObject* cls = NULL;
1166 PyObject* arg = NULL;
1167 if (!thrift) {
1168 goto error;
1169 }
1170 cls = PyObject_GetAttrString(thrift, "TFrozenDict");
1171 if (!cls) {
1172 goto error;
1173 }
1174 arg = PyTuple_New(1);
1175 PyTuple_SET_ITEM(arg, 0, ret);
1176 return PyObject_CallObject(cls, arg);
1177 }
1178
David Reiss382fc302007-08-25 18:01:30 +00001179 return ret;
1180
1181 error:
1182 Py_XDECREF(ret);
1183 return NULL;
1184 }
1185
1186 case T_STRUCT: {
1187 StructTypeArgs parsedargs;
1188 if (!parse_struct_args(&parsedargs, typeargs)) {
1189 return NULL;
1190 }
1191
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001192 return decode_struct(input, Py_None, parsedargs.klass, parsedargs.spec);
David Reiss382fc302007-08-25 18:01:30 +00001193 }
1194
1195 case T_STOP:
1196 case T_VOID:
1197 case T_UTF16:
1198 case T_UTF8:
1199 case T_U64:
1200 default:
1201 PyErr_SetString(PyExc_TypeError, "Unexpected TType");
1202 return NULL;
1203 }
1204}
1205
1206
1207/* --- TOP-LEVEL WRAPPER FOR INPUT -- */
1208
1209static PyObject*
1210decode_binary(PyObject *self, PyObject *args) {
1211 PyObject* output_obj = NULL;
1212 PyObject* transport = NULL;
1213 PyObject* typeargs = NULL;
1214 StructTypeArgs parsedargs;
Roger Meierc3f033f2011-09-13 13:54:05 +00001215 DecodeBuffer input = {0, 0};
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001216 PyObject* ret = NULL;
Roger Meier7daf00c2015-06-03 11:45:35 +02001217
David Reiss382fc302007-08-25 18:01:30 +00001218 if (!PyArg_ParseTuple(args, "OOO", &output_obj, &transport, &typeargs)) {
1219 return NULL;
1220 }
1221
1222 if (!parse_struct_args(&parsedargs, typeargs)) {
1223 return NULL;
1224 }
1225
1226 if (!decode_buffer_from_obj(&input, transport)) {
1227 return NULL;
1228 }
1229
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001230 ret = decode_struct(&input, output_obj, parsedargs.klass, parsedargs.spec);
David Reiss382fc302007-08-25 18:01:30 +00001231 free_decodebuf(&input);
Nobuaki Sukegawae841b3d2015-11-17 11:01:17 +09001232 return ret;
David Reiss382fc302007-08-25 18:01:30 +00001233}
1234
1235/* ====== END READING FUNCTIONS ====== */
1236
1237
1238/* -- PYTHON MODULE SETUP STUFF --- */
1239
1240static PyMethodDef ThriftFastBinaryMethods[] = {
1241
1242 {"encode_binary", encode_binary, METH_VARARGS, ""},
1243 {"decode_binary", decode_binary, METH_VARARGS, ""},
1244
1245 {NULL, NULL, 0, NULL} /* Sentinel */
1246};
1247
1248PyMODINIT_FUNC
1249initfastbinary(void) {
1250#define INIT_INTERN_STRING(value) \
1251 do { \
1252 INTERN_STRING(value) = PyString_InternFromString(#value); \
1253 if(!INTERN_STRING(value)) return; \
1254 } while(0)
1255
1256 INIT_INTERN_STRING(cstringio_buf);
1257 INIT_INTERN_STRING(cstringio_refill);
1258#undef INIT_INTERN_STRING
1259
1260 PycString_IMPORT;
1261 if (PycStringIO == NULL) return;
1262
1263 (void) Py_InitModule("thrift.protocol.fastbinary", ThriftFastBinaryMethods);
1264}