blob: ce2ee54d22da5ff7006195d2f96f897d12ef755d [file] [log] [blame]
David Reisse4d4ea02009-04-02 21:37:17 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
#include "TCompactProtocol.h"

#include <config.h>
#include <limits>
#include <sstream>
David Reisse4d4ea02009-04-02 21:37:17 +000024
25/*
26 * TCompactProtocol::i*ToZigzag depend on the fact that the right shift
27 * operator on a signed integer is an arithmetic (sign-extending) shift.
28 * If this is not the case, the current implementation will not work.
29 * If anyone encounters this error, we can try to figure out the best
30 * way to implement an arithmetic right shift on their platform.
31 */
32#if !defined(SIGNED_RIGHT_SHIFT_IS) || !defined(ARITHMETIC_RIGHT_SHIFT)
33# error "Unable to determine the behavior of a signed right shift"
34#endif
35#if SIGNED_RIGHT_SHIFT_IS != ARITHMETIC_RIGHT_SHIFT
36# error "TCompactProtocol currenly only works if a signed right shift is arithmetic"
37#endif
38
// Branch hint: marks a condition as expected to be false. With GNU-compatible
// compilers it forwards to __builtin_expect; elsewhere it is a plain
// pass-through of the expression.
#if defined(__GNUC__)
#  define UNLIKELY(val) (__builtin_expect((val), 0))
#else
#  define UNLIKELY(val) (val)
#endif
44
45namespace apache { namespace thrift { namespace protocol {
46
47const int8_t TCompactProtocol::TTypeToCType[16] = {
48 CT_STOP, // T_STOP
49 0, // unused
50 CT_BOOLEAN_TRUE, // T_BOOL
51 CT_BYTE, // T_BYTE
52 CT_DOUBLE, // T_DOUBLE
53 0, // unused
54 CT_I16, // T_I16
55 0, // unused
56 CT_I32, // T_I32
57 0, // unused
58 CT_I64, // T_I64
59 CT_BINARY, // T_STRING
60 CT_STRUCT, // T_STRUCT
61 CT_MAP, // T_MAP
62 CT_SET, // T_SET
63 CT_LIST, // T_LIST
64 };
65
66
67uint32_t TCompactProtocol::writeMessageBegin(const std::string& name,
68 const TMessageType messageType,
69 const int32_t seqid) {
70 uint32_t wsize = 0;
71 wsize += writeByte(PROTOCOL_ID);
72 wsize += writeByte((VERSION_N & VERSION_MASK) | (((int32_t)messageType << TYPE_SHIFT_AMOUNT) & TYPE_MASK));
73 wsize += writeVarint32(seqid);
74 wsize += writeString(name);
75 return wsize;
76}
77
78/**
79 * Write a field header containing the field id and field type. If the
80 * difference between the current field id and the last one is small (< 15),
81 * then the field id will be encoded in the 4 MSB as a delta. Otherwise, the
82 * field id will follow the type header as a zigzag varint.
83 */
84uint32_t TCompactProtocol::writeFieldBegin(const char* name,
85 const TType fieldType,
86 const int16_t fieldId) {
87 if (fieldType == T_BOOL) {
88 booleanField_.name = name;
89 booleanField_.fieldType = fieldType;
90 booleanField_.fieldId = fieldId;
91 } else {
92 return writeFieldBeginInternal(name, fieldType, fieldId, -1);
93 }
94 return 0;
95}
96
97/**
98 * Write the STOP symbol so we know there are no more fields in this struct.
99 */
100uint32_t TCompactProtocol::writeFieldStop() {
101 return writeByte(T_STOP);
102}
103
104/**
105 * Write a struct begin. This doesn't actually put anything on the wire. We
106 * use it as an opportunity to put special placeholder markers on the field
107 * stack so we can get the field id deltas correct.
108 */
109uint32_t TCompactProtocol::writeStructBegin(const char* name) {
110 lastField_.push(lastFieldId_);
111 lastFieldId_ = 0;
112 return 0;
113}
114
115/**
116 * Write a struct end. This doesn't actually put anything on the wire. We use
117 * this as an opportunity to pop the last field from the current struct off
118 * of the field stack.
119 */
120uint32_t TCompactProtocol::writeStructEnd() {
121 lastFieldId_ = lastField_.top();
122 lastField_.pop();
123 return 0;
124}
125
126/**
127 * Write a List header.
128 */
129uint32_t TCompactProtocol::writeListBegin(const TType elemType,
130 const uint32_t size) {
131 return writeCollectionBegin(elemType, size);
132}
133
134/**
135 * Write a set header.
136 */
137uint32_t TCompactProtocol::writeSetBegin(const TType elemType,
138 const uint32_t size) {
139 return writeCollectionBegin(elemType, size);
140}
141
142/**
143 * Write a map header. If the map is empty, omit the key and value type
144 * headers, as we don't need any additional information to skip it.
145 */
146uint32_t TCompactProtocol::writeMapBegin(const TType keyType,
147 const TType valType,
148 const uint32_t size) {
149 uint32_t wsize = 0;
150
151 if (size == 0) {
152 wsize += writeByte(0);
153 } else {
154 wsize += writeVarint32(size);
155 wsize += writeByte(getCompactType(keyType) << 4 | getCompactType(valType));
156 }
157 return wsize;
158}
159
160/**
161 * Write a boolean value. Potentially, this could be a boolean field, in
162 * which case the field header info isn't written yet. If so, decide what the
163 * right type header is for the value and then write the field header.
164 * Otherwise, write a single byte.
165 */
166uint32_t TCompactProtocol::writeBool(const bool value) {
167 uint32_t wsize = 0;
168
169 if (booleanField_.name != NULL) {
170 // we haven't written the field header yet
171 wsize += writeFieldBeginInternal(booleanField_.name,
172 booleanField_.fieldType,
173 booleanField_.fieldId,
174 value ? CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE);
175 booleanField_.name = NULL;
176 } else {
177 // we're not part of a field, so just write the value
178 wsize += writeByte(value ? CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE);
179 }
180 return wsize;
181}
182
183uint32_t TCompactProtocol::writeByte(const int8_t byte) {
184 trans_->write((uint8_t*)&byte, 1);
185 return 1;
186}
187
188/**
189 * Write an i16 as a zigzag varint.
190 */
191uint32_t TCompactProtocol::writeI16(const int16_t i16) {
192 return writeVarint32(i32ToZigzag(i16));
193}
194
195/**
196 * Write an i32 as a zigzag varint.
197 */
198uint32_t TCompactProtocol::writeI32(const int32_t i32) {
199 return writeVarint32(i32ToZigzag(i32));
200}
201
202/**
203 * Write an i64 as a zigzag varint.
204 */
205uint32_t TCompactProtocol::writeI64(const int64_t i64) {
206 return writeVarint64(i64ToZigzag(i64));
207}
208
209/**
210 * Write a double to the wire as 8 bytes.
211 */
212uint32_t TCompactProtocol::writeDouble(const double dub) {
213 BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t));
214 BOOST_STATIC_ASSERT(std::numeric_limits<double>::is_iec559);
215
216 uint64_t bits = bitwise_cast<uint64_t>(dub);
217 bits = htolell(bits);
218 trans_->write((uint8_t*)&bits, 8);
219 return 8;
220}
221
222/**
223 * Write a string to the wire with a varint size preceeding.
224 */
225uint32_t TCompactProtocol::writeString(const std::string& str) {
226 return writeBinary(str);
227}
228
229uint32_t TCompactProtocol::writeBinary(const std::string& str) {
230 uint32_t ssize = str.size();
231 uint32_t wsize = writeVarint32(ssize) + ssize;
232 trans_->write((uint8_t*)str.data(), ssize);
233 return wsize;
234}
235
236//
237// Internal Writing methods
238//
239
240/**
241 * The workhorse of writeFieldBegin. It has the option of doing a
242 * 'type override' of the type header. This is used specifically in the
243 * boolean field case.
244 */
245int32_t TCompactProtocol::writeFieldBeginInternal(const char* name,
246 const TType fieldType,
247 const int16_t fieldId,
248 int8_t typeOverride) {
249 uint32_t wsize = 0;
250
251 // if there's a type override, use that.
252 int8_t typeToWrite = (typeOverride == -1 ? getCompactType(fieldType) : typeOverride);
253
254 // check if we can use delta encoding for the field id
255 if (fieldId > lastFieldId_ && fieldId - lastFieldId_ <= 15) {
256 // write them together
257 wsize += writeByte((fieldId - lastFieldId_) << 4 | typeToWrite);
258 } else {
259 // write them separate
260 wsize += writeByte(typeToWrite);
261 wsize += writeI16(fieldId);
262 }
263
264 lastFieldId_ = fieldId;
265 return wsize;
266}
267
268/**
269 * Abstract method for writing the start of lists and sets. List and sets on
270 * the wire differ only by the type indicator.
271 */
272uint32_t TCompactProtocol::writeCollectionBegin(int8_t elemType, int32_t size) {
273 uint32_t wsize = 0;
274 if (size <= 14) {
275 wsize += writeByte(size << 4 | getCompactType(elemType));
276 } else {
277 wsize += writeByte(0xf0 | getCompactType(elemType));
278 wsize += writeVarint32(size);
279 }
280 return wsize;
281}
282
283/**
284 * Write an i32 as a varint. Results in 1-5 bytes on the wire.
285 */
286uint32_t TCompactProtocol::writeVarint32(uint32_t n) {
287 uint8_t buf[5];
288 uint32_t wsize = 0;
289
290 while (true) {
291 if ((n & ~0x7F) == 0) {
292 buf[wsize++] = (int8_t)n;
293 break;
294 } else {
295 buf[wsize++] = (int8_t)((n & 0x7F) | 0x80);
296 n >>= 7;
297 }
298 }
299 trans_->write(buf, wsize);
300 return wsize;
301}
302
303/**
304 * Write an i64 as a varint. Results in 1-10 bytes on the wire.
305 */
306uint32_t TCompactProtocol::writeVarint64(uint64_t n) {
307 uint8_t buf[10];
308 uint32_t wsize = 0;
309
310 while (true) {
311 if ((n & ~0x7FL) == 0) {
312 buf[wsize++] = (int8_t)n;
313 break;
314 } else {
315 buf[wsize++] = (int8_t)((n & 0x7F) | 0x80);
316 n >>= 7;
317 }
318 }
319 trans_->write(buf, wsize);
320 return wsize;
321}
322
323/**
324 * Convert l into a zigzag long. This allows negative numbers to be
325 * represented compactly as a varint.
326 */
327uint64_t TCompactProtocol::i64ToZigzag(const int64_t l) {
328 return (l << 1) ^ (l >> 63);
329}
330
331/**
332 * Convert n into a zigzag int. This allows negative numbers to be
333 * represented compactly as a varint.
334 */
335uint32_t TCompactProtocol::i32ToZigzag(const int32_t n) {
336 return (n << 1) ^ (n >> 31);
337}
338
339/**
340 * Given a TType value, find the appropriate TCompactProtocol.Type value
341 */
342int8_t TCompactProtocol::getCompactType(int8_t ttype) {
343 return TTypeToCType[ttype];
344}
345
346//
347// Reading Methods
348//
349
350/**
351 * Read a message header.
352 */
353uint32_t TCompactProtocol::readMessageBegin(std::string& name,
354 TMessageType& messageType,
355 int32_t& seqid) {
356 uint32_t rsize = 0;
357 int8_t protocolId;
358 int8_t versionAndType;
359 int8_t version;
360
361 rsize += readByte(protocolId);
362 if (protocolId != PROTOCOL_ID) {
363 throw TProtocolException(TProtocolException::BAD_VERSION, "Bad protocol identifier");
364 }
365
366 rsize += readByte(versionAndType);
367 version = (int8_t)(versionAndType & VERSION_MASK);
368 if (version != VERSION_N) {
369 throw TProtocolException(TProtocolException::BAD_VERSION, "Bad protocol version");
370 }
371
372 messageType = (TMessageType)((versionAndType >> TYPE_SHIFT_AMOUNT) & 0x03);
373 rsize += readVarint32(seqid);
374 rsize += readString(name);
375
376 return rsize;
377}
378
379/**
380 * Read a struct begin. There's nothing on the wire for this, but it is our
381 * opportunity to push a new struct begin marker on the field stack.
382 */
383uint32_t TCompactProtocol::readStructBegin(std::string& name) {
384 name = "";
385 lastField_.push(lastFieldId_);
386 lastFieldId_ = 0;
387 return 0;
388}
389
390/**
391 * Doesn't actually consume any wire data, just removes the last field for
392 * this struct from the field stack.
393 */
394uint32_t TCompactProtocol::readStructEnd() {
395 lastFieldId_ = lastField_.top();
396 lastField_.pop();
397 return 0;
398}
399
400/**
401 * Read a field header off the wire.
402 */
403uint32_t TCompactProtocol::readFieldBegin(std::string& name,
404 TType& fieldType,
405 int16_t& fieldId) {
406 uint32_t rsize = 0;
407 int8_t byte;
408 int8_t type;
409
410 rsize += readByte(byte);
411 type = (byte & 0x0f);
412
413 // if it's a stop, then we can return immediately, as the struct is over.
414 if (type == T_STOP) {
415 fieldType = T_STOP;
416 fieldId = 0;
417 return rsize;
418 }
419
420 // mask off the 4 MSB of the type header. it could contain a field id delta.
421 int16_t modifier = (int16_t)(((uint8_t)byte & 0xf0) >> 4);
422 if (modifier == 0) {
423 // not a delta, look ahead for the zigzag varint field id.
424 rsize += readI16(fieldId);
425 } else {
426 fieldId = (int16_t)(lastFieldId_ + modifier);
427 }
428 fieldType = getTType(type);
429
430 // if this happens to be a boolean field, the value is encoded in the type
431 if (type == CT_BOOLEAN_TRUE || type == CT_BOOLEAN_FALSE) {
432 // save the boolean value in a special instance variable.
433 boolValue_.hasBoolValue = true;
434 boolValue_.boolValue = (type == CT_BOOLEAN_TRUE ? true : false);
435 }
436
437 // push the new field onto the field stack so we can keep the deltas going.
438 lastFieldId_ = fieldId;
439 return rsize;
440}
441
442/**
443 * Read a map header off the wire. If the size is zero, skip reading the key
444 * and value type. This means that 0-length maps will yield TMaps without the
445 * "correct" types.
446 */
447uint32_t TCompactProtocol::readMapBegin(TType& keyType,
448 TType& valType,
449 uint32_t& size) {
450 uint32_t rsize = 0;
451 int8_t kvType = 0;
452 int32_t msize = 0;
453
454 rsize += readVarint32(msize);
455 if (msize != 0)
456 rsize += readByte(kvType);
457
458 if (msize < 0) {
459 throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
460 } else if (container_limit_ && msize > container_limit_) {
461 throw TProtocolException(TProtocolException::SIZE_LIMIT);
462 }
463
464 keyType = getTType((int8_t)((uint8_t)kvType >> 4));
465 valType = getTType((int8_t)((uint8_t)kvType & 0xf));
466 size = (uint32_t)msize;
467
468 return rsize;
469}
470
471/**
472 * Read a list header off the wire. If the list size is 0-14, the size will
473 * be packed into the element type header. If it's a longer list, the 4 MSB
474 * of the element type header will be 0xF, and a varint will follow with the
475 * true size.
476 */
477uint32_t TCompactProtocol::readListBegin(TType& elemType,
478 uint32_t& size) {
479 int8_t size_and_type;
480 uint32_t rsize = 0;
481 int32_t lsize;
482
483 rsize += readByte(size_and_type);
484
485 lsize = ((uint8_t)size_and_type >> 4) & 0x0f;
486 if (lsize == 15) {
487 rsize += readVarint32(lsize);
488 }
489
490 if (lsize < 0) {
491 throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
492 } else if (container_limit_ && lsize > container_limit_) {
493 throw TProtocolException(TProtocolException::SIZE_LIMIT);
494 }
495
496 elemType = getTType((int8_t)(size_and_type & 0x0f));
497 size = (uint32_t)lsize;
498
499 return rsize;
500}
501
502/**
503 * Read a set header off the wire. If the set size is 0-14, the size will
504 * be packed into the element type header. If it's a longer set, the 4 MSB
505 * of the element type header will be 0xF, and a varint will follow with the
506 * true size.
507 */
508uint32_t TCompactProtocol::readSetBegin(TType& elemType,
509 uint32_t& size) {
510 return readListBegin(elemType, size);
511}
512
513/**
514 * Read a boolean off the wire. If this is a boolean field, the value should
515 * already have been read during readFieldBegin, so we'll just consume the
516 * pre-stored value. Otherwise, read a byte.
517 */
518uint32_t TCompactProtocol::readBool(bool& value) {
519 if (boolValue_.hasBoolValue == true) {
520 value = boolValue_.boolValue;
521 boolValue_.hasBoolValue = false;
522 return 0;
523 } else {
524 int8_t val;
525 readByte(val);
526 value = (val == CT_BOOLEAN_TRUE);
527 return 1;
528 }
529}
530
531/**
532 * Read a single byte off the wire. Nothing interesting here.
533 */
534uint32_t TCompactProtocol::readByte(int8_t& byte) {
535 uint8_t b[1];
536 trans_->readAll(b, 1);
537 byte = *(int8_t*)b;
538 return 1;
539}
540
541/**
542 * Read an i16 from the wire as a zigzag varint.
543 */
544uint32_t TCompactProtocol::readI16(int16_t& i16) {
545 int32_t value;
546 uint32_t rsize = readVarint32(value);
547 i16 = (int16_t)zigzagToI32(value);
548 return rsize;
549}
550
551/**
552 * Read an i32 from the wire as a zigzag varint.
553 */
554uint32_t TCompactProtocol::readI32(int32_t& i32) {
555 int32_t value;
556 uint32_t rsize = readVarint32(value);
557 i32 = zigzagToI32(value);
558 return rsize;
559}
560
561/**
562 * Read an i64 from the wire as a zigzag varint.
563 */
564uint32_t TCompactProtocol::readI64(int64_t& i64) {
565 int64_t value;
566 uint32_t rsize = readVarint64(value);
567 i64 = zigzagToI64(value);
568 return rsize;
569}
570
571/**
572 * No magic here - just read a double off the wire.
573 */
574uint32_t TCompactProtocol::readDouble(double& dub) {
575 BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t));
576 BOOST_STATIC_ASSERT(std::numeric_limits<double>::is_iec559);
577
578 uint64_t bits;
579 uint8_t b[8];
580 trans_->readAll(b, 8);
581 bits = *(uint64_t*)b;
582 bits = letohll(bits);
583 dub = bitwise_cast<double>(bits);
584 return 8;
585}
586
587uint32_t TCompactProtocol::readString(std::string& str) {
588 return readBinary(str);
589}
590
591/**
592 * Read a byte[] from the wire.
593 */
594uint32_t TCompactProtocol::readBinary(std::string& str) {
595 int32_t rsize = 0;
596 int32_t size;
597
598 rsize += readVarint32(size);
599 // Catch empty string case
600 if (size == 0) {
601 str = "";
602 return rsize;
603 }
604
605 // Catch error cases
606 if (size < 0) {
607 throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
608 }
609 if (string_limit_ > 0 && size > string_limit_) {
610 throw TProtocolException(TProtocolException::SIZE_LIMIT);
611 }
612
613 // Use the heap here to prevent stack overflow for v. large strings
614 if (size > string_buf_size_ || string_buf_ == NULL) {
615 void* new_string_buf = std::realloc(string_buf_, (uint32_t)size);
616 if (new_string_buf == NULL) {
617 throw TProtocolException(TProtocolException::UNKNOWN, "Out of memory in TCompactProtocol::readString");
618 }
619 string_buf_ = (uint8_t*)new_string_buf;
620 string_buf_size_ = size;
621 }
622 trans_->readAll(string_buf_, size);
623 str.assign((char*)string_buf_, size);
624
625 return rsize + (uint32_t)size;
626}
627
628/**
629 * Read an i32 from the wire as a varint. The MSB of each byte is set
630 * if there is another byte to follow. This can read up to 5 bytes.
631 */
632uint32_t TCompactProtocol::readVarint32(int32_t& i32) {
633 int64_t val;
634 uint32_t rsize = readVarint64(val);
635 i32 = (int32_t)val;
636 return rsize;
637}
638
639/**
640 * Read an i64 from the wire as a proper varint. The MSB of each byte is set
641 * if there is another byte to follow. This can read up to 10 bytes.
642 */
643uint32_t TCompactProtocol::readVarint64(int64_t& i64) {
644 uint32_t rsize = 0;
645 uint64_t val = 0;
646 int shift = 0;
647 uint8_t buf[10]; // 64 bits / (7 bits/byte) = 10 bytes.
648 uint32_t buf_size = sizeof(buf);
649 const uint8_t* borrowed = trans_->borrow(buf, &buf_size);
650
651 // Fast path.
652 if (borrowed != NULL) {
653 while (true) {
654 uint8_t byte = borrowed[rsize];
655 rsize++;
656 val |= (uint64_t)(byte & 0x7f) << shift;
657 shift += 7;
658 if (!(byte & 0x80)) {
659 i64 = val;
660 trans_->consume(rsize);
661 return rsize;
662 }
663 // Have to check for invalid data so we don't crash.
664 if (UNLIKELY(rsize == sizeof(buf))) {
665 throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
666 }
667 }
668 }
669
670 // Slow path.
671 else {
672 while (true) {
673 uint8_t byte;
674 rsize += trans_->readAll(&byte, 1);
675 val |= (uint64_t)(byte & 0x7f) << shift;
676 shift += 7;
677 if (!(byte & 0x80)) {
678 i64 = val;
679 return rsize;
680 }
681 // Might as well check for invalid data on the slow path too.
682 if (UNLIKELY(rsize >= sizeof(buf))) {
683 throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
684 }
685 }
686 }
687}
688
689/**
690 * Convert from zigzag int to int.
691 */
692int32_t TCompactProtocol::zigzagToI32(uint32_t n) {
693 return (n >> 1) ^ -(n & 1);
694}
695
696/**
697 * Convert from zigzag long to long.
698 */
699int64_t TCompactProtocol::zigzagToI64(uint64_t n) {
700 return (n >> 1) ^ -(n & 1);
701}
702
703TType TCompactProtocol::getTType(int8_t type) {
704 switch (type) {
705 case T_STOP:
706 return T_STOP;
707 case CT_BOOLEAN_FALSE:
708 case CT_BOOLEAN_TRUE:
709 return T_BOOL;
710 case CT_BYTE:
711 return T_BYTE;
712 case CT_I16:
713 return T_I16;
714 case CT_I32:
715 return T_I32;
716 case CT_I64:
717 return T_I64;
718 case CT_DOUBLE:
719 return T_DOUBLE;
720 case CT_BINARY:
721 return T_STRING;
722 case CT_LIST:
723 return T_LIST;
724 case CT_SET:
725 return T_SET;
726 case CT_MAP:
727 return T_MAP;
728 case CT_STRUCT:
729 return T_STRUCT;
730 default:
731 throw TException("don't know what type: " + type);
732 }
733 return T_STOP;
734}
735
736}}} // apache::thrift::protocol