blob: 136c16cb264670abf2ea67fe2d0c160b9f16a4c6 [file] [log] [blame]
David Reisse4d4ea02009-04-02 21:37:17 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
#include "TCompactProtocol.h"

#include <config.h>

#include <sstream>
23
24/*
25 * TCompactProtocol::i*ToZigzag depend on the fact that the right shift
26 * operator on a signed integer is an arithmetic (sign-extending) shift.
27 * If this is not the case, the current implementation will not work.
28 * If anyone encounters this error, we can try to figure out the best
29 * way to implement an arithmetic right shift on their platform.
30 */
31#if !defined(SIGNED_RIGHT_SHIFT_IS) || !defined(ARITHMETIC_RIGHT_SHIFT)
32# error "Unable to determine the behavior of a signed right shift"
33#endif
34#if SIGNED_RIGHT_SHIFT_IS != ARITHMETIC_RIGHT_SHIFT
35# error "TCompactProtocol currenly only works if a signed right shift is arithmetic"
36#endif
37
/*
 * UNLIKELY(val): wraps a condition that is expected to be false. When
 * __GNUC__ is defined it expands to __builtin_expect(val, 0); otherwise it
 * expands to the bare expression.
 */
#ifdef __GNUC__
#define UNLIKELY(val) (__builtin_expect((val), 0))
#else
#define UNLIKELY(val) (val)
#endif
43
44namespace apache { namespace thrift { namespace protocol {
45
/**
 * Lookup table indexed by TType value that yields the compact-protocol type
 * code. It has exactly 16 entries (indices 0-15); slots labelled "unused"
 * hold 0. T_BOOL maps to CT_BOOLEAN_TRUE in this table.
 */
const int8_t TCompactProtocol::TTypeToCType[16] = {
  CT_STOP, // [0] T_STOP
  0, // [1] unused
  CT_BOOLEAN_TRUE, // [2] T_BOOL
  CT_BYTE, // [3] T_BYTE
  CT_DOUBLE, // [4] T_DOUBLE
  0, // [5] unused
  CT_I16, // [6] T_I16
  0, // [7] unused
  CT_I32, // [8] T_I32
  0, // [9] unused
  CT_I64, // [10] T_I64
  CT_BINARY, // [11] T_STRING
  CT_STRUCT, // [12] T_STRUCT
  CT_MAP, // [13] T_MAP
  CT_SET, // [14] T_SET
  CT_LIST, // [15] T_LIST
};
64
65
66uint32_t TCompactProtocol::writeMessageBegin(const std::string& name,
67 const TMessageType messageType,
68 const int32_t seqid) {
69 uint32_t wsize = 0;
70 wsize += writeByte(PROTOCOL_ID);
71 wsize += writeByte((VERSION_N & VERSION_MASK) | (((int32_t)messageType << TYPE_SHIFT_AMOUNT) & TYPE_MASK));
72 wsize += writeVarint32(seqid);
73 wsize += writeString(name);
74 return wsize;
75}
76
77/**
78 * Write a field header containing the field id and field type. If the
79 * difference between the current field id and the last one is small (< 15),
80 * then the field id will be encoded in the 4 MSB as a delta. Otherwise, the
81 * field id will follow the type header as a zigzag varint.
82 */
83uint32_t TCompactProtocol::writeFieldBegin(const char* name,
84 const TType fieldType,
85 const int16_t fieldId) {
86 if (fieldType == T_BOOL) {
87 booleanField_.name = name;
88 booleanField_.fieldType = fieldType;
89 booleanField_.fieldId = fieldId;
90 } else {
91 return writeFieldBeginInternal(name, fieldType, fieldId, -1);
92 }
93 return 0;
94}
95
96/**
97 * Write the STOP symbol so we know there are no more fields in this struct.
98 */
99uint32_t TCompactProtocol::writeFieldStop() {
100 return writeByte(T_STOP);
101}
102
103/**
104 * Write a struct begin. This doesn't actually put anything on the wire. We
105 * use it as an opportunity to put special placeholder markers on the field
106 * stack so we can get the field id deltas correct.
107 */
108uint32_t TCompactProtocol::writeStructBegin(const char* name) {
109 lastField_.push(lastFieldId_);
110 lastFieldId_ = 0;
111 return 0;
112}
113
/**
 * End a struct. Nothing goes on the wire; the last-field id that was saved
 * on the lastField_ stack is restored and removed from the stack.
 *
 * NOTE(review): assumes a matching writeStructBegin pushed an entry; top()
 * on an empty stack is not guarded here.
 *
 * @return always 0.
 */
uint32_t TCompactProtocol::writeStructEnd() {
  // Restore the enclosing struct's last field id, then drop the saved entry.
  lastFieldId_ = lastField_.top();
  lastField_.pop();
  return 0;
}
124
125/**
126 * Write a List header.
127 */
128uint32_t TCompactProtocol::writeListBegin(const TType elemType,
129 const uint32_t size) {
130 return writeCollectionBegin(elemType, size);
131}
132
133/**
134 * Write a set header.
135 */
136uint32_t TCompactProtocol::writeSetBegin(const TType elemType,
137 const uint32_t size) {
138 return writeCollectionBegin(elemType, size);
139}
140
141/**
142 * Write a map header. If the map is empty, omit the key and value type
143 * headers, as we don't need any additional information to skip it.
144 */
145uint32_t TCompactProtocol::writeMapBegin(const TType keyType,
146 const TType valType,
147 const uint32_t size) {
148 uint32_t wsize = 0;
149
150 if (size == 0) {
151 wsize += writeByte(0);
152 } else {
153 wsize += writeVarint32(size);
154 wsize += writeByte(getCompactType(keyType) << 4 | getCompactType(valType));
155 }
156 return wsize;
157}
158
159/**
160 * Write a boolean value. Potentially, this could be a boolean field, in
161 * which case the field header info isn't written yet. If so, decide what the
162 * right type header is for the value and then write the field header.
163 * Otherwise, write a single byte.
164 */
165uint32_t TCompactProtocol::writeBool(const bool value) {
166 uint32_t wsize = 0;
167
168 if (booleanField_.name != NULL) {
169 // we haven't written the field header yet
170 wsize += writeFieldBeginInternal(booleanField_.name,
171 booleanField_.fieldType,
172 booleanField_.fieldId,
173 value ? CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE);
174 booleanField_.name = NULL;
175 } else {
176 // we're not part of a field, so just write the value
177 wsize += writeByte(value ? CT_BOOLEAN_TRUE : CT_BOOLEAN_FALSE);
178 }
179 return wsize;
180}
181
182uint32_t TCompactProtocol::writeByte(const int8_t byte) {
183 trans_->write((uint8_t*)&byte, 1);
184 return 1;
185}
186
187/**
188 * Write an i16 as a zigzag varint.
189 */
190uint32_t TCompactProtocol::writeI16(const int16_t i16) {
191 return writeVarint32(i32ToZigzag(i16));
192}
193
194/**
195 * Write an i32 as a zigzag varint.
196 */
197uint32_t TCompactProtocol::writeI32(const int32_t i32) {
198 return writeVarint32(i32ToZigzag(i32));
199}
200
201/**
202 * Write an i64 as a zigzag varint.
203 */
204uint32_t TCompactProtocol::writeI64(const int64_t i64) {
205 return writeVarint64(i64ToZigzag(i64));
206}
207
208/**
209 * Write a double to the wire as 8 bytes.
210 */
211uint32_t TCompactProtocol::writeDouble(const double dub) {
212 BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t));
213 BOOST_STATIC_ASSERT(std::numeric_limits<double>::is_iec559);
214
215 uint64_t bits = bitwise_cast<uint64_t>(dub);
216 bits = htolell(bits);
217 trans_->write((uint8_t*)&bits, 8);
218 return 8;
219}
220
221/**
222 * Write a string to the wire with a varint size preceeding.
223 */
224uint32_t TCompactProtocol::writeString(const std::string& str) {
225 return writeBinary(str);
226}
227
228uint32_t TCompactProtocol::writeBinary(const std::string& str) {
229 uint32_t ssize = str.size();
230 uint32_t wsize = writeVarint32(ssize) + ssize;
231 trans_->write((uint8_t*)str.data(), ssize);
232 return wsize;
233}
234
235//
236// Internal Writing methods
237//
238
239/**
240 * The workhorse of writeFieldBegin. It has the option of doing a
241 * 'type override' of the type header. This is used specifically in the
242 * boolean field case.
243 */
244int32_t TCompactProtocol::writeFieldBeginInternal(const char* name,
245 const TType fieldType,
246 const int16_t fieldId,
247 int8_t typeOverride) {
248 uint32_t wsize = 0;
249
250 // if there's a type override, use that.
251 int8_t typeToWrite = (typeOverride == -1 ? getCompactType(fieldType) : typeOverride);
252
253 // check if we can use delta encoding for the field id
254 if (fieldId > lastFieldId_ && fieldId - lastFieldId_ <= 15) {
255 // write them together
256 wsize += writeByte((fieldId - lastFieldId_) << 4 | typeToWrite);
257 } else {
258 // write them separate
259 wsize += writeByte(typeToWrite);
260 wsize += writeI16(fieldId);
261 }
262
263 lastFieldId_ = fieldId;
264 return wsize;
265}
266
267/**
268 * Abstract method for writing the start of lists and sets. List and sets on
269 * the wire differ only by the type indicator.
270 */
271uint32_t TCompactProtocol::writeCollectionBegin(int8_t elemType, int32_t size) {
272 uint32_t wsize = 0;
273 if (size <= 14) {
274 wsize += writeByte(size << 4 | getCompactType(elemType));
275 } else {
276 wsize += writeByte(0xf0 | getCompactType(elemType));
277 wsize += writeVarint32(size);
278 }
279 return wsize;
280}
281
282/**
283 * Write an i32 as a varint. Results in 1-5 bytes on the wire.
284 */
285uint32_t TCompactProtocol::writeVarint32(uint32_t n) {
286 uint8_t buf[5];
287 uint32_t wsize = 0;
288
289 while (true) {
290 if ((n & ~0x7F) == 0) {
291 buf[wsize++] = (int8_t)n;
292 break;
293 } else {
294 buf[wsize++] = (int8_t)((n & 0x7F) | 0x80);
295 n >>= 7;
296 }
297 }
298 trans_->write(buf, wsize);
299 return wsize;
300}
301
302/**
303 * Write an i64 as a varint. Results in 1-10 bytes on the wire.
304 */
305uint32_t TCompactProtocol::writeVarint64(uint64_t n) {
306 uint8_t buf[10];
307 uint32_t wsize = 0;
308
309 while (true) {
310 if ((n & ~0x7FL) == 0) {
311 buf[wsize++] = (int8_t)n;
312 break;
313 } else {
314 buf[wsize++] = (int8_t)((n & 0x7F) | 0x80);
315 n >>= 7;
316 }
317 }
318 trans_->write(buf, wsize);
319 return wsize;
320}
321
322/**
323 * Convert l into a zigzag long. This allows negative numbers to be
324 * represented compactly as a varint.
325 */
326uint64_t TCompactProtocol::i64ToZigzag(const int64_t l) {
327 return (l << 1) ^ (l >> 63);
328}
329
330/**
331 * Convert n into a zigzag int. This allows negative numbers to be
332 * represented compactly as a varint.
333 */
334uint32_t TCompactProtocol::i32ToZigzag(const int32_t n) {
335 return (n << 1) ^ (n >> 31);
336}
337
338/**
339 * Given a TType value, find the appropriate TCompactProtocol.Type value
340 */
341int8_t TCompactProtocol::getCompactType(int8_t ttype) {
342 return TTypeToCType[ttype];
343}
344
345//
346// Reading Methods
347//
348
349/**
350 * Read a message header.
351 */
352uint32_t TCompactProtocol::readMessageBegin(std::string& name,
353 TMessageType& messageType,
354 int32_t& seqid) {
355 uint32_t rsize = 0;
356 int8_t protocolId;
357 int8_t versionAndType;
358 int8_t version;
359
360 rsize += readByte(protocolId);
361 if (protocolId != PROTOCOL_ID) {
362 throw TProtocolException(TProtocolException::BAD_VERSION, "Bad protocol identifier");
363 }
364
365 rsize += readByte(versionAndType);
366 version = (int8_t)(versionAndType & VERSION_MASK);
367 if (version != VERSION_N) {
368 throw TProtocolException(TProtocolException::BAD_VERSION, "Bad protocol version");
369 }
370
371 messageType = (TMessageType)((versionAndType >> TYPE_SHIFT_AMOUNT) & 0x03);
372 rsize += readVarint32(seqid);
373 rsize += readString(name);
374
375 return rsize;
376}
377
378/**
379 * Read a struct begin. There's nothing on the wire for this, but it is our
380 * opportunity to push a new struct begin marker on the field stack.
381 */
382uint32_t TCompactProtocol::readStructBegin(std::string& name) {
383 name = "";
384 lastField_.push(lastFieldId_);
385 lastFieldId_ = 0;
386 return 0;
387}
388
/**
 * Finish reading a struct. Nothing is consumed from the wire; the last-field
 * id saved on the lastField_ stack is restored and removed from the stack.
 *
 * NOTE(review): assumes a matching readStructBegin pushed an entry; top() on
 * an empty stack is not guarded here.
 *
 * @return always 0.
 */
uint32_t TCompactProtocol::readStructEnd() {
  // Restore the enclosing struct's last field id, then drop the saved entry.
  lastFieldId_ = lastField_.top();
  lastField_.pop();
  return 0;
}
398
399/**
400 * Read a field header off the wire.
401 */
402uint32_t TCompactProtocol::readFieldBegin(std::string& name,
403 TType& fieldType,
404 int16_t& fieldId) {
405 uint32_t rsize = 0;
406 int8_t byte;
407 int8_t type;
408
409 rsize += readByte(byte);
410 type = (byte & 0x0f);
411
412 // if it's a stop, then we can return immediately, as the struct is over.
413 if (type == T_STOP) {
414 fieldType = T_STOP;
415 fieldId = 0;
416 return rsize;
417 }
418
419 // mask off the 4 MSB of the type header. it could contain a field id delta.
420 int16_t modifier = (int16_t)(((uint8_t)byte & 0xf0) >> 4);
421 if (modifier == 0) {
422 // not a delta, look ahead for the zigzag varint field id.
423 rsize += readI16(fieldId);
424 } else {
425 fieldId = (int16_t)(lastFieldId_ + modifier);
426 }
427 fieldType = getTType(type);
428
429 // if this happens to be a boolean field, the value is encoded in the type
430 if (type == CT_BOOLEAN_TRUE || type == CT_BOOLEAN_FALSE) {
431 // save the boolean value in a special instance variable.
432 boolValue_.hasBoolValue = true;
433 boolValue_.boolValue = (type == CT_BOOLEAN_TRUE ? true : false);
434 }
435
436 // push the new field onto the field stack so we can keep the deltas going.
437 lastFieldId_ = fieldId;
438 return rsize;
439}
440
441/**
442 * Read a map header off the wire. If the size is zero, skip reading the key
443 * and value type. This means that 0-length maps will yield TMaps without the
444 * "correct" types.
445 */
446uint32_t TCompactProtocol::readMapBegin(TType& keyType,
447 TType& valType,
448 uint32_t& size) {
449 uint32_t rsize = 0;
450 int8_t kvType = 0;
451 int32_t msize = 0;
452
453 rsize += readVarint32(msize);
454 if (msize != 0)
455 rsize += readByte(kvType);
456
457 if (msize < 0) {
458 throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
459 } else if (container_limit_ && msize > container_limit_) {
460 throw TProtocolException(TProtocolException::SIZE_LIMIT);
461 }
462
463 keyType = getTType((int8_t)((uint8_t)kvType >> 4));
464 valType = getTType((int8_t)((uint8_t)kvType & 0xf));
465 size = (uint32_t)msize;
466
467 return rsize;
468}
469
470/**
471 * Read a list header off the wire. If the list size is 0-14, the size will
472 * be packed into the element type header. If it's a longer list, the 4 MSB
473 * of the element type header will be 0xF, and a varint will follow with the
474 * true size.
475 */
476uint32_t TCompactProtocol::readListBegin(TType& elemType,
477 uint32_t& size) {
478 int8_t size_and_type;
479 uint32_t rsize = 0;
480 int32_t lsize;
481
482 rsize += readByte(size_and_type);
483
484 lsize = ((uint8_t)size_and_type >> 4) & 0x0f;
485 if (lsize == 15) {
486 rsize += readVarint32(lsize);
487 }
488
489 if (lsize < 0) {
490 throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
491 } else if (container_limit_ && lsize > container_limit_) {
492 throw TProtocolException(TProtocolException::SIZE_LIMIT);
493 }
494
495 elemType = getTType((int8_t)(size_and_type & 0x0f));
496 size = (uint32_t)lsize;
497
498 return rsize;
499}
500
501/**
502 * Read a set header off the wire. If the set size is 0-14, the size will
503 * be packed into the element type header. If it's a longer set, the 4 MSB
504 * of the element type header will be 0xF, and a varint will follow with the
505 * true size.
506 */
507uint32_t TCompactProtocol::readSetBegin(TType& elemType,
508 uint32_t& size) {
509 return readListBegin(elemType, size);
510}
511
512/**
513 * Read a boolean off the wire. If this is a boolean field, the value should
514 * already have been read during readFieldBegin, so we'll just consume the
515 * pre-stored value. Otherwise, read a byte.
516 */
517uint32_t TCompactProtocol::readBool(bool& value) {
518 if (boolValue_.hasBoolValue == true) {
519 value = boolValue_.boolValue;
520 boolValue_.hasBoolValue = false;
521 return 0;
522 } else {
523 int8_t val;
524 readByte(val);
525 value = (val == CT_BOOLEAN_TRUE);
526 return 1;
527 }
528}
529
530/**
531 * Read a single byte off the wire. Nothing interesting here.
532 */
533uint32_t TCompactProtocol::readByte(int8_t& byte) {
534 uint8_t b[1];
535 trans_->readAll(b, 1);
536 byte = *(int8_t*)b;
537 return 1;
538}
539
540/**
541 * Read an i16 from the wire as a zigzag varint.
542 */
543uint32_t TCompactProtocol::readI16(int16_t& i16) {
544 int32_t value;
545 uint32_t rsize = readVarint32(value);
546 i16 = (int16_t)zigzagToI32(value);
547 return rsize;
548}
549
550/**
551 * Read an i32 from the wire as a zigzag varint.
552 */
553uint32_t TCompactProtocol::readI32(int32_t& i32) {
554 int32_t value;
555 uint32_t rsize = readVarint32(value);
556 i32 = zigzagToI32(value);
557 return rsize;
558}
559
560/**
561 * Read an i64 from the wire as a zigzag varint.
562 */
563uint32_t TCompactProtocol::readI64(int64_t& i64) {
564 int64_t value;
565 uint32_t rsize = readVarint64(value);
566 i64 = zigzagToI64(value);
567 return rsize;
568}
569
570/**
571 * No magic here - just read a double off the wire.
572 */
573uint32_t TCompactProtocol::readDouble(double& dub) {
574 BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t));
575 BOOST_STATIC_ASSERT(std::numeric_limits<double>::is_iec559);
576
577 uint64_t bits;
578 uint8_t b[8];
579 trans_->readAll(b, 8);
580 bits = *(uint64_t*)b;
581 bits = letohll(bits);
582 dub = bitwise_cast<double>(bits);
583 return 8;
584}
585
586uint32_t TCompactProtocol::readString(std::string& str) {
587 return readBinary(str);
588}
589
590/**
591 * Read a byte[] from the wire.
592 */
593uint32_t TCompactProtocol::readBinary(std::string& str) {
594 int32_t rsize = 0;
595 int32_t size;
596
597 rsize += readVarint32(size);
598 // Catch empty string case
599 if (size == 0) {
600 str = "";
601 return rsize;
602 }
603
604 // Catch error cases
605 if (size < 0) {
606 throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
607 }
608 if (string_limit_ > 0 && size > string_limit_) {
609 throw TProtocolException(TProtocolException::SIZE_LIMIT);
610 }
611
612 // Use the heap here to prevent stack overflow for v. large strings
613 if (size > string_buf_size_ || string_buf_ == NULL) {
614 void* new_string_buf = std::realloc(string_buf_, (uint32_t)size);
615 if (new_string_buf == NULL) {
616 throw TProtocolException(TProtocolException::UNKNOWN, "Out of memory in TCompactProtocol::readString");
617 }
618 string_buf_ = (uint8_t*)new_string_buf;
619 string_buf_size_ = size;
620 }
621 trans_->readAll(string_buf_, size);
622 str.assign((char*)string_buf_, size);
623
624 return rsize + (uint32_t)size;
625}
626
627/**
628 * Read an i32 from the wire as a varint. The MSB of each byte is set
629 * if there is another byte to follow. This can read up to 5 bytes.
630 */
631uint32_t TCompactProtocol::readVarint32(int32_t& i32) {
632 int64_t val;
633 uint32_t rsize = readVarint64(val);
634 i32 = (int32_t)val;
635 return rsize;
636}
637
638/**
639 * Read an i64 from the wire as a proper varint. The MSB of each byte is set
640 * if there is another byte to follow. This can read up to 10 bytes.
641 */
642uint32_t TCompactProtocol::readVarint64(int64_t& i64) {
643 uint32_t rsize = 0;
644 uint64_t val = 0;
645 int shift = 0;
646 uint8_t buf[10]; // 64 bits / (7 bits/byte) = 10 bytes.
647 uint32_t buf_size = sizeof(buf);
648 const uint8_t* borrowed = trans_->borrow(buf, &buf_size);
649
650 // Fast path.
651 if (borrowed != NULL) {
652 while (true) {
653 uint8_t byte = borrowed[rsize];
654 rsize++;
655 val |= (uint64_t)(byte & 0x7f) << shift;
656 shift += 7;
657 if (!(byte & 0x80)) {
658 i64 = val;
659 trans_->consume(rsize);
660 return rsize;
661 }
662 // Have to check for invalid data so we don't crash.
663 if (UNLIKELY(rsize == sizeof(buf))) {
664 throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
665 }
666 }
667 }
668
669 // Slow path.
670 else {
671 while (true) {
672 uint8_t byte;
673 rsize += trans_->readAll(&byte, 1);
674 val |= (uint64_t)(byte & 0x7f) << shift;
675 shift += 7;
676 if (!(byte & 0x80)) {
677 i64 = val;
678 return rsize;
679 }
680 // Might as well check for invalid data on the slow path too.
681 if (UNLIKELY(rsize >= sizeof(buf))) {
682 throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
683 }
684 }
685 }
686}
687
688/**
689 * Convert from zigzag int to int.
690 */
691int32_t TCompactProtocol::zigzagToI32(uint32_t n) {
692 return (n >> 1) ^ -(n & 1);
693}
694
695/**
696 * Convert from zigzag long to long.
697 */
698int64_t TCompactProtocol::zigzagToI64(uint64_t n) {
699 return (n >> 1) ^ -(n & 1);
700}
701
702TType TCompactProtocol::getTType(int8_t type) {
703 switch (type) {
704 case T_STOP:
705 return T_STOP;
706 case CT_BOOLEAN_FALSE:
707 case CT_BOOLEAN_TRUE:
708 return T_BOOL;
709 case CT_BYTE:
710 return T_BYTE;
711 case CT_I16:
712 return T_I16;
713 case CT_I32:
714 return T_I32;
715 case CT_I64:
716 return T_I64;
717 case CT_DOUBLE:
718 return T_DOUBLE;
719 case CT_BINARY:
720 return T_STRING;
721 case CT_LIST:
722 return T_LIST;
723 case CT_SET:
724 return T_SET;
725 case CT_MAP:
726 return T_MAP;
727 case CT_STRUCT:
728 return T_STRUCT;
729 default:
730 throw TException("don't know what type: " + type);
731 }
732 return T_STOP;
733}
734
735}}} // apache::thrift::protocol