Thrift: Clean up and test TDenseProtocol
Summary:
- TDenseProtocol now includes a part of the struct fingerprint in
the serialized message, to protect from unserialzing trash.
- A lot of cleanups and commenting for TDenseProtocol.
- A lot of test cases for same.
Reviewed By: mcslee
Test Plan: test/DenseProtoTest.cpp
Revert Plan: ok
git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@665257 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/generate/t_cpp_generator.cc b/compiler/cpp/src/generate/t_cpp_generator.cc
index 4a6befd..6de7452 100644
--- a/compiler/cpp/src/generate/t_cpp_generator.cc
+++ b/compiler/cpp/src/generate/t_cpp_generator.cc
@@ -361,6 +361,7 @@
generate_struct_fingerprint(f_types_impl_, tstruct, true);
generate_local_reflection(f_types_, tstruct, false);
generate_local_reflection(f_types_impl_, tstruct, true);
+ generate_local_reflection_pointer(f_types_impl_, tstruct);
generate_struct_reader(f_types_impl_, tstruct);
generate_struct_writer(f_types_impl_, tstruct);
}
@@ -576,7 +577,7 @@
indent() << stat << "char* " << nspace
<< "ascii_fingerprint" << comment << "= \"" <<
tstruct->get_ascii_fingerprint() << "\";" << endl <<
- indent() << stat << "char " << nspace <<
+ indent() << stat << "uint8_t " << nspace <<
"binary_fingerprint[" << t_type::fingerprint_len << "]" << comment << "= {";
char* comma = "";
for (int i = 0; i < t_type::fingerprint_len; i++) {
@@ -681,7 +682,7 @@
if (ttype->is_struct()) {
out << "," << endl <<
- indent() << ((t_struct*)ttype)->get_members().size() << "," << endl <<
+ indent() << type_name(ttype) << "::binary_fingerprint," << endl <<
indent() << local_reflection_name("metas", ttype) << "," << endl <<
indent() << local_reflection_name("specs", ttype);
} else if (ttype->is_list()) {
@@ -701,16 +702,22 @@
out << ");" << endl << endl;
indent_down();
+}
- // If this is a struct and we are in the implementaion file,
- // also set the class's static pointer to its reflection.
- if (ttype->is_struct() && is_definition) {
- indent(out) <<
- "facebook::thrift::reflection::local::TypeSpec* " <<
- ttype->get_name() << "::local_reflection = " << endl <<
- indent() << " &" << local_reflection_name("typespec", ttype) << ";" <<
- endl << endl;
+/**
+ * Writes the structure's static pointer to its local reflection typespec
+ * into the implementation file.
+ */
+void t_cpp_generator::generate_local_reflection_pointer(std::ofstream& out,
+ t_type* ttype) {
+ if (!gen_dense_) {
+ return;
}
+ indent(out) <<
+ "facebook::thrift::reflection::local::TypeSpec* " <<
+ ttype->get_name() << "::local_reflection = " << endl <<
+ indent() << " &" << local_reflection_name("typespec", ttype) << ";" <<
+ endl << endl;
}
/**
diff --git a/compiler/cpp/src/generate/t_cpp_generator.h b/compiler/cpp/src/generate/t_cpp_generator.h
index 7054990..008140a 100644
--- a/compiler/cpp/src/generate/t_cpp_generator.h
+++ b/compiler/cpp/src/generate/t_cpp_generator.h
@@ -149,8 +149,9 @@
std::string type_to_enum(t_type* ttype);
std::string local_reflection_name(const char*, t_type* ttype);
- // This handles checking gen_dense_ and checking for duplicates.
+ // These handles checking gen_dense_ and checking for duplicates.
void generate_local_reflection(std::ofstream& out, t_type* ttype, bool is_definition);
+ void generate_local_reflection_pointer(std::ofstream& out, t_type* ttype);
bool is_complex_type(t_type* ttype) {
ttype = get_true_type(ttype);
diff --git a/lib/cpp/src/TReflectionLocal.h b/lib/cpp/src/TReflectionLocal.h
index df6e706..3fe7c63 100644
--- a/lib/cpp/src/TReflectionLocal.h
+++ b/lib/cpp/src/TReflectionLocal.h
@@ -8,18 +8,25 @@
#define _THRIFT_TREFLECTIONLOCAL_H_ 1
#include <stdint.h>
+#include <cstring>
#include <protocol/TProtocol.h>
+/**
+ * Local Reflection is a blanket term referring to the the structure
+ * and generation of this particular representation of Thrift types.
+ * (It is called local because it cannot be serialized by Thrift).
+ *
+ * @author David Reiss <dreiss@facebook.com>
+ */
+
namespace facebook { namespace thrift { namespace reflection { namespace local {
using facebook::thrift::protocol::TType;
-/**
- * A local reflection is a representation of a Thrift structure.
- * (It is called local because it cannot be serialized by Thrift).
- *
- * @author David Reiss <dreiss@facebook.com>
- */
+// We include this many bytes of the structure's fingerprint when serializing
+// a top-level structure. Long enough to make collisions unlikely, short
+// enough to not significantly affect the amount of memory used.
+const int FP_PREFIX_LEN = 4;
struct FieldMeta {
int16_t tag;
@@ -27,16 +34,15 @@
};
struct TypeSpec {
+ TType ttype;
+ uint8_t fp_prefix[FP_PREFIX_LEN];
+
// Use an anonymous union here so we can fit two TypeSpecs in one cache line.
union {
struct {
// Use parallel arrays here for denser packing (of the arrays).
FieldMeta* metas;
TypeSpec** specs;
- // n_fields is only used for debugging, but it should only add
- // a minimimal amount to the effective size of this structure
- // because of alignment restrictions.
- int n_fields;
} tstruct;
struct {
TypeSpec *subtype1;
@@ -44,21 +50,21 @@
} tcontainer;
};
- // Put this at the end so the 32-bit enum can be crammed up next to the
- // 32-bit int (n_fields).
- TType ttype;
-
-
// Static initialization of unions isn't really possible,
// so take the plunge and use constructors.
// Hopefully they'll be evaluated at compile time.
- TypeSpec(TType ttype) : ttype(ttype) {}
+ TypeSpec(TType ttype) : ttype(ttype) {
+ std::memset(fp_prefix, 0, FP_PREFIX_LEN);
+ }
- TypeSpec(TType ttype, int n_fields, FieldMeta* metas, TypeSpec** specs) :
+ TypeSpec(TType ttype,
+ uint8_t* fingerprint,
+ FieldMeta* metas,
+ TypeSpec** specs) :
ttype(ttype)
{
- tstruct.n_fields = n_fields;
+ std::memcpy(fp_prefix, fingerprint, FP_PREFIX_LEN);
tstruct.metas = metas;
tstruct.specs = specs;
}
@@ -66,6 +72,7 @@
TypeSpec(TType ttype, TypeSpec* subtype1, TypeSpec* subtype2) :
ttype(ttype)
{
+ std::memset(fp_prefix, 0, FP_PREFIX_LEN);
tcontainer.subtype1 = subtype1;
tcontainer.subtype2 = subtype2;
}
diff --git a/lib/cpp/src/protocol/TDenseProtocol.cpp b/lib/cpp/src/protocol/TDenseProtocol.cpp
index 99117d6..e79d4f1 100644
--- a/lib/cpp/src/protocol/TDenseProtocol.cpp
+++ b/lib/cpp/src/protocol/TDenseProtocol.cpp
@@ -4,20 +4,95 @@
// See accompanying file LICENSE or visit the Thrift site at:
// http://developers.facebook.com/thrift/
+/*
+
+IMPLEMENTATION DETAILS
+
+TDenseProtocol was designed to have a smaller serialized form than
+TBinaryProtocol. This is accomplished using two techniques. The first is
+variable-length integer encoding. We use the same technique that the Standard
+MIDI File format uses for "variable-length quantities"
+(http://en.wikipedia.org/wiki/Variable-length_quantity).
+All integers (including i16, but not byte) are first cast to uint64_t,
+then written out as variable-length quantities. This has the unfortunate side
+effect that all negative numbers require 10 bytes, but negative numbers tend
+to be far less common than positive ones.
+
+The second technique eliminating the field ids used by TBinaryProtocol. This
+decision required support from the Thrift compiler and also sacrifices some of
+the backward and forward compatibility of TBinaryProtocol.
+
+We considered implementing this technique by generating separate readers and
+writers for the dense protocol (this is how Pillar, Thrift's predecessor,
+worked), but this idea had a few problems:
+- Our abstractions go out the window.
+- We would have to maintain a second code generator.
+- Preserving compatibility with old versions of the structures would be a
+ nightmare.
+
+Therefore, we chose an alternate implementation that stored the description of
+the data neither in the data itself (like TBinaryProtocol) nor in the
+serialization code (like Pillar), but instead in a separate data structure,
+called a TypeSpec. TypeSpecs are generated by the Thrift compiler
+(specifically in the t_cpp_generator), and their structure should be
+documented there (TODO(dreiss): s/should be/is/).
+
+We maintain a stack of TypeSpecs within the protocol so it knows where the
+generated code is in the reading/writing process. For example, if we are
+writing an i32 contained in a struct bar, contained in a struct foo, then the
+stack would look like: TOP , i32 , struct bar , struct foo , BOTTOM.
+The following invariant: whenever we are about to read/write an object
+(structBegin, containerBegin, or a scalar), the TypeSpec on the top of the
+stack must match the type being read/written. The main reasons that this
+invariant must be maintained is that if we ever start reading a structure, we
+must have its exact TypeSpec in order to pass the right tags to the
+deserializer.
+
+We use the following strategies for maintaining this invariant:
+
+- For structures, we have a separate stack of indexes, one for each structure
+ on the TypeSpec stack. These are indexes into the list of fields in the
+ structure's TypeSpec. When we {read,write}FieldBegin, we push on the
+ TypeSpec for the field.
+- When we begin writing a list or set, we push on the TypeSpec for the
+ element type.
+- For maps, we have a separate stack of booleans, one for each map on the
+ TypeSpec stack. The boolean is true if we are writing the key for that
+ map, and false if we are writing the value. Maps are the trickiest case
+ because the generated code does not call any protocol method between
+ the key and the value. As a result, we potentially have to switch
+ between map key state and map value state after reading/writing any object.
+- This job is handled by the stateTransition method. It is called after
+ reading/writing every object. It pops the current TypeSpec off the stack,
+ then optionally pushes a new one on, depending on what the next TypeSpec is.
+ If it is a struct, the job is left to the next writeFieldBegin. If it is a
+ set or list, the just-popped typespec is pushed back on. If it is a map,
+ the top of the key/value stack is toggled, and the appropriate TypeSpec
+ is pushed.
+
+Optional fields are a little tricky also. We write a zero byte if they are
+absent and prefix them with an 0x01 byte if they are present
+*/
+
#define __STDC_LIMIT_MACROS
#include <stdint.h>
#include "TDenseProtocol.h"
#include "TReflectionLocal.h"
-// XXX for debugging (duh)
+// Leaving this on for now. Disabling it will turn off asserts, which should
+// give a performance boost. When we have *really* thorough test cases,
+// we should drop this.
#define DEBUG_TDENSEPROTOCOL
-// XXX for development.
-#define TDENSE_PROTOCOL_MEASURE_VLI
-
-// The XXX above does not apply to this.
+// NOTE: Assertions should *only* be used to detect bugs in code,
+// either in TDenseProtocol itself, or in code using it.
+// (For example, using the wrong TypeSpec.)
+// Invalid data should NEVER cause an assertion failure,
+// no matter how grossly corrupted, nor how ingeniously crafted.
#ifdef DEBUG_TDENSEPROTOCOL
#undef NDEBUG
+#else
+#define NDEBUG
#endif
#include <cassert>
@@ -31,6 +106,9 @@
namespace facebook { namespace thrift { namespace protocol {
+const int TDenseProtocol::FP_PREFIX_LEN =
+ facebook::thrift::reflection::local::FP_PREFIX_LEN;
+
// Top TypeSpec. TypeSpec of the structure being encoded.
#define TTS (ts_stack_.back()) // type = TypeSpec*
// InDeX. Index into TTS of the current/next field to encode.
@@ -44,15 +122,25 @@
#define ST2 (TTS->tcontainer.subtype2)
+/**
+ * Checks that @c ttype is indeed the ttype that we should be writing,
+ * according to our typespec. Aborts if the test fails and debugging in on.
+ */
inline void TDenseProtocol::checkTType(const TType ttype) {
assert(!ts_stack_.empty());
assert(TTS->ttype == ttype);
}
+/**
+ * Makes sure that the TypeSpec stack is correct for the next object.
+ * See top-of-file comments.
+ */
inline void TDenseProtocol::stateTransition() {
TypeSpec* old_tts = ts_stack_.back();
ts_stack_.pop_back();
+ // If this is the end of the top-level write, we should have just popped
+ // the TypeSpec passed to the constructor.
if (ts_stack_.empty()) {
assert(old_tts = type_spec_);
return;
@@ -83,6 +171,84 @@
}
}
+
+/*
+ * Variable-length quantity functions.
+ */
+
+inline uint32_t TDenseProtocol::vlqRead(uint64_t& vlq) {
+ uint32_t used = 0;
+ uint64_t val = 0;
+ uint8_t buf[10]; // 64 bits / (7 bits/byte) = 10 bytes.
+ bool borrowed = trans_->borrow(buf, sizeof(buf));
+
+ // Fast path. TODO(dreiss): Make it faster.
+ if (borrowed) {
+ while (true) {
+ uint8_t byte = buf[used];
+ used++;
+ val = (val << 7) | (byte & 0x7f);
+ if (!(byte & 0x80)) {
+ vlq = val;
+ trans_->consume(used);
+ return used;
+ }
+ // Have to check for invalid data so we don't crash.
+ if (UNLIKELY(used == sizeof(buf))) {
+ resetState();
+ throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
+ }
+ }
+ }
+
+ // Slow path.
+ else {
+ while (true) {
+ uint8_t byte;
+ used += trans_->readAll(&byte, 1);
+ val = (val << 7) | (byte & 0x7f);
+ if (!(byte & 0x80)) {
+ vlq = val;
+ return used;
+ }
+ // Might as well check for invalid data on the slow path too.
+ if (UNLIKELY(used >= sizeof(buf))) {
+ resetState();
+ throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
+ }
+ }
+ }
+}
+
+inline uint32_t TDenseProtocol::vlqWrite(uint64_t vlq) {
+ uint8_t buf[10]; // 64 bits / (7 bits/byte) = 10 bytes.
+ int32_t pos = sizeof(buf) - 1;
+
+ // Write the thing from back to front.
+ buf[pos] = vlq & 0x7f;
+ vlq >>= 7;
+ pos--;
+
+ while (vlq > 0) {
+ assert(pos >= 0);
+ buf[pos] = (vlq | 0x80);
+ vlq >>= 7;
+ pos--;
+ }
+
+ // Back up one step before writing.
+ pos++;
+
+ trans_->write(buf+pos, sizeof(buf) - pos);
+ return sizeof(buf) - pos;
+}
+
+
+
+/*
+ * Writing functions.
+ */
+
uint32_t TDenseProtocol::writeMessageBegin(const std::string& name,
const TMessageType messageType,
const int32_t seqid) {
@@ -100,16 +266,27 @@
return 0;
}
-// Also implements readStructBegin.
uint32_t TDenseProtocol::writeStructBegin(const string& name) {
+ uint32_t xfer = 0;
+
+ // The TypeSpec stack should be empty if this is the top-level read/write.
+ // If it is, we push the TypeSpec passed to the constructor.
if (ts_stack_.empty()) {
+ assert(standalone_);
+
if (type_spec_ == NULL) {
+ resetState();
throw TApplicationException("TDenseProtocol: No type specified.");
} else {
+ assert(type_spec_->ttype == T_STRUCT);
ts_stack_.push_back(type_spec_);
+ // Write out a prefix of the structure fingerprint.
+ trans_->write(type_spec_->fp_prefix, FP_PREFIX_LEN);
+ xfer += FP_PREFIX_LEN;
}
}
+ // We need a new field index for this structure.
idx_stack_.push_back(0);
return 0;
}
@@ -125,11 +302,14 @@
const int16_t fieldId) {
uint32_t xfer = 0;
+ // Skip over optional fields.
while (FMT.tag != fieldId) {
// TODO(dreiss): Old meta here.
assert(FTS->ttype != T_STOP);
assert(FMT.is_optional);
+ // Write a zero byte so the reader can skip it.
xfer += subWriteBool(false);
+ // And advance to the next field.
IDX++;
}
@@ -141,20 +321,24 @@
xfer += 1;
}
- // OMG I'm so gross. XXX
- if (FTS->ttype != T_STOP) {
+ // writeFieldStop shares all lot of logic up to this point.
+ // Instead of replicating it all, we just call this method from that one
+ // and use a gross special case here.
+ if (UNLIKELY(FTS->ttype != T_STOP)) {
+ // For normal fields, push the TypeSpec that we're about to use.
ts_stack_.push_back(FTS);
}
return xfer;
}
uint32_t TDenseProtocol::writeFieldEnd() {
+ // Just move on to the next field.
IDX++;
return 0;
}
uint32_t TDenseProtocol::writeFieldStop() {
- return writeFieldBegin("", T_STOP, 0);
+ return TDenseProtocol::writeFieldBegin("", T_STOP, 0);
}
uint32_t TDenseProtocol::writeMapBegin(const TType keyType,
@@ -172,6 +356,8 @@
}
uint32_t TDenseProtocol::writeMapEnd() {
+ // Pop off the value type, as well as our entry in the map key/value stack.
+ // stateTransition takes care of popping off our TypeSpec.
ts_stack_.pop_back();
mkv_stack_.pop_back();
stateTransition();
@@ -188,6 +374,7 @@
}
uint32_t TDenseProtocol::writeListEnd() {
+ // Pop off the element type. stateTransition takes care of popping off ours.
ts_stack_.pop_back();
stateTransition();
return 0;
@@ -203,6 +390,7 @@
}
uint32_t TDenseProtocol::writeSetEnd() {
+ // Pop off the element type. stateTransition takes care of popping off ours.
ts_stack_.pop_back();
stateTransition();
return 0;
@@ -223,46 +411,19 @@
uint32_t TDenseProtocol::writeI16(const int16_t i16) {
checkTType(T_I16);
stateTransition();
-
- uint32_t rv = vliWrite(i16);
-#ifdef TDENSE_PROTOCOL_MEASURE_VLI
- vli_save_16 += 2 - rv;
- if (i16 < 0) {
- negs++;
- }
-#endif
-
- return rv;
+ return vlqWrite(i16);
}
uint32_t TDenseProtocol::writeI32(const int32_t i32) {
checkTType(T_I32);
stateTransition();
-
- uint32_t rv = vliWrite(i32);
-#ifdef TDENSE_PROTOCOL_MEASURE_VLI
- vli_save_32 += 4 - rv;
- if (i32 < 0) {
- negs++;
- }
-#endif
-
- return rv;
+ return vlqWrite(i32);
}
uint32_t TDenseProtocol::writeI64(const int64_t i64) {
checkTType(T_I64);
stateTransition();
-
- uint32_t rv = vliWrite(i64);
-#ifdef TDENSE_PROTOCOL_MEASURE_VLI
- vli_save_64 += 8 - rv;
- if (i64 < 0) {
- negs++;
- }
-#endif
-
- return rv;
+ return vlqWrite(i64);
}
uint32_t TDenseProtocol::writeDouble(const double dub) {
@@ -278,14 +439,7 @@
}
inline uint32_t TDenseProtocol::subWriteI32(const int32_t i32) {
- uint32_t rv = vliWrite(i32);
-#ifdef TDENSE_PROTOCOL_MEASURE_VLI
- vli_save_sub += 4 - rv;
- if (i32 < 0) {
- negs++;
- }
-#endif
- return rv;
+ return vlqWrite(i32);
}
uint32_t TDenseProtocol::subWriteString(const std::string& str) {
@@ -297,73 +451,13 @@
return xfer + size;
}
-inline uint32_t TDenseProtocol::vliRead(uint64_t& vli) {
- uint32_t used = 0;
- uint64_t val = 0;
- uint8_t buf[10]; // 64 bits / (7 bits/byte) = 10 bytes.
- bool borrowed = trans_->borrow(buf, sizeof(buf));
- // Fast path. TODO(dreiss): Make it faster.
- if (borrowed) {
- while (true) {
- uint8_t byte = buf[used];
- used++;
- val = (val << 7) | (byte & 0x7f);
- if (!(byte & 0x80)) {
- vli = val;
- trans_->consume(used);
- return used;
- }
- // Have to check for invalid data so we don't crash.
- if (UNLIKELY(used == sizeof(buf))) {
- throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
- }
- }
- }
- // Slow path.
- else {
- while (true) {
- uint8_t byte;
- used += trans_->readAll(&byte, 1);
- val = (val << 7) | (byte & 0x7f);
- if (!(byte & 0x80)) {
- vli = val;
- return used;
- }
- // Might as well check for invalid data on the slow path too.
- if (UNLIKELY(used >= sizeof(buf))) {
- throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
- }
- }
- }
-}
-
-inline uint32_t TDenseProtocol::vliWrite(uint64_t vli) {
- uint8_t buf[10]; // 64 bits / (7 bits/byte) = 10 bytes.
- int32_t pos = sizeof(buf) - 1;
-
- // Write the thing from back to front.
- buf[pos] = vli & 0x7f;
- vli >>= 7;
- pos--;
-
- while (vli > 0) {
- assert(pos >= 0);
- buf[pos] = (vli | 0x80);
- vli >>= 7;
- pos--;
- }
-
- // Back up one step before writing.
- pos++;
-
- trans_->write(buf+pos, sizeof(buf) - pos);
- return sizeof(buf) - pos;
-}
-
-/**
+/*
* Reading functions
+ *
+ * These have a lot of the same logic as the writing functions, so if
+ * something is confusing, look for comments in the corresponding writer.
*/
uint32_t TDenseProtocol::readMessageBegin(std::string& name,
@@ -395,8 +489,32 @@
}
uint32_t TDenseProtocol::readStructBegin(string& name) {
- // TODO(dreiss): Any chance this gets inlined?
- return TDenseProtocol::writeStructBegin(name);
+ uint32_t xfer = 0;
+
+ if (ts_stack_.empty()) {
+ assert(standalone_);
+
+ if (type_spec_ == NULL) {
+ resetState();
+ throw TApplicationException("TDenseProtocol: No type specified.");
+ } else {
+ assert(type_spec_->ttype == T_STRUCT);
+ ts_stack_.push_back(type_spec_);
+
+ // Check the fingerprint prefix.
+ uint8_t buf[FP_PREFIX_LEN];
+ xfer += trans_->read(buf, FP_PREFIX_LEN);
+ if (std::memcmp(buf, type_spec_->fp_prefix, FP_PREFIX_LEN) != 0) {
+ resetState();
+ throw TProtocolException(TProtocolException::INVALID_DATA,
+ "Fingerprint in data does not match type_spec.");
+ }
+ }
+ }
+
+ // We need a new field index for this structure.
+ idx_stack_.push_back(0);
+ return 0;
}
uint32_t TDenseProtocol::readStructEnd() {
@@ -410,6 +528,7 @@
int16_t& fieldId) {
uint32_t xfer = 0;
+ // For optional fields, check to see if they are there.
while (FMT.is_optional) {
bool is_present;
xfer += subReadBool(is_present);
@@ -419,10 +538,14 @@
IDX++;
}
+ // Once we hit a mandatory field, or an optional field that is present,
+ // we know that FMT and FTS point to the appropriate field.
+
fieldId = FMT.tag;
fieldType = FTS->ttype;
- // OMG I'm so gross. XXX
+ // Normally, we push the TypeSpec that we are about to read,
+ // but no reading is done for T_STOP.
if (FTS->ttype != T_STOP) {
ts_stack_.push_back(FTS);
}
@@ -443,8 +566,10 @@
int32_t sizei;
xfer += subReadI32(sizei);
if (sizei < 0) {
+ resetState();
throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
} else if (container_limit_ && sizei > container_limit_) {
+ resetState();
throw TProtocolException(TProtocolException::SIZE_LIMIT);
}
size = (uint32_t)sizei;
@@ -473,8 +598,10 @@
int32_t sizei;
xfer += subReadI32(sizei);
if (sizei < 0) {
+ resetState();
throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
} else if (container_limit_ && sizei > container_limit_) {
+ resetState();
throw TProtocolException(TProtocolException::SIZE_LIMIT);
}
size = (uint32_t)sizei;
@@ -500,8 +627,10 @@
int32_t sizei;
xfer += subReadI32(sizei);
if (sizei < 0) {
+ resetState();
throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
} else if (container_limit_ && sizei > container_limit_) {
+ resetState();
throw TProtocolException(TProtocolException::SIZE_LIMIT);
}
size = (uint32_t)sizei;
@@ -535,9 +664,10 @@
checkTType(T_I16);
stateTransition();
uint64_t u64;
- uint32_t rv = vliRead(u64);
+ uint32_t rv = vlqRead(u64);
int64_t val = (int64_t)u64;
if (UNLIKELY(val > INT16_MAX || val < INT16_MIN)) {
+ resetState();
throw TProtocolException(TProtocolException::INVALID_DATA,
"i16 out of range.");
}
@@ -549,9 +679,10 @@
checkTType(T_I32);
stateTransition();
uint64_t u64;
- uint32_t rv = vliRead(u64);
+ uint32_t rv = vlqRead(u64);
int64_t val = (int64_t)u64;
if (UNLIKELY(val > INT32_MAX || val < INT32_MIN)) {
+ resetState();
throw TProtocolException(TProtocolException::INVALID_DATA,
"i32 out of range.");
}
@@ -563,9 +694,10 @@
checkTType(T_I64);
stateTransition();
uint64_t u64;
- uint32_t rv = vliRead(u64);
+ uint32_t rv = vlqRead(u64);
int64_t val = (int64_t)u64;
if (UNLIKELY(val > INT64_MAX || val < INT64_MIN)) {
+ resetState();
throw TProtocolException(TProtocolException::INVALID_DATA,
"i64 out of range.");
}
@@ -587,9 +719,10 @@
uint32_t TDenseProtocol::subReadI32(int32_t& i32) {
uint64_t u64;
- uint32_t rv = vliRead(u64);
+ uint32_t rv = vlqRead(u64);
int64_t val = (int64_t)u64;
if (UNLIKELY(val > INT32_MAX || val < INT32_MIN)) {
+ resetState();
throw TProtocolException(TProtocolException::INVALID_DATA,
"i32 out of range.");
}
diff --git a/lib/cpp/src/protocol/TDenseProtocol.h b/lib/cpp/src/protocol/TDenseProtocol.h
index edd5c88..0902e15 100644
--- a/lib/cpp/src/protocol/TDenseProtocol.h
+++ b/lib/cpp/src/protocol/TDenseProtocol.h
@@ -12,13 +12,30 @@
namespace facebook { namespace thrift { namespace protocol {
/**
- * The dense protocol is designed to use as little space as possible.
- *
* !!!WARNING!!!
* This class is still highly experimental. Incompatible changes
* WILL be made to it without notice. DO NOT USE IT YET unless
* you are coordinating your testing with the author.
*
+ * The dense protocol is designed to use as little space as possible.
+ *
+ * There are two types of dense protocol instances. Standalone instances
+ * are not used for RPC and just encoded and decode structures of
+ * a predetermined type. Non-standalone instances are used for RPC.
+ * Currently, only standalone instances exist.
+ *
+ * To use a standalone dense protocol object, you must set the type_spec
+ * property (either in the constructor, or with setTypeSpec) to the local
+ * reflection TypeSpec of the structures you will write to (or read from) the
+ * protocol instance.
+ *
+ * BEST PRACTICES:
+ * - Never use optional for primitives or containers.
+ * - Only use optional for structures if they are very big and very rarely set.
+ * - All integers are variable-length, so you can use i64 without bloating.
+ * - NEVER EVER change the struct definitions IN ANY WAY without either
+ * changing your cache keys or talking to dreiss.
+ *
* TODO(dreiss): New class write with old meta.
*
* We override all of TBinaryProtocol's methods.
@@ -35,34 +52,18 @@
public:
typedef facebook::thrift::reflection::local::TypeSpec TypeSpec;
+ static const int FP_PREFIX_LEN;
+ /**
+ * @param tran The transport to use.
+ * @param type_spec The TypeSpec of the structures using this protocol.
+ */
TDenseProtocol(boost::shared_ptr<TTransport> trans,
TypeSpec* type_spec = NULL) :
TBinaryProtocol(trans),
- type_spec_(type_spec) {
- vli_save_16 = 0;
- vli_save_32 = 0;
- vli_save_64 = 0;
- vli_save_sub = 0;
- negs = 0;
- }
-
- TDenseProtocol(boost::shared_ptr<TTransport> trans,
- TypeSpec* type_spec,
- int32_t string_limit,
- int32_t container_limit) :
- TBinaryProtocol(trans,
- string_limit,
- container_limit,
- true,
- true),
- type_spec_(type_spec) {
- vli_save_16 = 0;
- vli_save_32 = 0;
- vli_save_64 = 0;
- vli_save_sub = 0;
- negs = 0;
- }
+ type_spec_(type_spec),
+ standalone_(true)
+ {}
void setTypeSpec(TypeSpec* type_spec) {
type_spec_ = type_spec;
@@ -203,14 +204,24 @@
private:
+ // Implementation functions, documented in the .cpp.
inline void checkTType(const TType ttype);
inline void stateTransition();
// Read and write variable-length integers.
// Uses the same technique as the MIDI file format.
- inline uint32_t vliRead(uint64_t& vli);
- inline uint32_t vliWrite(uint64_t vli);
+ inline uint32_t vlqRead(uint64_t& vlq);
+ inline uint32_t vlqWrite(uint64_t vlq);
+ // Called before throwing an exception to make the object reusable.
+ void resetState() {
+ ts_stack_.clear();
+ idx_stack_.clear();
+ mkv_stack_.clear();
+ }
+
+ // TypeSpec of the top-level structure to write,
+ // for standalone protocol objects.
TypeSpec* type_spec_;
std::vector<TypeSpec*> ts_stack_; // TypeSpec stack.
@@ -218,13 +229,8 @@
std::vector<bool> mkv_stack_; // Map Key/Vlue stack.
// True = key, False = value.
- // XXX Remove these when wire format is finalized.
- public:
- int vli_save_16;
- int vli_save_32;
- int vli_save_64;
- int vli_save_sub;
- int negs;
+ // True iff this is a standalone instance (no RPC).
+ bool standalone_;
};
}}} // facebook::thrift::protocol
diff --git a/test/DenseProtoTest.cpp b/test/DenseProtoTest.cpp
index 0953c19..f50faca 100644
--- a/test/DenseProtoTest.cpp
+++ b/test/DenseProtoTest.cpp
@@ -1,8 +1,10 @@
/*
../compiler/cpp/thrift -cpp -dense DebugProtoTest.thrift
+../compiler/cpp/thrift -cpp -dense OptionalRequiredTest.thrift
g++ -Wall -g -I../lib/cpp/src -I/usr/local/include/boost-1_33_1 \
- DenseProtoTest.cpp gen-cpp/DebugProtoTest_types.cpp \
- ../lib/cpp/.libs/libthrift.a -o DenseProtoTest
+ gen-cpp/OptionalRequiredTest_types.cpp \
+ gen-cpp/DebugProtoTest_types.cpp \
+ DenseProtoTest.cpp ../lib/cpp/.libs/libthrift.a -o DenseProtoTest
./DenseProtoTest
*/
@@ -11,10 +13,13 @@
#define inline
#undef NDEBUG
+#include <cstdlib>
#include <cassert>
#include <iostream>
#include <cmath>
+#include <string>
#include "gen-cpp/DebugProtoTest_types.h"
+#include "gen-cpp/OptionalRequiredTest_types.h"
#include <protocol/TDenseProtocol.h>
#include <transport/TTransportUtils.h>
@@ -31,6 +36,7 @@
int main() {
+ using std::string;
using std::cout;
using std::endl;
using boost::shared_ptr;
@@ -123,13 +129,14 @@
// Let's test out the variable-length ints, shall we?
- uint64_t vli;
+ uint64_t vlq;
#define checkout(i, c) { \
buffer->resetBuffer(); \
- proto->vliWrite(i); \
+ proto->vlqWrite(i); \
+ proto->getTransport()->flush(); \
assert(my_memeq(buffer->getBufferAsString().data(), c, sizeof(c)-1)); \
- proto->vliRead(vli); \
- assert(vli == i); \
+ proto->vlqRead(vlq); \
+ assert(vlq == i); \
}
checkout(0x00000000, "\x00");
@@ -160,6 +167,199 @@
checkout(0x7FFFFFFFFFFFFFFFull, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F");
checkout(0xFFFFFFFFFFFFFFFFull, "\x81\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F");
+ // Test out the slow path with a TBufferedTransport.
+ shared_ptr<TBufferedTransport> buff_trans(new TBufferedTransport(buffer, 3));
+ proto.reset(new TDenseProtocol(buff_trans));
+ checkout(0x0000000100000000ull, "\x90\x80\x80\x80\x00");
+ checkout(0x0000000200000000ull, "\xA0\x80\x80\x80\x00");
+ checkout(0x0000000300000000ull, "\xB0\x80\x80\x80\x00");
+ checkout(0x0000000700000000ull, "\xF0\x80\x80\x80\x00");
+ checkout(0x00000007F0000000ull, "\xFF\x80\x80\x80\x00");
+ checkout(0x00000007FFFFFFFFull, "\xFF\xFF\xFF\xFF\x7F");
+ checkout(0x0000000800000000ull, "\x81\x80\x80\x80\x80\x00");
+ checkout(0x1FFFFFFFFFFFFFFFull, "\x9F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F");
+ checkout(0x7FFFFFFFFFFFFFFFull, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F");
+ checkout(0xFFFFFFFFFFFFFFFFull, "\x81\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F");
+
+ // Test optional stuff.
+ proto.reset(new TDenseProtocol(buffer));
+ proto->setTypeSpec(ManyOpt::local_reflection);
+ ManyOpt mo1, mo2, mo3, mo4, mo5, mo6;
+ mo1.opt1 = 923759347;
+ mo1.opt2 = 392749274;
+ mo1.opt3 = 395739402;
+ mo1.def4 = 294730928;
+ mo1.opt5 = 394309218;
+ mo1.opt6 = 832194723;
+ mo1.__isset.opt1 = true;
+ mo1.__isset.opt2 = true;
+ mo1.__isset.opt3 = true;
+ mo1.__isset.def4 = true;
+ mo1.__isset.opt5 = true;
+ mo1.__isset.opt6 = true;
+
+ mo1.write(proto.get());
+ mo2.read(proto.get());
+
+ assert(mo2.__isset.opt1 == true);
+ assert(mo2.__isset.opt2 == true);
+ assert(mo2.__isset.opt3 == true);
+ assert(mo2.__isset.def4 == true);
+ assert(mo2.__isset.opt5 == true);
+ assert(mo2.__isset.opt6 == true);
+
+ assert(mo1 == mo2);
+
+ mo1.__isset.opt1 = false;
+ mo1.__isset.opt3 = false;
+ mo1.__isset.opt5 = false;
+
+ mo1.write(proto.get());
+ mo3.read(proto.get());
+
+ assert(mo3.__isset.opt1 == false);
+ assert(mo3.__isset.opt2 == true);
+ assert(mo3.__isset.opt3 == false);
+ assert(mo3.__isset.def4 == true);
+ assert(mo3.__isset.opt5 == false);
+ assert(mo3.__isset.opt6 == true);
+
+ assert(mo1 == mo3);
+
+ mo1.__isset.opt1 = true;
+ mo1.__isset.opt3 = true;
+ mo1.__isset.opt5 = true;
+ mo1.__isset.opt2 = false;
+ mo1.__isset.opt6 = false;
+
+ mo1.write(proto.get());
+ mo4.read(proto.get());
+
+ assert(mo4.__isset.opt1 == true);
+ assert(mo4.__isset.opt2 == false);
+ assert(mo4.__isset.opt3 == true);
+ assert(mo4.__isset.def4 == true);
+ assert(mo4.__isset.opt5 == true);
+ assert(mo4.__isset.opt6 == false);
+
+ assert(mo1 == mo4);
+
+ mo1.__isset.opt1 = false;
+ mo1.__isset.opt5 = false;
+
+ mo1.write(proto.get());
+ mo5.read(proto.get());
+
+ assert(mo5.__isset.opt1 == false);
+ assert(mo5.__isset.opt2 == false);
+ assert(mo5.__isset.opt3 == true);
+ assert(mo5.__isset.def4 == true);
+ assert(mo5.__isset.opt5 == false);
+ assert(mo5.__isset.opt6 == false);
+
+ assert(mo1 == mo5);
+
+ mo1.__isset.opt3 = false;
+
+ mo1.write(proto.get());
+ mo6.read(proto.get());
+
+ assert(mo6.__isset.opt1 == false);
+ assert(mo6.__isset.opt2 == false);
+ assert(mo6.__isset.opt3 == false);
+ assert(mo6.__isset.def4 == true);
+ assert(mo6.__isset.opt5 == false);
+ assert(mo6.__isset.opt6 == false);
+
+ assert(mo1 == mo6);
+
+
+ // Test fingerprint checking stuff.
+
+ {
+ // Default and required have the same fingerprint.
+ Tricky1 t1;
+ Tricky3 t3;
+ assert(string(Tricky1::ascii_fingerprint) == Tricky3::ascii_fingerprint);
+ proto->setTypeSpec(Tricky1::local_reflection);
+ t1.im_default = 227;
+ t1.write(proto.get());
+ proto->setTypeSpec(Tricky3::local_reflection);
+ t3.read(proto.get());
+ assert(t3.im_required == 227);
+ }
+
+ {
+ // Optional changes things.
+ Tricky1 t1;
+ Tricky2 t2;
+ assert(string(Tricky1::ascii_fingerprint) != Tricky2::ascii_fingerprint);
+ proto->setTypeSpec(Tricky1::local_reflection);
+ t1.im_default = 227;
+ t1.write(proto.get());
+ try {
+ proto->setTypeSpec(Tricky2::local_reflection);
+ t2.read(proto.get());
+ assert(false);
+ } catch (TProtocolException& ex) {
+ buffer->resetBuffer();
+ }
+ }
+
+ {
+ // Holy cow. We can use the Tricky1 typespec with the Tricky2 structure.
+ Tricky1 t1;
+ Tricky2 t2;
+ proto->setTypeSpec(Tricky1::local_reflection);
+ t1.im_default = 227;
+ t1.write(proto.get());
+ t2.read(proto.get());
+ assert(t2.__isset.im_optional == true);
+ assert(t2.im_optional == 227);
+ }
+
+ {
+ // And totally off the wall.
+ Tricky1 t1;
+ OneOfEach ooe2;
+ assert(string(Tricky1::ascii_fingerprint) != OneOfEach::ascii_fingerprint);
+ proto->setTypeSpec(Tricky1::local_reflection);
+ t1.im_default = 227;
+ t1.write(proto.get());
+ try {
+ proto->setTypeSpec(OneOfEach::local_reflection);
+ ooe2.read(proto.get());
+ assert(false);
+ } catch (TProtocolException& ex) {
+ buffer->resetBuffer();
+ }
+ }
+
+ // Okay, this is really off the wall.
+ // Just don't crash.
+ cout << "Starting fuzz test. This takes a while. (20 dots.)" << endl;
+ std::srand(12345);
+ for (int i = 0; i < 2000; i++) {
+ if (i % 100 == 0) {
+ cout << ".";
+ cout.flush();
+ }
+ buffer->resetBuffer();
+ // Make sure the fingerprint prefix is right.
+ buffer->write(Nesting::binary_fingerprint, 4);
+ for (int j = 0; j < 1024*1024; j++) {
+ uint8_t r = std::rand();
+ buffer->write(&r, 1);
+ }
+ Nesting n;
+ proto->setTypeSpec(OneOfEach::local_reflection);
+ try {
+ n.read(proto.get());
+ } catch (TProtocolException& ex) {
+ } catch (TTransportException& ex) {
+ }
+ }
+ cout << endl;
return 0;
}
diff --git a/test/OptionalRequiredTest.thrift b/test/OptionalRequiredTest.thrift
index b2652dd..431a0b0 100644
--- a/test/OptionalRequiredTest.thrift
+++ b/test/OptionalRequiredTest.thrift
@@ -40,3 +40,12 @@
5: required Simple req_simp;
6: optional Simple opt_simp;
}
+
+struct ManyOpt {
+ 1: optional i32 opt1;
+ 2: optional i32 opt2;
+ 3: optional i32 opt3;
+ 4: i32 def4;
+ 5: optional i32 opt5;
+ 6: optional i32 opt6;
+}