Thrift: Clean up and test TDenseProtocol

Summary:
- TDenseProtocol now includes a part of the struct fingerprint in
  the serialized message, to protect from unserialzing trash.
- A lot of cleanups and commenting for TDenseProtocol.
- A lot of test cases for same.

Reviewed By: mcslee

Test Plan: test/DenseProtoTest.cpp

Revert Plan: ok


git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@665257 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/generate/t_cpp_generator.cc b/compiler/cpp/src/generate/t_cpp_generator.cc
index 4a6befd..6de7452 100644
--- a/compiler/cpp/src/generate/t_cpp_generator.cc
+++ b/compiler/cpp/src/generate/t_cpp_generator.cc
@@ -361,6 +361,7 @@
   generate_struct_fingerprint(f_types_impl_, tstruct, true);
   generate_local_reflection(f_types_, tstruct, false);
   generate_local_reflection(f_types_impl_, tstruct, true);
+  generate_local_reflection_pointer(f_types_impl_, tstruct);
   generate_struct_reader(f_types_impl_, tstruct);
   generate_struct_writer(f_types_impl_, tstruct);
 }
@@ -576,7 +577,7 @@
       indent() << stat << "char* " << nspace
         << "ascii_fingerprint" << comment << "= \"" <<
         tstruct->get_ascii_fingerprint() << "\";" << endl <<
-      indent() << stat << "char " << nspace <<
+      indent() << stat << "uint8_t " << nspace <<
         "binary_fingerprint[" << t_type::fingerprint_len << "]" << comment << "= {";
     char* comma = "";
     for (int i = 0; i < t_type::fingerprint_len; i++) {
@@ -681,7 +682,7 @@
 
   if (ttype->is_struct()) {
     out << "," << endl <<
-      indent() << ((t_struct*)ttype)->get_members().size() << "," << endl <<
+      indent() << type_name(ttype) << "::binary_fingerprint," << endl <<
       indent() << local_reflection_name("metas", ttype) << "," << endl <<
       indent() << local_reflection_name("specs", ttype);
   } else if (ttype->is_list()) {
@@ -701,16 +702,22 @@
   out << ");" << endl << endl;
 
   indent_down();
+}
 
-  // If this is a struct and we are in the implementaion file,
-  // also set the class's static pointer to its reflection.
-  if (ttype->is_struct() && is_definition) {
-    indent(out) <<
-      "facebook::thrift::reflection::local::TypeSpec* " <<
-        ttype->get_name() << "::local_reflection = " << endl <<
-      indent() << "  &" << local_reflection_name("typespec", ttype) << ";" <<
-      endl << endl;
+/**
+ * Writes the structure's static pointer to its local reflection typespec
+ * into the implementation file.
+ */
+void t_cpp_generator::generate_local_reflection_pointer(std::ofstream& out,
+                                                        t_type* ttype) {
+  if (!gen_dense_) {
+    return;
   }
+  indent(out) <<
+    "facebook::thrift::reflection::local::TypeSpec* " <<
+      ttype->get_name() << "::local_reflection = " << endl <<
+    indent() << "  &" << local_reflection_name("typespec", ttype) << ";" <<
+    endl << endl;
 }
 
 /**
diff --git a/compiler/cpp/src/generate/t_cpp_generator.h b/compiler/cpp/src/generate/t_cpp_generator.h
index 7054990..008140a 100644
--- a/compiler/cpp/src/generate/t_cpp_generator.h
+++ b/compiler/cpp/src/generate/t_cpp_generator.h
@@ -149,8 +149,9 @@
   std::string type_to_enum(t_type* ttype);
   std::string local_reflection_name(const char*, t_type* ttype);
 
-  // This handles checking gen_dense_ and checking for duplicates.
+  // These handles checking gen_dense_ and checking for duplicates.
   void generate_local_reflection(std::ofstream& out, t_type* ttype, bool is_definition);
+  void generate_local_reflection_pointer(std::ofstream& out, t_type* ttype);
 
   bool is_complex_type(t_type* ttype) {
     ttype = get_true_type(ttype);
diff --git a/lib/cpp/src/TReflectionLocal.h b/lib/cpp/src/TReflectionLocal.h
index df6e706..3fe7c63 100644
--- a/lib/cpp/src/TReflectionLocal.h
+++ b/lib/cpp/src/TReflectionLocal.h
@@ -8,18 +8,25 @@
 #define _THRIFT_TREFLECTIONLOCAL_H_ 1
 
 #include <stdint.h>
+#include <cstring>
 #include <protocol/TProtocol.h>
 
+/**
+ * Local Reflection is a blanket term referring to the the structure
+ * and generation of this particular representation of Thrift types.
+ * (It is called local because it cannot be serialized by Thrift).
+ *
+ * @author David Reiss <dreiss@facebook.com>
+ */
+
 namespace facebook { namespace thrift { namespace reflection { namespace local {
 
 using facebook::thrift::protocol::TType;
 
-/**
- * A local reflection is a representation of a Thrift structure.
- * (It is called local because it cannot be serialized by Thrift).
- *
- * @author David Reiss <dreiss@facebook.com>
- */
+// We include this many bytes of the structure's fingerprint when serializing
+// a top-level structure.  Long enough to make collisions unlikely, short
+// enough to not significantly affect the amount of memory used.
+const int FP_PREFIX_LEN = 4;
 
 struct FieldMeta {
   int16_t tag;
@@ -27,16 +34,15 @@
 };
 
 struct TypeSpec {
+  TType ttype;
+  uint8_t    fp_prefix[FP_PREFIX_LEN];
+
   // Use an anonymous union here so we can fit two TypeSpecs in one cache line.
   union {
     struct {
       // Use parallel arrays here for denser packing (of the arrays).
       FieldMeta* metas;
       TypeSpec** specs;
-      // n_fields is only used for debugging, but it should only add
-      // a minimimal amount to the effective size of this structure
-      // because of alignment restrictions.
-      int        n_fields;
     } tstruct;
     struct {
       TypeSpec *subtype1;
@@ -44,21 +50,21 @@
     } tcontainer;
   };
 
-  // Put this at the end so the 32-bit enum can be crammed up next to the
-  // 32-bit int (n_fields).
-  TType ttype;
-
-
   // Static initialization of unions isn't really possible,
   // so take the plunge and use constructors.
   // Hopefully they'll be evaluated at compile time.
 
-  TypeSpec(TType ttype) : ttype(ttype) {}
+  TypeSpec(TType ttype) : ttype(ttype) {
+    std::memset(fp_prefix, 0, FP_PREFIX_LEN);
+  }
 
-  TypeSpec(TType ttype, int n_fields, FieldMeta* metas, TypeSpec** specs) :
+  TypeSpec(TType ttype,
+           uint8_t* fingerprint,
+           FieldMeta* metas,
+           TypeSpec** specs) :
     ttype(ttype)
   {
-    tstruct.n_fields = n_fields;
+    std::memcpy(fp_prefix, fingerprint, FP_PREFIX_LEN);
     tstruct.metas = metas;
     tstruct.specs = specs;
   }
@@ -66,6 +72,7 @@
   TypeSpec(TType ttype, TypeSpec* subtype1, TypeSpec* subtype2) :
     ttype(ttype)
   {
+    std::memset(fp_prefix, 0, FP_PREFIX_LEN);
     tcontainer.subtype1 = subtype1;
     tcontainer.subtype2 = subtype2;
   }
diff --git a/lib/cpp/src/protocol/TDenseProtocol.cpp b/lib/cpp/src/protocol/TDenseProtocol.cpp
index 99117d6..e79d4f1 100644
--- a/lib/cpp/src/protocol/TDenseProtocol.cpp
+++ b/lib/cpp/src/protocol/TDenseProtocol.cpp
@@ -4,20 +4,95 @@
 // See accompanying file LICENSE or visit the Thrift site at:
 // http://developers.facebook.com/thrift/
 
+/*
+
+IMPLEMENTATION DETAILS
+
+TDenseProtocol was designed to have a smaller serialized form than
+TBinaryProtocol.  This is accomplished using two techniques.  The first is
+variable-length integer encoding.  We use the same technique that the Standard
+MIDI File format uses for "variable-length quantities"
+(http://en.wikipedia.org/wiki/Variable-length_quantity).
+All integers (including i16, but not byte) are first cast to uint64_t,
+then written out as variable-length quantities.  This has the unfortunate side
+effect that all negative numbers require 10 bytes, but negative numbers tend
+to be far less common than positive ones.
+
+The second technique eliminating the field ids used by TBinaryProtocol.  This
+decision required support from the Thrift compiler and also sacrifices some of
+the backward and forward compatibility of TBinaryProtocol.
+
+We considered implementing this technique by generating separate readers and
+writers for the dense protocol (this is how Pillar, Thrift's predecessor,
+worked), but this idea had a few problems:
+- Our abstractions go out the window.
+- We would have to maintain a second code generator.
+- Preserving compatibility with old versions of the structures would be a
+  nightmare.
+
+Therefore, we chose an alternate implementation that stored the description of
+the data neither in the data itself (like TBinaryProtocol) nor in the
+serialization code (like Pillar), but instead in a separate data structure,
+called a TypeSpec.  TypeSpecs are generated by the Thrift compiler
+(specifically in the t_cpp_generator), and their structure should be
+documented there (TODO(dreiss): s/should be/is/).
+
+We maintain a stack of TypeSpecs within the protocol so it knows where the
+generated code is in the reading/writing process.  For example, if we are
+writing an i32 contained in a struct bar, contained in a struct foo, then the
+stack would look like: TOP , i32 , struct bar , struct foo , BOTTOM.
+The following invariant: whenever we are about to read/write an object
+(structBegin, containerBegin, or a scalar), the TypeSpec on the top of the
+stack must match the type being read/written.  The main reasons that this
+invariant must be maintained is that if we ever start reading a structure, we
+must have its exact TypeSpec in order to pass the right tags to the
+deserializer.
+
+We use the following strategies for maintaining this invariant:
+
+- For structures, we have a separate stack of indexes, one for each structure
+  on the TypeSpec stack.  These are indexes into the list of fields in the
+  structure's TypeSpec.  When we {read,write}FieldBegin, we push on the
+  TypeSpec for the field.
+- When we begin writing a list or set, we push on the TypeSpec for the
+  element type.
+- For maps, we have a separate stack of booleans, one for each map on the
+  TypeSpec stack.  The boolean is true if we are writing the key for that
+  map, and false if we are writing the value.  Maps are the trickiest case
+  because the generated code does not call any protocol method between
+  the key and the value.  As a result, we potentially have to switch
+  between map key state and map value state after reading/writing any object.
+- This job is handled by the stateTransition method.  It is called after
+  reading/writing every object.  It pops the current TypeSpec off the stack,
+  then optionally pushes a new one on, depending on what the next TypeSpec is.
+  If it is a struct, the job is left to the next writeFieldBegin.  If it is a
+  set or list, the just-popped typespec is pushed back on.  If it is a map,
+  the top of the key/value stack is toggled, and the appropriate TypeSpec
+  is pushed.
+
+Optional fields are a little tricky also.  We write a zero byte if they are
+absent and prefix them with an 0x01 byte if they are present
+*/
+
 #define __STDC_LIMIT_MACROS
 #include <stdint.h>
 #include "TDenseProtocol.h"
 #include "TReflectionLocal.h"
 
-// XXX for debugging (duh)
+// Leaving this on for now.  Disabling it will turn off asserts, which should
+// give a performance boost.  When we have *really* thorough test cases,
+// we should drop this.
 #define DEBUG_TDENSEPROTOCOL
 
-// XXX for development.
-#define TDENSE_PROTOCOL_MEASURE_VLI
-
-// The XXX above does not apply to this.
+// NOTE: Assertions should *only* be used to detect bugs in code,
+//       either in TDenseProtocol itself, or in code using it.
+//       (For example, using the wrong TypeSpec.)
+//       Invalid data should NEVER cause an assertion failure,
+//       no matter how grossly corrupted, nor how ingeniously crafted.
 #ifdef DEBUG_TDENSEPROTOCOL
 #undef NDEBUG
+#else
+#define NDEBUG
 #endif
 #include <cassert>
 
@@ -31,6 +106,9 @@
 
 namespace facebook { namespace thrift { namespace protocol {
 
+const int TDenseProtocol::FP_PREFIX_LEN =
+  facebook::thrift::reflection::local::FP_PREFIX_LEN;
+
 // Top TypeSpec.  TypeSpec of the structure being encoded.
 #define TTS  (ts_stack_.back())  // type = TypeSpec*
 // InDeX.  Index into TTS of the current/next field to encode.
@@ -44,15 +122,25 @@
 #define ST2 (TTS->tcontainer.subtype2)
 
 
+/**
+ * Checks that @c ttype is indeed the ttype that we should be writing,
+ * according to our typespec.  Aborts if the test fails and debugging in on.
+ */
 inline void TDenseProtocol::checkTType(const TType ttype) {
   assert(!ts_stack_.empty());
   assert(TTS->ttype == ttype);
 }
 
+/**
+ * Makes sure that the TypeSpec stack is correct for the next object.
+ * See top-of-file comments.
+ */
 inline void TDenseProtocol::stateTransition() {
   TypeSpec* old_tts = ts_stack_.back();
   ts_stack_.pop_back();
 
+  // If this is the end of the top-level write, we should have just popped
+  // the TypeSpec passed to the constructor.
   if (ts_stack_.empty()) {
     assert(old_tts = type_spec_);
     return;
@@ -83,6 +171,84 @@
   }
 }
 
+
+/*
+ * Variable-length quantity functions.
+ */
+
+inline uint32_t TDenseProtocol::vlqRead(uint64_t& vlq) {
+  uint32_t used = 0;
+  uint64_t val = 0;
+  uint8_t buf[10];  // 64 bits / (7 bits/byte) = 10 bytes.
+  bool borrowed = trans_->borrow(buf, sizeof(buf));
+
+  // Fast path.  TODO(dreiss): Make it faster.
+  if (borrowed) {
+    while (true) {
+      uint8_t byte = buf[used];
+      used++;
+      val = (val << 7) | (byte & 0x7f);
+      if (!(byte & 0x80)) {
+        vlq = val;
+        trans_->consume(used);
+        return used;
+      }
+      // Have to check for invalid data so we don't crash.
+      if (UNLIKELY(used == sizeof(buf))) {
+        resetState();
+        throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
+      }
+    }
+  }
+
+  // Slow path.
+  else {
+    while (true) {
+      uint8_t byte;
+      used += trans_->readAll(&byte, 1);
+      val = (val << 7) | (byte & 0x7f);
+      if (!(byte & 0x80)) {
+        vlq = val;
+        return used;
+      }
+      // Might as well check for invalid data on the slow path too.
+      if (UNLIKELY(used >= sizeof(buf))) {
+        resetState();
+        throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
+      }
+    }
+  }
+}
+
+inline uint32_t TDenseProtocol::vlqWrite(uint64_t vlq) {
+  uint8_t buf[10];  // 64 bits / (7 bits/byte) = 10 bytes.
+  int32_t pos = sizeof(buf) - 1;
+
+  // Write the thing from back to front.
+  buf[pos] = vlq & 0x7f;
+  vlq >>= 7;
+  pos--;
+
+  while (vlq > 0) {
+    assert(pos >= 0);
+    buf[pos] = (vlq | 0x80);
+    vlq >>= 7;
+    pos--;
+  }
+
+  // Back up one step before writing.
+  pos++;
+
+  trans_->write(buf+pos, sizeof(buf) - pos);
+  return sizeof(buf) - pos;
+}
+
+
+
+/*
+ * Writing functions.
+ */
+
 uint32_t TDenseProtocol::writeMessageBegin(const std::string& name,
                                            const TMessageType messageType,
                                            const int32_t seqid) {
@@ -100,16 +266,27 @@
   return 0;
 }
 
-// Also implements readStructBegin.
 uint32_t TDenseProtocol::writeStructBegin(const string& name) {
+  uint32_t xfer = 0;
+
+  // The TypeSpec stack should be empty if this is the top-level read/write.
+  // If it is, we push the TypeSpec passed to the constructor.
   if (ts_stack_.empty()) {
+    assert(standalone_);
+
     if (type_spec_ == NULL) {
+      resetState();
       throw TApplicationException("TDenseProtocol: No type specified.");
     } else {
+      assert(type_spec_->ttype == T_STRUCT);
       ts_stack_.push_back(type_spec_);
+      // Write out a prefix of the structure fingerprint.
+      trans_->write(type_spec_->fp_prefix, FP_PREFIX_LEN);
+      xfer += FP_PREFIX_LEN;
     }
   }
 
+  // We need a new field index for this structure.
   idx_stack_.push_back(0);
   return 0;
 }
@@ -125,11 +302,14 @@
                                          const int16_t fieldId) {
   uint32_t xfer = 0;
 
+  // Skip over optional fields.
   while (FMT.tag != fieldId) {
     // TODO(dreiss): Old meta here.
     assert(FTS->ttype != T_STOP);
     assert(FMT.is_optional);
+    // Write a zero byte so the reader can skip it.
     xfer += subWriteBool(false);
+    // And advance to the next field.
     IDX++;
   }
 
@@ -141,20 +321,24 @@
     xfer += 1;
   }
 
-  // OMG I'm so gross.  XXX
-  if (FTS->ttype != T_STOP) {
+  // writeFieldStop shares all lot of logic up to this point.
+  // Instead of replicating it all, we just call this method from that one
+  // and use a gross special case here.
+  if (UNLIKELY(FTS->ttype != T_STOP)) {
+    // For normal fields, push the TypeSpec that we're about to use.
     ts_stack_.push_back(FTS);
   }
   return xfer;
 }
 
 uint32_t TDenseProtocol::writeFieldEnd() {
+  // Just move on to the next field.
   IDX++;
   return 0;
 }
 
 uint32_t TDenseProtocol::writeFieldStop() {
-  return writeFieldBegin("", T_STOP, 0);
+  return TDenseProtocol::writeFieldBegin("", T_STOP, 0);
 }
 
 uint32_t TDenseProtocol::writeMapBegin(const TType keyType,
@@ -172,6 +356,8 @@
 }
 
 uint32_t TDenseProtocol::writeMapEnd() {
+  // Pop off the value type, as well as our entry in the map key/value stack.
+  // stateTransition takes care of popping off our TypeSpec.
   ts_stack_.pop_back();
   mkv_stack_.pop_back();
   stateTransition();
@@ -188,6 +374,7 @@
 }
 
 uint32_t TDenseProtocol::writeListEnd() {
+  // Pop off the element type.  stateTransition takes care of popping off ours.
   ts_stack_.pop_back();
   stateTransition();
   return 0;
@@ -203,6 +390,7 @@
 }
 
 uint32_t TDenseProtocol::writeSetEnd() {
+  // Pop off the element type.  stateTransition takes care of popping off ours.
   ts_stack_.pop_back();
   stateTransition();
   return 0;
@@ -223,46 +411,19 @@
 uint32_t TDenseProtocol::writeI16(const int16_t i16) {
   checkTType(T_I16);
   stateTransition();
-
-  uint32_t rv = vliWrite(i16);
-#ifdef TDENSE_PROTOCOL_MEASURE_VLI
-  vli_save_16 += 2 - rv;
-  if (i16 < 0) {
-    negs++;
-  }
-#endif
-
-  return rv;
+  return vlqWrite(i16);
 }
 
 uint32_t TDenseProtocol::writeI32(const int32_t i32) {
   checkTType(T_I32);
   stateTransition();
-
-  uint32_t rv = vliWrite(i32);
-#ifdef TDENSE_PROTOCOL_MEASURE_VLI
-  vli_save_32 += 4 - rv;
-  if (i32 < 0) {
-    negs++;
-  }
-#endif
-
-  return rv;
+  return vlqWrite(i32);
 }
 
 uint32_t TDenseProtocol::writeI64(const int64_t i64) {
   checkTType(T_I64);
   stateTransition();
-
-  uint32_t rv = vliWrite(i64);
-#ifdef TDENSE_PROTOCOL_MEASURE_VLI
-  vli_save_64 += 8 - rv;
-  if (i64 < 0) {
-    negs++;
-  }
-#endif
-
-  return rv;
+  return vlqWrite(i64);
 }
 
 uint32_t TDenseProtocol::writeDouble(const double dub) {
@@ -278,14 +439,7 @@
 }
 
 inline uint32_t TDenseProtocol::subWriteI32(const int32_t i32) {
-  uint32_t rv = vliWrite(i32);
-#ifdef TDENSE_PROTOCOL_MEASURE_VLI
-  vli_save_sub += 4 - rv;
-  if (i32 < 0) {
-    negs++;
-  }
-#endif
-  return rv;
+  return vlqWrite(i32);
 }
 
 uint32_t TDenseProtocol::subWriteString(const std::string& str) {
@@ -297,73 +451,13 @@
   return xfer + size;
 }
 
-inline uint32_t TDenseProtocol::vliRead(uint64_t& vli) {
-  uint32_t used = 0;
-  uint64_t val = 0;
-  uint8_t buf[10];  // 64 bits / (7 bits/byte) = 10 bytes.
-  bool borrowed = trans_->borrow(buf, sizeof(buf));
 
-  // Fast path.  TODO(dreiss): Make it faster.
-  if (borrowed) {
-    while (true) {
-      uint8_t byte = buf[used];
-      used++;
-      val = (val << 7) | (byte & 0x7f);
-      if (!(byte & 0x80)) {
-        vli = val;
-        trans_->consume(used);
-        return used;
-      }
-      // Have to check for invalid data so we don't crash.
-      if (UNLIKELY(used == sizeof(buf))) {
-        throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
-      }
-    }
-  }
 
-  // Slow path.
-  else {
-    while (true) {
-      uint8_t byte;
-      used += trans_->readAll(&byte, 1);
-      val = (val << 7) | (byte & 0x7f);
-      if (!(byte & 0x80)) {
-        vli = val;
-        return used;
-      }
-      // Might as well check for invalid data on the slow path too.
-      if (UNLIKELY(used >= sizeof(buf))) {
-        throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes.");
-      }
-    }
-  }
-}
-
-inline uint32_t TDenseProtocol::vliWrite(uint64_t vli) {
-  uint8_t buf[10];  // 64 bits / (7 bits/byte) = 10 bytes.
-  int32_t pos = sizeof(buf) - 1;
-
-  // Write the thing from back to front.
-  buf[pos] = vli & 0x7f;
-  vli >>= 7;
-  pos--;
-
-  while (vli > 0) {
-    assert(pos >= 0);
-    buf[pos] = (vli | 0x80);
-    vli >>= 7;
-    pos--;
-  }
-
-  // Back up one step before writing.
-  pos++;
-
-  trans_->write(buf+pos, sizeof(buf) - pos);
-  return sizeof(buf) - pos;
-}
-
-/**
+/*
  * Reading functions
+ *
+ * These have a lot of the same logic as the writing functions, so if
+ * something is confusing, look for comments in the corresponding writer.
  */
 
 uint32_t TDenseProtocol::readMessageBegin(std::string& name,
@@ -395,8 +489,32 @@
 }
 
 uint32_t TDenseProtocol::readStructBegin(string& name) {
-  // TODO(dreiss): Any chance this gets inlined?
-  return TDenseProtocol::writeStructBegin(name);
+  uint32_t xfer = 0;
+
+  if (ts_stack_.empty()) {
+    assert(standalone_);
+
+    if (type_spec_ == NULL) {
+      resetState();
+      throw TApplicationException("TDenseProtocol: No type specified.");
+    } else {
+      assert(type_spec_->ttype == T_STRUCT);
+      ts_stack_.push_back(type_spec_);
+
+      // Check the fingerprint prefix.
+      uint8_t buf[FP_PREFIX_LEN];
+      xfer += trans_->read(buf, FP_PREFIX_LEN);
+      if (std::memcmp(buf, type_spec_->fp_prefix, FP_PREFIX_LEN) != 0) {
+        resetState();
+        throw TProtocolException(TProtocolException::INVALID_DATA,
+            "Fingerprint in data does not match type_spec.");
+      }
+    }
+  }
+
+  // We need a new field index for this structure.
+  idx_stack_.push_back(0);
+  return 0;
 }
 
 uint32_t TDenseProtocol::readStructEnd() {
@@ -410,6 +528,7 @@
                                         int16_t& fieldId) {
   uint32_t xfer = 0;
 
+  // For optional fields, check to see if they are there.
   while (FMT.is_optional) {
     bool is_present;
     xfer += subReadBool(is_present);
@@ -419,10 +538,14 @@
     IDX++;
   }
 
+  // Once we hit a mandatory field, or an optional field that is present,
+  // we know that FMT and FTS point to the appropriate field.
+
   fieldId   = FMT.tag;
   fieldType = FTS->ttype;
 
-  // OMG I'm so gross.  XXX
+  // Normally, we push the TypeSpec that we are about to read,
+  // but no reading is done for T_STOP.
   if (FTS->ttype != T_STOP) {
     ts_stack_.push_back(FTS);
   }
@@ -443,8 +566,10 @@
   int32_t sizei;
   xfer += subReadI32(sizei);
   if (sizei < 0) {
+    resetState();
     throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
   } else if (container_limit_ && sizei > container_limit_) {
+    resetState();
     throw TProtocolException(TProtocolException::SIZE_LIMIT);
   }
   size = (uint32_t)sizei;
@@ -473,8 +598,10 @@
   int32_t sizei;
   xfer += subReadI32(sizei);
   if (sizei < 0) {
+    resetState();
     throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
   } else if (container_limit_ && sizei > container_limit_) {
+    resetState();
     throw TProtocolException(TProtocolException::SIZE_LIMIT);
   }
   size = (uint32_t)sizei;
@@ -500,8 +627,10 @@
   int32_t sizei;
   xfer += subReadI32(sizei);
   if (sizei < 0) {
+    resetState();
     throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
   } else if (container_limit_ && sizei > container_limit_) {
+    resetState();
     throw TProtocolException(TProtocolException::SIZE_LIMIT);
   }
   size = (uint32_t)sizei;
@@ -535,9 +664,10 @@
   checkTType(T_I16);
   stateTransition();
   uint64_t u64;
-  uint32_t rv = vliRead(u64);
+  uint32_t rv = vlqRead(u64);
   int64_t val = (int64_t)u64;
   if (UNLIKELY(val > INT16_MAX || val < INT16_MIN)) {
+    resetState();
     throw TProtocolException(TProtocolException::INVALID_DATA,
                              "i16 out of range.");
   }
@@ -549,9 +679,10 @@
   checkTType(T_I32);
   stateTransition();
   uint64_t u64;
-  uint32_t rv = vliRead(u64);
+  uint32_t rv = vlqRead(u64);
   int64_t val = (int64_t)u64;
   if (UNLIKELY(val > INT32_MAX || val < INT32_MIN)) {
+    resetState();
     throw TProtocolException(TProtocolException::INVALID_DATA,
                              "i32 out of range.");
   }
@@ -563,9 +694,10 @@
   checkTType(T_I64);
   stateTransition();
   uint64_t u64;
-  uint32_t rv = vliRead(u64);
+  uint32_t rv = vlqRead(u64);
   int64_t val = (int64_t)u64;
   if (UNLIKELY(val > INT64_MAX || val < INT64_MIN)) {
+    resetState();
     throw TProtocolException(TProtocolException::INVALID_DATA,
                              "i64 out of range.");
   }
@@ -587,9 +719,10 @@
 
 uint32_t TDenseProtocol::subReadI32(int32_t& i32) {
   uint64_t u64;
-  uint32_t rv = vliRead(u64);
+  uint32_t rv = vlqRead(u64);
   int64_t val = (int64_t)u64;
   if (UNLIKELY(val > INT32_MAX || val < INT32_MIN)) {
+    resetState();
     throw TProtocolException(TProtocolException::INVALID_DATA,
                              "i32 out of range.");
   }
diff --git a/lib/cpp/src/protocol/TDenseProtocol.h b/lib/cpp/src/protocol/TDenseProtocol.h
index edd5c88..0902e15 100644
--- a/lib/cpp/src/protocol/TDenseProtocol.h
+++ b/lib/cpp/src/protocol/TDenseProtocol.h
@@ -12,13 +12,30 @@
 namespace facebook { namespace thrift { namespace protocol {
 
 /**
- * The dense protocol is designed to use as little space as possible.
- *
  * !!!WARNING!!!
  * This class is still highly experimental.  Incompatible changes
  * WILL be made to it without notice.  DO NOT USE IT YET unless
  * you are coordinating your testing with the author.
  *
+ * The dense protocol is designed to use as little space as possible.
+ *
+ * There are two types of dense protocol instances.  Standalone instances
+ * are not used for RPC and just encoded and decode structures of 
+ * a predetermined type.  Non-standalone instances are used for RPC.
+ * Currently, only standalone instances exist.
+ *
+ * To use a standalone dense protocol object, you must set the type_spec
+ * property (either in the constructor, or with setTypeSpec) to the local
+ * reflection TypeSpec of the structures you will write to (or read from) the
+ * protocol instance.
+ *
+ * BEST PRACTICES:
+ * - Never use optional for primitives or containers.
+ * - Only use optional for structures if they are very big and very rarely set.
+ * - All integers are variable-length, so you can use i64 without bloating.
+ * - NEVER EVER change the struct definitions IN ANY WAY without either
+ *   changing your cache keys or talking to dreiss.
+ *
  * TODO(dreiss): New class write with old meta.
  *
  * We override all of TBinaryProtocol's methods.
@@ -35,34 +52,18 @@
 
  public:
   typedef facebook::thrift::reflection::local::TypeSpec TypeSpec;
+  static const int FP_PREFIX_LEN;
 
+  /**
+   * @param tran       The transport to use.
+   * @param type_spec  The TypeSpec of the structures using this protocol.
+   */
   TDenseProtocol(boost::shared_ptr<TTransport> trans,
                  TypeSpec* type_spec = NULL) :
     TBinaryProtocol(trans),
-    type_spec_(type_spec) {
-      vli_save_16 = 0;
-      vli_save_32 = 0;
-      vli_save_64 = 0;
-      vli_save_sub = 0;
-      negs = 0;
-    }
-
-  TDenseProtocol(boost::shared_ptr<TTransport> trans,
-                  TypeSpec* type_spec,
-                  int32_t string_limit,
-                  int32_t container_limit) :
-    TBinaryProtocol(trans,
-                    string_limit,
-                    container_limit,
-                    true,
-                    true),
-    type_spec_(type_spec) {
-      vli_save_16 = 0;
-      vli_save_32 = 0;
-      vli_save_64 = 0;
-      vli_save_sub = 0;
-      negs = 0;
-    }
+    type_spec_(type_spec),
+    standalone_(true)
+  {}
 
   void setTypeSpec(TypeSpec* type_spec) {
     type_spec_ = type_spec;
@@ -203,14 +204,24 @@
 
  private:
 
+  // Implementation functions, documented in the .cpp.
   inline void checkTType(const TType ttype);
   inline void stateTransition();
 
   // Read and write variable-length integers.
   // Uses the same technique as the MIDI file format.
-  inline uint32_t vliRead(uint64_t& vli);
-  inline uint32_t vliWrite(uint64_t vli);
+  inline uint32_t vlqRead(uint64_t& vlq);
+  inline uint32_t vlqWrite(uint64_t vlq);
 
+  // Called before throwing an exception to make the object reusable.
+  void resetState() {
+    ts_stack_.clear();
+    idx_stack_.clear();
+    mkv_stack_.clear();
+  }
+
+  // TypeSpec of the top-level structure to write,
+  // for standalone protocol objects.
   TypeSpec* type_spec_;
 
   std::vector<TypeSpec*> ts_stack_;   // TypeSpec stack.
@@ -218,13 +229,8 @@
   std::vector<bool>      mkv_stack_;  // Map Key/Vlue stack.
                                       // True = key, False = value.
 
-  // XXX Remove these when wire format is finalized.
- public:
-  int vli_save_16;
-  int vli_save_32;
-  int vli_save_64;
-  int vli_save_sub;
-  int negs;
+  // True iff this is a standalone instance (no RPC).
+  bool standalone_;
 };
 
 }}} // facebook::thrift::protocol
diff --git a/test/DenseProtoTest.cpp b/test/DenseProtoTest.cpp
index 0953c19..f50faca 100644
--- a/test/DenseProtoTest.cpp
+++ b/test/DenseProtoTest.cpp
@@ -1,8 +1,10 @@
 /*
 ../compiler/cpp/thrift -cpp -dense DebugProtoTest.thrift
+../compiler/cpp/thrift -cpp -dense OptionalRequiredTest.thrift
 g++ -Wall -g -I../lib/cpp/src -I/usr/local/include/boost-1_33_1 \
-  DenseProtoTest.cpp gen-cpp/DebugProtoTest_types.cpp \
-  ../lib/cpp/.libs/libthrift.a -o DenseProtoTest
+  gen-cpp/OptionalRequiredTest_types.cpp \
+  gen-cpp/DebugProtoTest_types.cpp \
+  DenseProtoTest.cpp ../lib/cpp/.libs/libthrift.a -o DenseProtoTest
 ./DenseProtoTest
 */
 
@@ -11,10 +13,13 @@
 #define inline
 
 #undef NDEBUG
+#include <cstdlib>
 #include <cassert>
 #include <iostream>
 #include <cmath>
+#include <string>
 #include "gen-cpp/DebugProtoTest_types.h"
+#include "gen-cpp/OptionalRequiredTest_types.h"
 #include <protocol/TDenseProtocol.h>
 #include <transport/TTransportUtils.h>
 
@@ -31,6 +36,7 @@
 
 
 int main() {
+  using std::string;
   using std::cout;
   using std::endl;
   using boost::shared_ptr;
@@ -123,13 +129,14 @@
 
 
   // Let's test out the variable-length ints, shall we?
-  uint64_t vli;
+  uint64_t vlq;
   #define checkout(i, c) { \
     buffer->resetBuffer(); \
-    proto->vliWrite(i); \
+    proto->vlqWrite(i); \
+    proto->getTransport()->flush(); \
     assert(my_memeq(buffer->getBufferAsString().data(), c, sizeof(c)-1)); \
-    proto->vliRead(vli); \
-    assert(vli == i); \
+    proto->vlqRead(vlq); \
+    assert(vlq == i); \
   }
 
   checkout(0x00000000, "\x00");
@@ -160,6 +167,199 @@
   checkout(0x7FFFFFFFFFFFFFFFull, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F");
   checkout(0xFFFFFFFFFFFFFFFFull, "\x81\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F");
 
+  // Test out the slow path with a TBufferedTransport.
+  shared_ptr<TBufferedTransport> buff_trans(new TBufferedTransport(buffer, 3));
+  proto.reset(new TDenseProtocol(buff_trans));
+  checkout(0x0000000100000000ull, "\x90\x80\x80\x80\x00");
+  checkout(0x0000000200000000ull, "\xA0\x80\x80\x80\x00");
+  checkout(0x0000000300000000ull, "\xB0\x80\x80\x80\x00");
+  checkout(0x0000000700000000ull, "\xF0\x80\x80\x80\x00");
+  checkout(0x00000007F0000000ull, "\xFF\x80\x80\x80\x00");
+  checkout(0x00000007FFFFFFFFull, "\xFF\xFF\xFF\xFF\x7F");
+  checkout(0x0000000800000000ull, "\x81\x80\x80\x80\x80\x00");
+  checkout(0x1FFFFFFFFFFFFFFFull, "\x9F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F");
+  checkout(0x7FFFFFFFFFFFFFFFull, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F");
+  checkout(0xFFFFFFFFFFFFFFFFull, "\x81\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F");
+
+  // Test optional stuff.
+  proto.reset(new TDenseProtocol(buffer));
+  proto->setTypeSpec(ManyOpt::local_reflection);
+  ManyOpt mo1, mo2, mo3, mo4, mo5, mo6;
+  mo1.opt1 = 923759347;
+  mo1.opt2 = 392749274;
+  mo1.opt3 = 395739402;
+  mo1.def4 = 294730928;
+  mo1.opt5 = 394309218;
+  mo1.opt6 = 832194723;
+  mo1.__isset.opt1 = true;
+  mo1.__isset.opt2 = true;
+  mo1.__isset.opt3 = true;
+  mo1.__isset.def4 = true;
+  mo1.__isset.opt5 = true;
+  mo1.__isset.opt6 = true;
+
+  mo1.write(proto.get());
+  mo2.read(proto.get());
+
+  assert(mo2.__isset.opt1 == true);
+  assert(mo2.__isset.opt2 == true);
+  assert(mo2.__isset.opt3 == true);
+  assert(mo2.__isset.def4 == true);
+  assert(mo2.__isset.opt5 == true);
+  assert(mo2.__isset.opt6 == true);
+
+  assert(mo1 == mo2);
+
+  mo1.__isset.opt1 = false;
+  mo1.__isset.opt3 = false;
+  mo1.__isset.opt5 = false;
+
+  mo1.write(proto.get());
+  mo3.read(proto.get());
+
+  assert(mo3.__isset.opt1 == false);
+  assert(mo3.__isset.opt2 == true);
+  assert(mo3.__isset.opt3 == false);
+  assert(mo3.__isset.def4 == true);
+  assert(mo3.__isset.opt5 == false);
+  assert(mo3.__isset.opt6 == true);
+
+  assert(mo1 == mo3);
+
+  mo1.__isset.opt1 = true;
+  mo1.__isset.opt3 = true;
+  mo1.__isset.opt5 = true;
+  mo1.__isset.opt2 = false;
+  mo1.__isset.opt6 = false;
+
+  mo1.write(proto.get());
+  mo4.read(proto.get());
+
+  assert(mo4.__isset.opt1 == true);
+  assert(mo4.__isset.opt2 == false);
+  assert(mo4.__isset.opt3 == true);
+  assert(mo4.__isset.def4 == true);
+  assert(mo4.__isset.opt5 == true);
+  assert(mo4.__isset.opt6 == false);
+
+  assert(mo1 == mo4);
+
+  mo1.__isset.opt1 = false;
+  mo1.__isset.opt5 = false;
+
+  mo1.write(proto.get());
+  mo5.read(proto.get());
+
+  assert(mo5.__isset.opt1 == false);
+  assert(mo5.__isset.opt2 == false);
+  assert(mo5.__isset.opt3 == true);
+  assert(mo5.__isset.def4 == true);
+  assert(mo5.__isset.opt5 == false);
+  assert(mo5.__isset.opt6 == false);
+
+  assert(mo1 == mo5);
+
+  mo1.__isset.opt3 = false;
+
+  mo1.write(proto.get());
+  mo6.read(proto.get());
+
+  assert(mo6.__isset.opt1 == false);
+  assert(mo6.__isset.opt2 == false);
+  assert(mo6.__isset.opt3 == false);
+  assert(mo6.__isset.def4 == true);
+  assert(mo6.__isset.opt5 == false);
+  assert(mo6.__isset.opt6 == false);
+
+  assert(mo1 == mo6);
+
+
+  // Test fingerprint checking stuff.
+
+  {
+    // Default and required have the same fingerprint.
+    Tricky1 t1;
+    Tricky3 t3;
+    assert(string(Tricky1::ascii_fingerprint) == Tricky3::ascii_fingerprint);
+    proto->setTypeSpec(Tricky1::local_reflection);
+    t1.im_default = 227;
+    t1.write(proto.get());
+    proto->setTypeSpec(Tricky3::local_reflection);
+    t3.read(proto.get());
+    assert(t3.im_required == 227);
+  }
+
+  {
+    // Optional changes things.
+    Tricky1 t1;
+    Tricky2 t2;
+    assert(string(Tricky1::ascii_fingerprint) != Tricky2::ascii_fingerprint);
+    proto->setTypeSpec(Tricky1::local_reflection);
+    t1.im_default = 227;
+    t1.write(proto.get());
+    try {
+      proto->setTypeSpec(Tricky2::local_reflection);
+      t2.read(proto.get());
+      assert(false);
+    } catch (TProtocolException& ex) {
+      buffer->resetBuffer();
+    }
+  }
+
+  {
+    // Holy cow.  We can use the Tricky1 typespec with the Tricky2 structure.
+    Tricky1 t1;
+    Tricky2 t2;
+    proto->setTypeSpec(Tricky1::local_reflection);
+    t1.im_default = 227;
+    t1.write(proto.get());
+    t2.read(proto.get());
+    assert(t2.__isset.im_optional == true);
+    assert(t2.im_optional == 227);
+  }
+
+  {
+    // And totally off the wall.
+    Tricky1 t1;
+    OneOfEach ooe2;
+    assert(string(Tricky1::ascii_fingerprint) != OneOfEach::ascii_fingerprint);
+    proto->setTypeSpec(Tricky1::local_reflection);
+    t1.im_default = 227;
+    t1.write(proto.get());
+    try {
+      proto->setTypeSpec(OneOfEach::local_reflection);
+      ooe2.read(proto.get());
+      assert(false);
+    } catch (TProtocolException& ex) {
+      buffer->resetBuffer();
+    }
+  }
+
+  // Okay, this is really off the wall.
+  // Just don't crash.
+  cout << "Starting fuzz test.  This takes a while.  (20 dots.)" << endl;
+  std::srand(12345);
+  for (int i = 0; i < 2000; i++) {
+    if (i % 100 == 0) {
+      cout << ".";
+      cout.flush();
+    }
+    buffer->resetBuffer();
+    // Make sure the fingerprint prefix is right.
+    buffer->write(Nesting::binary_fingerprint, 4);
+    for (int j = 0; j < 1024*1024; j++) {
+      uint8_t r = std::rand();
+      buffer->write(&r, 1);
+    }
+    Nesting n;
+    proto->setTypeSpec(OneOfEach::local_reflection);
+    try {
+      n.read(proto.get());
+    } catch (TProtocolException& ex) {
+    } catch (TTransportException& ex) {
+    }
+  }
+  cout << endl;
 
   return 0;
 }
diff --git a/test/OptionalRequiredTest.thrift b/test/OptionalRequiredTest.thrift
index b2652dd..431a0b0 100644
--- a/test/OptionalRequiredTest.thrift
+++ b/test/OptionalRequiredTest.thrift
@@ -40,3 +40,12 @@
   5: required Simple req_simp;
   6: optional Simple opt_simp;
 }
+
+struct ManyOpt {
+  1: optional i32 opt1;
+  2: optional i32 opt2;
+  3: optional i32 opt3;
+  4:          i32 def4;
+  5: optional i32 opt5;
+  6: optional i32 opt6;
+}