THRIFT-3217 Provide a little-endian variant of the binary protocol in C++
Client: C++
Patch: Ben Craig <bencraig@apache.org>

This closes #537
diff --git a/lib/cpp/src/thrift/protocol/TBinaryProtocol.h b/lib/cpp/src/thrift/protocol/TBinaryProtocol.h
index 25f0255..7291988 100644
--- a/lib/cpp/src/thrift/protocol/TBinaryProtocol.h
+++ b/lib/cpp/src/thrift/protocol/TBinaryProtocol.h
@@ -34,8 +34,8 @@
  * binary format, essentially just spitting out the raw bytes.
  *
  */
-template <class Transport_>
-class TBinaryProtocolT : public TVirtualProtocol<TBinaryProtocolT<Transport_> > {
+template <class Transport_, class ByteOrder_ = TNetworkBigEndian>
+class TBinaryProtocolT : public TVirtualProtocol<TBinaryProtocolT<Transport_, ByteOrder_> > {
 protected:
   static const int32_t VERSION_MASK = ((int32_t)0xffff0000);
   static const int32_t VERSION_1 = ((int32_t)0x80010000);
@@ -43,7 +43,7 @@
 
 public:
   TBinaryProtocolT(boost::shared_ptr<Transport_> trans)
-    : TVirtualProtocol<TBinaryProtocolT<Transport_> >(trans),
+    : TVirtualProtocol<TBinaryProtocolT<Transport_, ByteOrder_> >(trans),
       trans_(trans.get()),
       string_limit_(0),
       container_limit_(0),
@@ -55,7 +55,7 @@
                    int32_t container_limit,
                    bool strict_read,
                    bool strict_write)
-    : TVirtualProtocol<TBinaryProtocolT<Transport_> >(trans),
+    : TVirtualProtocol<TBinaryProtocolT<Transport_, ByteOrder_> >(trans),
       trans_(trans.get()),
       string_limit_(string_limit),
       container_limit_(container_limit),
@@ -150,7 +150,7 @@
 
   inline uint32_t readBool(bool& value);
   // Provide the default readBool() implementation for std::vector<bool>
-  using TVirtualProtocol<TBinaryProtocolT<Transport_> >::readBool;
+  using TVirtualProtocol<TBinaryProtocolT<Transport_, ByteOrder_> >::readBool;
 
   inline uint32_t readByte(int8_t& byte);
 
@@ -182,11 +182,12 @@
 };
 
 typedef TBinaryProtocolT<TTransport> TBinaryProtocol;
+typedef TBinaryProtocolT<TTransport, TNetworkLittleEndian> TLEBinaryProtocol;
 
 /**
  * Constructs binary protocol handlers
  */
-template <class Transport_>
+template <class Transport_, class ByteOrder_ = TNetworkBigEndian>
 class TBinaryProtocolFactoryT : public TProtocolFactory {
 public:
   TBinaryProtocolFactoryT()
@@ -216,17 +217,19 @@
     boost::shared_ptr<Transport_> specific_trans = boost::dynamic_pointer_cast<Transport_>(trans);
     TProtocol* prot;
     if (specific_trans) {
-      prot = new TBinaryProtocolT<Transport_>(specific_trans,
-                                              string_limit_,
-                                              container_limit_,
-                                              strict_read_,
-                                              strict_write_);
+      prot = new TBinaryProtocolT<Transport_, ByteOrder_>(
+        specific_trans,
+        string_limit_,
+        container_limit_,
+        strict_read_,
+        strict_write_);
     } else {
-      prot = new TBinaryProtocol(trans,
-                                 string_limit_,
-                                 container_limit_,
-                                 strict_read_,
-                                 strict_write_);
+      prot = new TBinaryProtocolT<TTransport, ByteOrder_>(
+        trans,
+        string_limit_,
+        container_limit_,
+        strict_read_,
+        strict_write_);
     }
 
     return boost::shared_ptr<TProtocol>(prot);
@@ -240,6 +243,7 @@
 };
 
 typedef TBinaryProtocolFactoryT<TTransport> TBinaryProtocolFactory;
+typedef TBinaryProtocolFactoryT<TTransport, TNetworkLittleEndian> TLEBinaryProtocolFactory;
 }
 }
 } // apache::thrift::protocol
diff --git a/lib/cpp/src/thrift/protocol/TBinaryProtocol.tcc b/lib/cpp/src/thrift/protocol/TBinaryProtocol.tcc
index 0d72d8a..ae350df 100644
--- a/lib/cpp/src/thrift/protocol/TBinaryProtocol.tcc
+++ b/lib/cpp/src/thrift/protocol/TBinaryProtocol.tcc
@@ -28,8 +28,8 @@
 namespace thrift {
 namespace protocol {
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeMessageBegin(const std::string& name,
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeMessageBegin(const std::string& name,
                                                          const TMessageType messageType,
                                                          const int32_t seqid) {
   if (this->strict_write_) {
@@ -48,24 +48,24 @@
   }
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeMessageEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeMessageEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeStructBegin(const char* name) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeStructBegin(const char* name) {
   (void)name;
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeStructEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeStructEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeFieldBegin(const char* name,
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeFieldBegin(const char* name,
                                                        const TType fieldType,
                                                        const int16_t fieldId) {
   (void)name;
@@ -75,18 +75,18 @@
   return wsize;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeFieldEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeFieldEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeFieldStop() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeFieldStop() {
   return writeByte((int8_t)T_STOP);
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeMapBegin(const TType keyType,
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeMapBegin(const TType keyType,
                                                      const TType valType,
                                                      const uint32_t size) {
   uint32_t wsize = 0;
@@ -96,85 +96,85 @@
   return wsize;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeMapEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeMapEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeListBegin(const TType elemType, const uint32_t size) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeListBegin(const TType elemType, const uint32_t size) {
   uint32_t wsize = 0;
   wsize += writeByte((int8_t)elemType);
   wsize += writeI32((int32_t)size);
   return wsize;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeListEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeListEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeSetBegin(const TType elemType, const uint32_t size) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeSetBegin(const TType elemType, const uint32_t size) {
   uint32_t wsize = 0;
   wsize += writeByte((int8_t)elemType);
   wsize += writeI32((int32_t)size);
   return wsize;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeSetEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeSetEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeBool(const bool value) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeBool(const bool value) {
   uint8_t tmp = value ? 1 : 0;
   this->trans_->write(&tmp, 1);
   return 1;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeByte(const int8_t byte) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeByte(const int8_t byte) {
   this->trans_->write((uint8_t*)&byte, 1);
   return 1;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeI16(const int16_t i16) {
-  int16_t net = (int16_t)htons(i16);
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeI16(const int16_t i16) {
+  int16_t net = (int16_t)ByteOrder_::toWire16(i16);
   this->trans_->write((uint8_t*)&net, 2);
   return 2;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeI32(const int32_t i32) {
-  int32_t net = (int32_t)htonl(i32);
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeI32(const int32_t i32) {
+  int32_t net = (int32_t)ByteOrder_::toWire32(i32);
   this->trans_->write((uint8_t*)&net, 4);
   return 4;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeI64(const int64_t i64) {
-  int64_t net = (int64_t)htonll(i64);
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeI64(const int64_t i64) {
+  int64_t net = (int64_t)ByteOrder_::toWire64(i64);
   this->trans_->write((uint8_t*)&net, 8);
   return 8;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeDouble(const double dub) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeDouble(const double dub) {
   BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t));
   BOOST_STATIC_ASSERT(std::numeric_limits<double>::is_iec559);
 
   uint64_t bits = bitwise_cast<uint64_t>(dub);
-  bits = htonll(bits);
+  bits = ByteOrder_::toWire64(bits);
   this->trans_->write((uint8_t*)&bits, 8);
   return 8;
 }
 
-template <class Transport_>
+template <class Transport_, class ByteOrder_>
 template <typename StrType>
-uint32_t TBinaryProtocolT<Transport_>::writeString(const StrType& str) {
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeString(const StrType& str) {
   if (str.size() > static_cast<size_t>((std::numeric_limits<int32_t>::max)()))
     throw TProtocolException(TProtocolException::SIZE_LIMIT);
   uint32_t size = static_cast<uint32_t>(str.size());
@@ -185,17 +185,17 @@
   return result + size;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::writeBinary(const std::string& str) {
-  return TBinaryProtocolT<Transport_>::writeString(str);
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::writeBinary(const std::string& str) {
+  return TBinaryProtocolT<Transport_, ByteOrder_>::writeString(str);
 }
 
 /**
  * Reading functions
  */
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readMessageBegin(std::string& name,
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readMessageBegin(std::string& name,
                                                         TMessageType& messageType,
                                                         int32_t& seqid) {
   uint32_t result = 0;
@@ -227,24 +227,24 @@
   return result;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readMessageEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readMessageEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readStructBegin(std::string& name) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readStructBegin(std::string& name) {
   name = "";
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readStructEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readStructEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readFieldBegin(std::string& name,
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readFieldBegin(std::string& name,
                                                       TType& fieldType,
                                                       int16_t& fieldId) {
   (void)name;
@@ -260,13 +260,13 @@
   return result;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readFieldEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readFieldEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readMapBegin(TType& keyType,
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readMapBegin(TType& keyType,
                                                     TType& valType,
                                                     uint32_t& size) {
   int8_t k, v;
@@ -286,13 +286,13 @@
   return result;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readMapEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readMapEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readListBegin(TType& elemType, uint32_t& size) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readListBegin(TType& elemType, uint32_t& size) {
   int8_t e;
   uint32_t result = 0;
   int32_t sizei;
@@ -308,13 +308,13 @@
   return result;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readListEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readListEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readSetBegin(TType& elemType, uint32_t& size) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readSetBegin(TType& elemType, uint32_t& size) {
   int8_t e;
   uint32_t result = 0;
   int32_t sizei;
@@ -330,62 +330,62 @@
   return result;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readSetEnd() {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readSetEnd() {
   return 0;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readBool(bool& value) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readBool(bool& value) {
   uint8_t b[1];
   this->trans_->readAll(b, 1);
   value = *(int8_t*)b != 0;
   return 1;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readByte(int8_t& byte) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readByte(int8_t& byte) {
   uint8_t b[1];
   this->trans_->readAll(b, 1);
   byte = *(int8_t*)b;
   return 1;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readI16(int16_t& i16) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readI16(int16_t& i16) {
   union bytes {
     uint8_t b[2];
     int16_t all;
   } theBytes;
   this->trans_->readAll(theBytes.b, 2);
-  i16 = (int16_t)ntohs(theBytes.all);
+  i16 = (int16_t)ByteOrder_::fromWire16(theBytes.all);
   return 2;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readI32(int32_t& i32) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readI32(int32_t& i32) {
   union bytes {
     uint8_t b[4];
     int32_t all;
   } theBytes;
   this->trans_->readAll(theBytes.b, 4);
-  i32 = (int32_t)ntohl(theBytes.all);
+  i32 = (int32_t)ByteOrder_::fromWire32(theBytes.all);
   return 4;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readI64(int64_t& i64) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readI64(int64_t& i64) {
   union bytes {
     uint8_t b[8];
     int64_t all;
   } theBytes;
   this->trans_->readAll(theBytes.b, 8);
-  i64 = (int64_t)ntohll(theBytes.all);
+  i64 = (int64_t)ByteOrder_::fromWire64(theBytes.all);
   return 8;
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readDouble(double& dub) {
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readDouble(double& dub) {
   BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t));
   BOOST_STATIC_ASSERT(std::numeric_limits<double>::is_iec559);
 
@@ -394,28 +394,28 @@
     uint64_t all;
   } theBytes;
   this->trans_->readAll(theBytes.b, 8);
-  theBytes.all = ntohll(theBytes.all);
+  theBytes.all = ByteOrder_::fromWire64(theBytes.all);
   dub = bitwise_cast<double>(theBytes.all);
   return 8;
 }
 
-template <class Transport_>
+template <class Transport_, class ByteOrder_>
 template <typename StrType>
-uint32_t TBinaryProtocolT<Transport_>::readString(StrType& str) {
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readString(StrType& str) {
   uint32_t result;
   int32_t size;
   result = readI32(size);
   return result + readStringBody(str, size);
 }
 
-template <class Transport_>
-uint32_t TBinaryProtocolT<Transport_>::readBinary(std::string& str) {
-  return TBinaryProtocolT<Transport_>::readString(str);
+template <class Transport_, class ByteOrder_>
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readBinary(std::string& str) {
+  return TBinaryProtocolT<Transport_, ByteOrder_>::readString(str);
 }
 
-template <class Transport_>
+template <class Transport_, class ByteOrder_>
 template <typename StrType>
-uint32_t TBinaryProtocolT<Transport_>::readStringBody(StrType& str, int32_t size) {
+uint32_t TBinaryProtocolT<Transport_, ByteOrder_>::readStringBody(StrType& str, int32_t size) {
   uint32_t result = 0;
 
   // Catch error cases
diff --git a/lib/cpp/src/thrift/protocol/TProtocol.h b/lib/cpp/src/thrift/protocol/TProtocol.h
index f3b6048..9eec1ee 100644
--- a/lib/cpp/src/thrift/protocol/TProtocol.h
+++ b/lib/cpp/src/thrift/protocol/TProtocol.h
@@ -105,6 +105,10 @@
 #  include <byteswap.h>
 #  define htolell(n) bswap_64(n)
 #  define letohll(n) bswap_64(n)
+#  define THRIFT_htolel(n) bswap_32(n)
+#  define THRIFT_letohl(n) bswap_32(n)
+#  define THRIFT_htoles(n) bswap_16(n)
+#  define THRIFT_letohs(n) bswap_16(n)
 # else /* GNUC & GLIBC */
 #  define bswap_64(n) \
       ( (((n) & 0xff00000000000000ull) >> 56) \
@@ -115,12 +119,28 @@
       | (((n) & 0x0000000000ff0000ull) << 24) \
       | (((n) & 0x000000000000ff00ull) << 40) \
       | (((n) & 0x00000000000000ffull) << 56) )
+#  define bswap_32(n) \
+      ( (((n) & 0xff000000ul) >> 24) \
+      | (((n) & 0x00ff0000ul) >> 8)  \
+      | (((n) & 0x0000ff00ul) << 8)  \
+      | (((n) & 0x000000fful) << 24) )
+#  define bswap_16(n) \
+      ( (((n) & ((unsigned short)0xff00ul)) >> 8)  \
+      | (((n) & ((unsigned short)0x00fful)) << 8)  )
 #  define htolell(n) bswap_64(n)
 #  define letohll(n) bswap_64(n)
+#  define THRIFT_htolel(n) bswap_32(n)
+#  define THRIFT_letohl(n) bswap_32(n)
+#  define THRIFT_htoles(n) bswap_16(n)
+#  define THRIFT_letohs(n) bswap_16(n)
 # endif /* GNUC & GLIBC */
 #elif __THRIFT_BYTE_ORDER == __THRIFT_LITTLE_ENDIAN
 #  define htolell(n) (n)
 #  define letohll(n) (n)
+#  define THRIFT_htolel(n) (n)
+#  define THRIFT_letohl(n) (n)
+#  define THRIFT_htoles(n) (n)
+#  define THRIFT_letohs(n) (n)
 # if defined(__GNUC__) && defined(__GLIBC__)
 #  include <byteswap.h>
 #  define ntohll(n) bswap_64(n)
@@ -669,6 +689,29 @@
  * It is used only by the generator code.
  */
 class TDummyProtocol : public TProtocol {};
+
+// Big-endian (network byte order) wire format. This is the default / legacy choice
+struct TNetworkBigEndian
+{
+  static uint16_t toWire16(uint16_t x)   {return htons(x);}
+  static uint32_t toWire32(uint32_t x)   {return htonl(x);}
+  static uint64_t toWire64(uint64_t x)   {return htonll(x);}
+  static uint16_t fromWire16(uint16_t x) {return ntohs(x);}
+  static uint32_t fromWire32(uint32_t x) {return ntohl(x);}
+  static uint64_t fromWire64(uint64_t x) {return ntohll(x);}
+};
+
+// Little-endian wire format. On little-endian hosts (most systems) no byte swap is needed, so this will be a bit faster than TNetworkBigEndian
+struct TNetworkLittleEndian
+{
+  static uint16_t toWire16(uint16_t x)   {return THRIFT_htoles(x);}
+  static uint32_t toWire32(uint32_t x)   {return THRIFT_htolel(x);}
+  static uint64_t toWire64(uint64_t x)   {return htolell(x);}
+  static uint16_t fromWire16(uint16_t x) {return THRIFT_letohs(x);}
+  static uint32_t fromWire32(uint32_t x) {return THRIFT_letohl(x);}
+  static uint64_t fromWire64(uint64_t x) {return letohll(x);}
+};
+
 }
 }
 } // apache::thrift::protocol
diff --git a/lib/cpp/test/AllProtocolTests.cpp b/lib/cpp/test/AllProtocolTests.cpp
index a1bccb5..6b5c7c4 100644
--- a/lib/cpp/test/AllProtocolTests.cpp
+++ b/lib/cpp/test/AllProtocolTests.cpp
@@ -38,6 +38,10 @@
   testProtocol<TBinaryProtocol>("TBinaryProtocol");
 }
 
+BOOST_AUTO_TEST_CASE(test_little_binary_protocol) {
+  testProtocol<TLEBinaryProtocol>("TLEBinaryProtocol");
+}
+
 BOOST_AUTO_TEST_CASE(test_compact_protocol) {
   testProtocol<TCompactProtocol>("TCompactProtocol");
 }
diff --git a/lib/cpp/test/Benchmark.cpp b/lib/cpp/test/Benchmark.cpp
index 9d96d08..69e6414 100644
--- a/lib/cpp/test/Benchmark.cpp
+++ b/lib/cpp/test/Benchmark.cpp
@@ -66,41 +66,178 @@
   ooe.zomg_unicode = "\xd7\n\a\t";
   ooe.base64 = "\1\2\3\255";
 
-  boost::shared_ptr<TMemoryBuffer> buf(new TMemoryBuffer());
+  int num = 100000;
+  boost::shared_ptr<TMemoryBuffer> buf(new TMemoryBuffer(num*1000));
 
-  int num = 1000000;
+  uint8_t* data = NULL;
+  uint32_t datasize = 0;
 
   {
+    buf->resetBuffer();
+    TBinaryProtocolT<TMemoryBuffer> prot(buf);
+    double elapsed = 0.0;
     Timer timer;
 
     for (int i = 0; i < num; i++) {
-      buf->resetBuffer();
-      TBinaryProtocolT<TBufferBase> prot(buf);
       ooe.write(&prot);
     }
-    cout << "Write: " << num / (1000 * timer.frame()) << " kHz" << endl;
+    elapsed = timer.frame();
+    cout << "Write big endian: " << num / (1000 * elapsed) << " kHz" << endl;
   }
 
-  uint8_t* data;
-  uint32_t datasize;
-
   buf->getBuffer(&data, &datasize);
 
   {
-
+    boost::shared_ptr<TMemoryBuffer> buf2(new TMemoryBuffer(data, datasize));
+    TBinaryProtocolT<TMemoryBuffer> prot(buf2);
+    OneOfEach ooe2;
+    double elapsed = 0.0;
     Timer timer;
 
     for (int i = 0; i < num; i++) {
-      OneOfEach ooe2;
-      boost::shared_ptr<TMemoryBuffer> buf2(new TMemoryBuffer(data, datasize));
-      // buf2->resetBuffer(data, datasize);
-      TBinaryProtocolT<TBufferBase> prot(buf2);
       ooe2.read(&prot);
-
-      // cout << apache::thrift::ThriftDebugString(ooe2) << endl << endl;
     }
-    cout << " Read: " << num / (1000 * timer.frame()) << " kHz" << endl;
+    elapsed = timer.frame();
+    cout << " Read big endian: " << num / (1000 * elapsed) << " kHz" << endl;
   }
 
+  {
+    buf->resetBuffer();
+    TBinaryProtocolT<TMemoryBuffer, TNetworkLittleEndian> prot(buf);
+    double elapsed = 0.0;
+    Timer timer;
+
+    for (int i = 0; i < num; i++) {
+      ooe.write(&prot);
+    }
+    elapsed = timer.frame();
+    cout << "Write little endian: " << num / (1000 * elapsed) << " kHz" << endl;
+  }
+
+  {
+    OneOfEach ooe2;
+    boost::shared_ptr<TMemoryBuffer> buf2(new TMemoryBuffer(data, datasize));
+    TBinaryProtocolT<TMemoryBuffer, TNetworkLittleEndian> prot(buf2);
+    double elapsed = 0.0;
+    Timer timer;
+
+    for (int i = 0; i < num; i++) {
+      ooe2.read(&prot);
+    }
+    elapsed = timer.frame();
+    cout << " Read little endian: " << num / (1000 * elapsed) << " kHz" << endl;
+  }
+
+  {
+    buf->resetBuffer();
+    TBinaryProtocolT<TMemoryBuffer> prot(buf);
+    double elapsed = 0.0;
+    Timer timer;
+
+    for (int i = 0; i < num; i++) {
+      ooe.write(&prot);
+    }
+    elapsed = timer.frame();
+    cout << "Write big endian: " << num / (1000 * elapsed) << " kHz" << endl;
+  }
+
+  {
+    boost::shared_ptr<TMemoryBuffer> buf2(new TMemoryBuffer(data, datasize));
+    TBinaryProtocolT<TMemoryBuffer> prot(buf2);
+    OneOfEach ooe2;
+    double elapsed = 0.0;
+    Timer timer;
+
+    for (int i = 0; i < num; i++) {
+      ooe2.read(&prot);
+    }
+    elapsed = timer.frame();
+    cout << " Read big endian: " << num / (1000 * elapsed) << " kHz" << endl;
+  }
+
+
+  data = NULL;
+  datasize = 0;
+  num = 10000000;
+
+  ListDoublePerf listDoublePerf;
+  listDoublePerf.field.reserve(num);
+  for (int x = 0; x < num; ++x)
+    listDoublePerf.field.push_back(double(x));
+
+  buf.reset(new TMemoryBuffer(num * 100));
+
+  {
+    buf->resetBuffer();
+    TBinaryProtocolT<TMemoryBuffer> prot(buf);
+    double elapsed = 0.0;
+    Timer timer;
+
+    listDoublePerf.write(&prot);
+    elapsed = timer.frame();
+    cout << "Double write big endian: " << num / (1000 * elapsed) << " kHz" << endl;
+  }
+
+  buf->getBuffer(&data, &datasize);
+
+  {
+    boost::shared_ptr<TMemoryBuffer> buf2(new TMemoryBuffer(data, datasize));
+    TBinaryProtocolT<TMemoryBuffer> prot(buf2);
+    ListDoublePerf listDoublePerf2;
+    double elapsed = 0.0;
+    Timer timer;
+
+    listDoublePerf2.read(&prot);
+    elapsed = timer.frame();
+    cout << " Double read big endian: " << num / (1000 * elapsed) << " kHz" << endl;
+  }
+
+  {
+    buf->resetBuffer();
+    TBinaryProtocolT<TMemoryBuffer, TNetworkLittleEndian> prot(buf);
+    double elapsed = 0.0;
+    Timer timer;
+
+    listDoublePerf.write(&prot);
+    elapsed = timer.frame();
+    cout << "Double write little endian: " << num / (1000 * elapsed) << " kHz" << endl;
+  }
+
+  {
+    ListDoublePerf listDoublePerf2;
+    boost::shared_ptr<TMemoryBuffer> buf2(new TMemoryBuffer(data, datasize));
+    TBinaryProtocolT<TMemoryBuffer, TNetworkLittleEndian> prot(buf2);
+    double elapsed = 0.0;
+    Timer timer;
+
+    listDoublePerf2.read(&prot);
+    elapsed = timer.frame();
+    cout << " Double read little endian: " << num / (1000 * elapsed) << " kHz" << endl;
+  }
+
+  {
+    buf->resetBuffer();
+    TBinaryProtocolT<TMemoryBuffer> prot(buf);
+    double elapsed = 0.0;
+    Timer timer;
+
+    listDoublePerf.write(&prot);
+    elapsed = timer.frame();
+    cout << "Double write big endian: " << num / (1000 * elapsed) << " kHz" << endl;
+  }
+
+  {
+    boost::shared_ptr<TMemoryBuffer> buf2(new TMemoryBuffer(data, datasize));
+    TBinaryProtocolT<TMemoryBuffer> prot(buf2);
+    ListDoublePerf listDoublePerf2;
+    double elapsed = 0.0;
+    Timer timer;
+
+    listDoublePerf2.read(&prot);
+    elapsed = timer.frame();
+    cout << " Double read big endian: " << num / (1000 * elapsed) << " kHz" << endl;
+  }
+
+
   return 0;
 }