THRIFT-110. java: A more compact format

This patch includes the Java implementation of the new Compact Protocol outlined in the issue and a matching test. It also creates Fixtures.java, a helper class that contains some useful test structure instances. 

git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@742847 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lib/java/build.xml b/lib/java/build.xml
index c686c7d..7567be4 100644
--- a/lib/java/build.xml
+++ b/lib/java/build.xml
@@ -10,13 +10,12 @@
   <property name="install.path" value="/usr/local/lib" />
   <property name="src.test" location="test" />
   <property name="build.test" location="${build}/test" />
-
   <property name="test.thrift.home" location="../../test"/>
 
   <property file="${user.home}/.thrift-build.properties" />
-  
+
   <property name="cpath" location="libthrift.jar:${thrift.extra.cpath}" />
-  
+
   <target name="init">
     <tstamp />
     <mkdir dir="${build}"/>
@@ -24,7 +23,7 @@
   </target>
 
   <target name="compile" depends="init">
-    <javac srcdir="${src}" destdir="${build}" source="1.5" />
+    <javac srcdir="${src}" destdir="${build}" source="1.5" debug="true"/>
   </target>
 
   <target name="dist" depends="compile">
@@ -53,6 +52,8 @@
   <target name="test" description="Run the full test suite" depends="compile-test">
     <java classname="org.apache.thrift.test.JSONProtoTest"
       classpath="${cpath}:${build.test}" failonerror="true" />
+    <java classname="org.apache.thrift.test.TCompactProtocolTest"
+      classpath="${cpath}:${build.test}" failonerror="true" />
     <java classname="org.apache.thrift.test.IdentityTest"
       classpath="${cpath}:${build.test}" failonerror="true" />
     <java classname="org.apache.thrift.test.EqualityTest"
@@ -82,5 +83,4 @@
     </exec>
   </target>
 
-
 </project>
diff --git a/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java b/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java
new file mode 100755
index 0000000..e6bf230
--- /dev/null
+++ b/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java
@@ -0,0 +1,722 @@
+
+package org.apache.thrift.protocol;
+
+import java.util.Stack;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.thrift.transport.TTransport;
+import org.apache.thrift.TException;
+
+/**
+ * TCompactProtocol2 is the Java implementation of the compact protocol specified
+ * in THRIFT-110. The fundamental approach to reducing the overhead of 
+ * structures is a) use variable-length integers all over the place and b) make
+ * use of unused bits wherever possible. Your savings will obviously vary 
+ * based on the specific makeup of your structs, but in general, the more 
+ * fields, nested structures, short strings and collections, and low-value i32
+ * and i64 fields you have, the more benefit you'll see.
+ */
+public final class TCompactProtocol extends TProtocol {
+
+  private final static TStruct ANONYMOUS_STRUCT = new TStruct("");
+  private final static TField TSTOP = new TField("", TType.STOP, (short)0);
+
+  private final static byte[] ttypeToCompactType = new byte[16];
+  
+  static {
+    ttypeToCompactType[TType.STOP] = TType.STOP;
+    ttypeToCompactType[TType.BOOL] = Types.BOOLEAN_TRUE;
+    ttypeToCompactType[TType.BYTE] = Types.BYTE;
+    ttypeToCompactType[TType.I16] = Types.I16;
+    ttypeToCompactType[TType.I32] = Types.I32;
+    ttypeToCompactType[TType.I64] = Types.I64;
+    ttypeToCompactType[TType.DOUBLE] = Types.DOUBLE;
+    ttypeToCompactType[TType.STRING] = Types.BINARY;
+    ttypeToCompactType[TType.LIST] = Types.LIST;
+    ttypeToCompactType[TType.SET] = Types.SET;
+    ttypeToCompactType[TType.MAP] = Types.MAP;
+    ttypeToCompactType[TType.STRUCT] = Types.STRUCT;
+  }
+  
+  /**
+   * TProtocolFactory that produces TCompactProtocols.
+   */
+  public static class Factory implements TProtocolFactory {
+    public Factory() {}
+    
+    public TProtocol getProtocol(TTransport trans) {
+      return new TCompactProtocol(trans);
+    }
+  }
+  
+  private static final byte PROTOCOL_ID = (byte)0x82;
+  private static final byte VERSION = 1;
+  private static final byte VERSION_MASK = 0x1f; // 0001 1111
+  private static final byte TYPE_MASK = (byte)0xE0; // 1110 0000
+  private static final int  TYPE_SHIFT_AMOUNT = 5;
+  
+  /**
+   * All of the on-wire type codes.
+   */
+  private static class Types {
+    public static final byte BOOLEAN_TRUE   = 0x01;
+    public static final byte BOOLEAN_FALSE  = 0x02;
+    public static final byte BYTE           = 0x03;
+    public static final byte I16            = 0x04;
+    public static final byte I32            = 0x05;
+    public static final byte I64            = 0x06;
+    public static final byte DOUBLE         = 0x07;
+    public static final byte BINARY         = 0x08;
+    public static final byte LIST           = 0x09;
+    public static final byte SET            = 0x0A;
+    public static final byte MAP            = 0x0B;
+    public static final byte STRUCT         = 0x0C;
+  }
+  
+  /** 
+   * Used to keep track of the last field for the current and previous structs,
+   * so we can do the delta stuff.
+   */
+  private Stack<Short> lastField_ = new Stack<Short>();
+  
+  private short lastFieldId_ = 0;
+  
+  /** 
+   * If we encounter a boolean field begin, save the TField here so it can 
+   * have the value incorporated.
+   */
+  private TField booleanField_ = null;
+  
+  /**
+   * If we read a field header, and it's a boolean field, save the boolean 
+   * value here so that readBool can use it.
+   */
+  private Boolean boolValue_ = null;
+  
+  /**
+   * Create a TCompactProtocol.
+   *
+   * @param transport the TTransport object to read from or write to.
+   */
+  public TCompactProtocol(TTransport transport) {
+    super(transport);
+  }
+  
+  
+  //
+  // Public Writing methods.
+  //
+
+  /**
+   * Write a message header to the wire. Compact Protocol messages contain the
+   * protocol version so we can migrate forwards in the future if need be.
+   */
+  public void writeMessageBegin(TMessage message) throws TException {
+    writeByteDirect(PROTOCOL_ID);
+    writeByteDirect((VERSION & VERSION_MASK) | ((message.type << TYPE_SHIFT_AMOUNT) & TYPE_MASK));
+    writeVarint32(message.seqid);
+    writeString(message.name);
+  }
+
+  /**
+   * Write a struct begin. This doesn't actually put anything on the wire. We 
+   * use it as an opportunity to put special placeholder markers on the field
+   * stack so we can get the field id deltas correct.
+   */
+  public void writeStructBegin(TStruct struct) throws TException {
+    lastField_.push(lastFieldId_);
+    lastFieldId_ = 0;
+  }
+
+  /**
+   * Write a struct end. This doesn't actually put anything on the wire. We use
+   * this as an opportunity to pop the last field from the current struct off
+   * of the field stack.
+   */
+  public void writeStructEnd() throws TException {
+    lastFieldId_ = lastField_.pop();
+  }
+  
+  /**
+   * Write a field header containing the field id and field type. If the
+   * difference between the current field id and the last one is small (< 15),
+   * then the field id will be encoded in the 4 MSB as a delta. Otherwise, the
+   * field id will follow the type header as a zigzag varint.
+   */ 
+  public void writeFieldBegin(TField field) throws TException {
+    if (field.type == TType.BOOL) {
+      // we want to possibly include the value, so we'll wait.
+      booleanField_ = field;
+    } else {
+      writeFieldBeginInternal(field, (byte)-1);
+    }
+  }
+
+  /**
+   * The workhorse of writeFieldBegin. It has the option of doing a 
+   * 'type override' of the type header. This is used specifically in the 
+   * boolean field case.
+   */
+  private void writeFieldBeginInternal(TField field, byte typeOverride) throws TException {
+    // short lastField = lastField_.pop();
+
+    // if there's a type override, use that.
+    byte typeToWrite = typeOverride == -1 ? getCompactType(field.type) : typeOverride;
+
+    // check if we can use delta encoding for the field id
+    if (field.id > lastFieldId_ && field.id - lastFieldId_ <= 15) {
+      // write them together
+      writeByteDirect((field.id - lastFieldId_) << 4 | typeToWrite);
+    } else {
+      // write them separate
+      writeByteDirect(typeToWrite);
+      writeI16(field.id);
+    }
+
+    lastFieldId_ = field.id;
+    // lastField_.push(field.id);
+  }
+
+  /**
+   * Write the STOP symbol so we know there are no more fields in this struct.
+   */
+  public void writeFieldStop() throws TException {
+    writeByteDirect(TType.STOP);
+  }
+
+  /**
+   * Write a map header. If the map is empty, omit the key and value type 
+   * headers, as we don't need any additional information to skip it.
+   */
+  public void writeMapBegin(TMap map) throws TException {
+    if (map.size == 0) {
+      writeByteDirect(0);
+    } else {
+      writeVarint32(map.size);
+      writeByteDirect(getCompactType(map.keyType) << 4 | getCompactType(map.valueType));
+    }
+  }
+  
+  /** 
+   * Write a list header.
+   */
+  public void writeListBegin(TList list) throws TException {
+    writeCollectionBegin(list.elemType, list.size);
+  }
+
+  /**
+   * Write a set header.
+   */
+  public void writeSetBegin(TSet set) throws TException {
+    writeCollectionBegin(set.elemType, set.size);
+  }
+
+  /**
+   * Write a boolean value. Potentially, this could be a boolean field, in 
+   * which case the field header info isn't written yet. If so, decide what the
+   * right type header is for the value and then write the field header. 
+   * Otherwise, write a single byte.
+   */
+  public void writeBool(boolean b) throws TException {
+    if (booleanField_ != null) {
+      // we haven't written the field header yet
+      writeFieldBeginInternal(booleanField_, b ? Types.BOOLEAN_TRUE : Types.BOOLEAN_FALSE);
+      booleanField_ = null;
+    } else {
+      // we're not part of a field, so just write the value.
+      writeByteDirect(b ? Types.BOOLEAN_TRUE : Types.BOOLEAN_FALSE);
+    }
+  }
+
+  /** 
+   * Write a byte. Nothing to see here!
+   */
+  public void writeByte(byte b) throws TException {
+    writeByteDirect(b);
+  }
+
+  /**
+   * Write an I16 as a zigzag varint.
+   */
+  public void writeI16(short i16) throws TException {
+    writeVarint32(intToZigZag(i16));
+  }
+  
+  /**
+   * Write an i32 as a zigzag varint.
+   */
+  public void writeI32(int i32) throws TException {
+    writeVarint32(intToZigZag(i32));
+  }
+
+  /**
+   * Write an i64 as a zigzag varint.
+   */
+  public void writeI64(long i64) throws TException {
+    writeVarint64(longToZigzag(i64));
+  }
+
+  /**
+   * Write a double to the wire as 8 bytes.
+   */ 
+  public void writeDouble(double dub) throws TException {
+    byte[] data = new byte[]{0, 0, 0, 0, 0, 0, 0, 0};
+    fixedLongToBytes(Double.doubleToLongBits(dub), data, 0);
+    trans_.write(data);
+  }
+
+  /**
+   * Write a string to the wire with a varint size preceeding.
+   */
+  public void writeString(String str) throws TException {
+    try {
+      writeBinary(str.getBytes("UTF-8"));
+    } catch (UnsupportedEncodingException e) {
+      throw new TException("UTF-8 not supported!");
+    }
+  }
+
+  /**
+   * Write a byte array, using a varint for the size. 
+   */
+  public void writeBinary(byte[] bin) throws TException {
+    writeVarint32(bin.length);
+    trans_.write(bin);
+  }
+
+  //
+  // These methods are called by structs, but don't actually have any wire 
+  // output or purpose.
+  // 
+  
+  public void writeMessageEnd() throws TException {}
+  public void writeMapEnd() throws TException {}
+  public void writeListEnd() throws TException {}
+  public void writeSetEnd() throws TException {}
+  public void writeFieldEnd() throws TException {}
+
+  //
+  // Internal writing methods
+  //
+
+  /**
+   * Abstract method for writing the start of lists and sets. List and sets on 
+   * the wire differ only by the type indicator.
+   */
+  protected void writeCollectionBegin(byte elemType, int size) throws TException {
+    if (size <= 14) {
+      writeByteDirect(size << 4 | getCompactType(elemType));
+    } else {
+      writeByteDirect(0xf0 | getCompactType(elemType));
+      writeVarint32(size);
+    }
+  }
+
+  /**
+   * Write an i32 as a varint. Results in 1-5 bytes on the wire.
+   * TODO: make a permanent buffer like writeVarint64?
+   */
+  byte[] i32buf = new byte[5];
+  private void writeVarint32(int n) throws TException {
+    int idx = 0;
+    while (true) {
+      if ((n & ~0x7F) == 0) {
+        i32buf[idx++] = (byte)n;
+        // writeByteDirect((byte)n);
+        break;
+        // return;
+      } else {
+        i32buf[idx++] = (byte)((n & 0x7F) | 0x80);
+        // writeByteDirect((byte)((n & 0x7F) | 0x80));
+        n >>>= 7;
+      }
+    }
+    trans_.write(i32buf, 0, idx);
+  }
+
+  /**
+   * Write an i64 as a varint. Results in 1-10 bytes on the wire.
+   */
+  byte[] varint64out = new byte[10];
+  private void writeVarint64(long n) throws TException {
+    int idx = 0;
+    while (true) {
+      if ((n & ~0x7FL) == 0) {
+        varint64out[idx++] = (byte)n;
+        break;
+      } else {
+        varint64out[idx++] = ((byte)((n & 0x7F) | 0x80));
+        n >>>= 7;
+      }
+    }
+    trans_.write(varint64out, 0, idx);
+  }
+  
+  /**
+   * Convert l into a zigzag long. This allows negative numbers to be 
+   * represented compactly as a varint.
+   */
+  private long longToZigzag(long l) {
+    return (l << 1) ^ (l >> 63);
+  }
+  
+  /**
+   * Convert n into a zigzag int. This allows negative numbers to be 
+   * represented compactly as a varint.
+   */
+  private int intToZigZag(int n) {
+    return (n << 1) ^ (n >> 31);
+  }
+  
+  /**
+   * Convert a long into little-endian bytes in buf starting at off and going 
+   * until off+7.
+   */
+  private void fixedLongToBytes(long n, byte[] buf, int off) {
+    buf[off+0] = (byte)( n        & 0xff);
+    buf[off+1] = (byte)((n >> 8 ) & 0xff);
+    buf[off+2] = (byte)((n >> 16) & 0xff);
+    buf[off+3] = (byte)((n >> 24) & 0xff);
+    buf[off+4] = (byte)((n >> 32) & 0xff);
+    buf[off+5] = (byte)((n >> 40) & 0xff);
+    buf[off+6] = (byte)((n >> 48) & 0xff);
+    buf[off+7] = (byte)((n >> 56) & 0xff);
+  }
+
+  /** 
+   * Writes a byte without any possiblity of all that field header nonsense. 
+   * Used internally by other writing methods that know they need to write a byte.
+   */
+  private byte[] byteDirectBuffer = new byte[1];
+  private void writeByteDirect(byte b) throws TException {
+    byteDirectBuffer[0] = b;
+    trans_.write(byteDirectBuffer);
+  }
+
+  /** 
+   * Writes a byte without any possiblity of all that field header nonsense.
+   */
+  private void writeByteDirect(int n) throws TException {
+    writeByteDirect((byte)n);
+  }
+
+
+  // 
+  // Reading methods.
+  // 
+
+  /**
+   * Read a message header. 
+   */
+  public TMessage readMessageBegin() throws TException {
+    byte protocolId = readByte();
+    if (protocolId != PROTOCOL_ID) {
+      throw new TProtocolException("Expected protocol id " + Integer.toHexString(PROTOCOL_ID) + " but got " + Integer.toHexString(protocolId));
+    }
+    byte versionAndType = readByte();
+    byte version = (byte)(versionAndType & VERSION_MASK);
+    if (version != VERSION) {
+      throw new TProtocolException("Expected version " + VERSION + " but got " + version);
+    }
+    byte type = (byte)((versionAndType >> TYPE_SHIFT_AMOUNT) & 0x03);
+    int seqid = readVarint32();
+    String messageName = readString();
+    return new TMessage(messageName, type, seqid);
+  }
+
+  /**
+   * Read a struct begin. There's nothing on the wire for this, but it is our
+   * opportunity to push a new struct begin marker onto the field stack.
+   */
+  public TStruct readStructBegin() throws TException {
+    lastField_.push(lastFieldId_);
+    lastFieldId_ = 0;
+    return ANONYMOUS_STRUCT;
+  }
+
+  /**
+   * Doesn't actually consume any wire data, just removes the last field for 
+   * this struct from the field stack.
+   */
+  public void readStructEnd() throws TException {
+    // consume the last field we read off the wire.
+    lastFieldId_ = lastField_.pop();
+  }
+  
+  /**
+   * Read a field header off the wire. 
+   */
+  public TField readFieldBegin() throws TException {
+    byte type = readByte();
+    
+    // if it's a stop, then we can return immediately, as the struct is over.
+    if ((type & 0x0f) == TType.STOP) {
+      return TSTOP;
+    }
+    
+    short fieldId;
+
+    // mask off the 4 MSB of the type header. it could contain a field id delta.
+    short modifier = (short)((type & 0xf0) >> 4);
+    if (modifier == 0) {
+      // not a delta. look ahead for the zigzag varint field id.
+      fieldId = readI16();
+    } else {
+      // has a delta. add the delta to the last read field id.
+      fieldId = (short)(lastFieldId_ + modifier);
+    }
+    
+    TField field = new TField("", getTType((byte)(type & 0x0f)), fieldId);
+
+    // if this happens to be a boolean field, the value is encoded in the type
+    if (isBoolType(type)) {
+      // save the boolean value in a special instance variable.
+      boolValue_ = (byte)(type & 0x0f) == Types.BOOLEAN_TRUE ? Boolean.TRUE : Boolean.FALSE;
+    } 
+
+    // push the new field onto the field stack so we can keep the deltas going.
+    lastFieldId_ = field.id;
+    return field;
+  }
+
+  /** 
+   * Read a map header off the wire. If the size is zero, skip reading the key
+   * and value type. This means that 0-length maps will yield TMaps without the
+   * "correct" types.
+   */
+  public TMap readMapBegin() throws TException {
+    int size = readVarint32();
+    byte keyAndValueType = size == 0 ? 0 : readByte();
+    return new TMap(getTType((byte)(keyAndValueType >> 4)), getTType((byte)(keyAndValueType & 0xf)), size);
+  }
+
+  /**
+   * Read a list header off the wire. If the list size is 0-14, the size will 
+   * be packed into the element type header. If it's a longer list, the 4 MSB
+   * of the element type header will be 0xF, and a varint will follow with the
+   * true size.
+   */
+  public TList readListBegin() throws TException {
+    byte size_and_type = readByte();
+    int size = (size_and_type >> 4) & 0x0f;
+    if (size == 15) {
+      size = readVarint32();
+    }
+    byte type = getTType(size_and_type);
+    return new TList(type, size);
+  }
+
+  /**
+   * Read a set header off the wire. If the set size is 0-14, the size will 
+   * be packed into the element type header. If it's a longer set, the 4 MSB
+   * of the element type header will be 0xF, and a varint will follow with the
+   * true size.
+   */
+  public TSet readSetBegin() throws TException {
+    return new TSet(readListBegin());
+  }
+
+  /**
+   * Read a boolean off the wire. If this is a boolean field, the value should
+   * already have been read during readFieldBegin, so we'll just consume the
+   * pre-stored value. Otherwise, read a byte.
+   */
+  public boolean readBool() throws TException {
+    if (boolValue_ != null) {
+      boolean result = boolValue_.booleanValue();
+      boolValue_ = null;
+      return result;
+    }
+    return readByte() == Types.BOOLEAN_TRUE;
+  }
+
+  byte[] byteRawBuf = new byte[1];
+  /**
+   * Read a single byte off the wire. Nothing interesting here.
+   */
+  public byte readByte() throws TException {
+    trans_.read(byteRawBuf, 0, 1);
+    return byteRawBuf[0];
+  }
+
+  /**
+   * Read an i16 from the wire as a zigzag varint.
+   */
+  public short readI16() throws TException {
+    return (short)zigzagToInt(readVarint32());
+  }
+
+  /**
+   * Read an i32 from the wire as a zigzag varint.
+   */
+  public int readI32() throws TException {
+    return zigzagToInt(readVarint32());
+  }
+
+  /**
+   * Read an i64 from the wire as a zigzag varint.
+   */
+  public long readI64() throws TException {
+    return zigzagToLong(readVarint64());
+  }
+
+  /**
+   * No magic here - just read a double off the wire.
+   */
+  public double readDouble() throws TException {
+    byte[] longBits = new byte[8];
+    trans_.read(longBits, 0, 8);
+    return Double.longBitsToDouble(bytesToLong(longBits));
+  }
+
+  /**
+   * Reads a byte[] (via readBinary), and then UTF-8 decodes it.
+   */
+  public String readString() throws TException {
+    try {
+      return new String(readBinary(), "UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new TException("UTF-8 not supported!");
+    }
+  }
+
+  /**
+   * Read a byte[] from the wire. 
+   */
+  public byte[] readBinary() throws TException {
+    int length = readVarint32();
+    if (length == 0) return new byte[0];
+
+    byte[] buf = new byte[length];
+    trans_.read(buf, 0, length);
+    return buf;
+  }
+
+
+  //
+  // These methods are here for the struct to call, but don't have any wire 
+  // encoding.
+  //
+  public void readMessageEnd() throws TException {}
+  public void readFieldEnd() throws TException {}
+  public void readMapEnd() throws TException {}
+  public void readListEnd() throws TException {}
+  public void readSetEnd() throws TException {}
+  
+  //
+  // Internal reading methods
+  //
+  
+  /**
+   * Read an i32 from the wire as a varint. The MSB of each byte is set
+   * if there is another byte to follow. This can read up to 5 bytes.
+   */
+  private int readVarint32() throws TException {
+    // if the wire contains the right stuff, this will just truncate the i64 we
+    // read and get us the right sign.
+    return (int)readVarint64();
+  }
+
+  /**
+   * Read an i64 from the wire as a proper varint. The MSB of each byte is set 
+   * if there is another byte to follow. This can read up to 10 bytes.
+   */
+  private long readVarint64() throws TException {
+    int shift = 0;
+    long result = 0;
+    while (true) {
+      byte b = readByte();
+      result |= (long) (b & 0x7f) << shift;
+      if ((b & 0x80) != 0x80) break;
+      shift +=7;
+    }
+    return result;
+  }
+
+  //
+  // encoding helpers
+  //
+  
+  /**
+   * Convert from zigzag int to int.
+   */
+  private int zigzagToInt(int n) {
+    return (n >>> 1) ^ -(n & 1);
+  }
+  
+  /** 
+   * Convert from zigzag long to long.
+   */
+  private long zigzagToLong(long n) {
+    return (n >>> 1) ^ -(n & 1);
+  }
+
+  /**
+   * Note that it's important that the mask bytes are long literals, 
+   * otherwise they'll default to ints, and when you shift an int left 56 bits,
+   * you just get a messed up int.
+   */
+  private long bytesToLong(byte[] bytes) {
+    return
+      ((bytes[7] & 0xffL) << 56) |
+      ((bytes[6] & 0xffL) << 48) |
+      ((bytes[5] & 0xffL) << 40) |
+      ((bytes[4] & 0xffL) << 32) |
+      ((bytes[3] & 0xffL) << 24) |
+      ((bytes[2] & 0xffL) << 16) |
+      ((bytes[1] & 0xffL) <<  8) |
+      ((bytes[0] & 0xffL));
+  }
+
+  //
+  // type testing and converting
+  //
+
+  private boolean isBoolType(byte b) {
+    return (b & 0x0f) == Types.BOOLEAN_TRUE || (b & 0x0f) == Types.BOOLEAN_FALSE;
+  }
+
+  /**
+   * Given a TCompactProtocol.Types constant, convert it to its corresponding 
+   * TType value.
+   */
+  private byte getTType(byte type) {
+    switch ((byte)(type & 0x0f)) {
+      case TType.STOP:
+        return TType.STOP;
+      case Types.BOOLEAN_FALSE:
+      case Types.BOOLEAN_TRUE:
+        return TType.BOOL;
+      case Types.BYTE:
+        return TType.BYTE;
+      case Types.I16:
+        return TType.I16;
+      case Types.I32:
+        return TType.I32;
+      case Types.I64:
+        return TType.I64;
+      case Types.DOUBLE:
+        return TType.DOUBLE;
+      case Types.BINARY:
+        return TType.STRING;
+      case Types.LIST:
+        return TType.LIST;
+      case Types.SET:
+        return TType.SET;
+      case Types.MAP:
+        return TType.MAP;
+      case Types.STRUCT:
+        return TType.STRUCT;
+      default:
+        throw new RuntimeException("don't know what type: " + (byte)(type & 0x0f));
+    }
+  }
+
+  /**
+   * Given a TType value, find the appropriate TCompactProtocol.Types constant.
+   */
+  private byte getCompactType(byte ttype) {
+    return ttypeToCompactType[ttype];
+  }
+  
+}
diff --git a/lib/java/src/org/apache/thrift/protocol/TField.java b/lib/java/src/org/apache/thrift/protocol/TField.java
index f7c8e92..78e2328 100644
--- a/lib/java/src/org/apache/thrift/protocol/TField.java
+++ b/lib/java/src/org/apache/thrift/protocol/TField.java
@@ -25,4 +25,12 @@
   public final String name;
   public final byte   type;
   public final short  id;
+
+  public String toString() {
+    return "<TField name:'" + name + "' type:" + type + " field-id:" + id + ">";
+  }
+
+  public boolean equals(TField otherField) {
+    return type == otherField.type && id == otherField.id;
+  }
 }
diff --git a/lib/java/src/org/apache/thrift/protocol/TMessage.java b/lib/java/src/org/apache/thrift/protocol/TMessage.java
index d7c03cf..4fae143 100644
--- a/lib/java/src/org/apache/thrift/protocol/TMessage.java
+++ b/lib/java/src/org/apache/thrift/protocol/TMessage.java
@@ -25,4 +25,12 @@
   public final String name;
   public final byte type;
   public final int seqid;
+
+  public String toString() {
+    return "<TMessage name:'" + name + "' type: " + type + " seqid:" + seqid + ">";
+  }
+  
+  public boolean equals(TMessage other) {
+    return name.equals(other.name) && type == other.type && seqid == other.seqid;
+  }
 }
diff --git a/lib/java/src/org/apache/thrift/protocol/TSet.java b/lib/java/src/org/apache/thrift/protocol/TSet.java
index 515d558..9ff7477 100644
--- a/lib/java/src/org/apache/thrift/protocol/TSet.java
+++ b/lib/java/src/org/apache/thrift/protocol/TSet.java
@@ -21,6 +21,10 @@
     size = s;
   }
 
+  public TSet(TList list) {
+    this(list.elemType, list.size);
+  }
+
   public final byte elemType;
   public final int  size;
 }
diff --git a/lib/java/src/org/apache/thrift/transport/TMemoryBuffer.java b/lib/java/src/org/apache/thrift/transport/TMemoryBuffer.java
index ed3c844..5555535 100644
--- a/lib/java/src/org/apache/thrift/transport/TMemoryBuffer.java
+++ b/lib/java/src/org/apache/thrift/transport/TMemoryBuffer.java
@@ -64,10 +64,23 @@
     return arr_.toString(enc);
   }
 
+  public String inspect() {
+    String buf = "";
+    byte[] bytes = arr_.toByteArray();
+    for (int i = 0; i < bytes.length; i++) {
+      buf += (pos_ == i ? "==>" : "" ) + Integer.toHexString(bytes[i] & 0xff) + " ";
+    }
+    return buf;
+  }
+
   // The contents of the buffer
   private TByteArrayOutputStream arr_;
 
   // Position to read next byte from
   private int pos_;
+  
+  public int length() {
+    return arr_.size();
+  }
 }
 
diff --git a/lib/java/test/org/apache/thrift/test/Fixtures.java b/lib/java/test/org/apache/thrift/test/Fixtures.java
new file mode 100644
index 0000000..cf887f2
--- /dev/null
+++ b/lib/java/test/org/apache/thrift/test/Fixtures.java
@@ -0,0 +1,108 @@
+
+package org.apache.thrift.test;
+
+import java.util.*;
+import thrift.test.*;
+
+public class Fixtures {
+  
+  private static final byte[] kUnicodeBytes = {
+    (byte)0xd3, (byte)0x80, (byte)0xe2, (byte)0x85, (byte)0xae, (byte)0xce,
+    (byte)0x9d, (byte)0x20, (byte)0xd0, (byte)0x9d, (byte)0xce, (byte)0xbf,
+    (byte)0xe2, (byte)0x85, (byte)0xbf, (byte)0xd0, (byte)0xbe, (byte)0xc9,
+    (byte)0xa1, (byte)0xd0, (byte)0xb3, (byte)0xd0, (byte)0xb0, (byte)0xcf,
+    (byte)0x81, (byte)0xe2, (byte)0x84, (byte)0x8e, (byte)0x20, (byte)0xce,
+    (byte)0x91, (byte)0x74, (byte)0x74, (byte)0xce, (byte)0xb1, (byte)0xe2,
+    (byte)0x85, (byte)0xbd, (byte)0xce, (byte)0xba, (byte)0x83, (byte)0xe2,
+    (byte)0x80, (byte)0xbc
+  };
+  
+  
+  public static final OneOfEach oneOfEach;
+  public static final Nesting nesting;
+  public static final HolyMoley holyMoley;
+  public static final CompactProtoTestStruct compactProtoTestStruct;
+  
+  static {
+    try {
+      oneOfEach = new OneOfEach();
+      oneOfEach.im_true = true;
+      oneOfEach.im_false = false;
+      oneOfEach.a_bite = (byte) 0x03;
+      oneOfEach.integer16 = 27000;
+      oneOfEach.integer32 = 1 << 24;
+      oneOfEach.integer64 = (long) 6000 * 1000 * 1000;
+      oneOfEach.double_precision = Math.PI;
+      oneOfEach.some_characters = "JSON THIS! \"\1";
+      oneOfEach.zomg_unicode = new String(kUnicodeBytes, "UTF-8");
+
+      nesting = new Nesting(new Bonk(), new OneOfEach());
+      nesting.my_ooe.integer16 = 16;
+      nesting.my_ooe.integer32 = 32;
+      nesting.my_ooe.integer64 = 64;
+      nesting.my_ooe.double_precision = (Math.sqrt(5) + 1) / 2;
+      nesting.my_ooe.some_characters = ":R (me going \"rrrr\")";
+      nesting.my_ooe.zomg_unicode = new String(kUnicodeBytes, "UTF-8");
+      nesting.my_bonk.type = 31337;
+      nesting.my_bonk.message = "I am a bonk... xor!";
+
+      holyMoley = new HolyMoley();
+
+      holyMoley.big = new ArrayList<OneOfEach>();
+      holyMoley.big.add(new OneOfEach(oneOfEach));
+      holyMoley.big.add(nesting.my_ooe);
+      holyMoley.big.get(0).a_bite = (byte) 0x22;
+      holyMoley.big.get(1).a_bite = (byte) 0x23;
+
+      holyMoley.contain = new HashSet<List<String>>();
+      ArrayList<String> stage1 = new ArrayList<String>(2);
+      stage1.add("and a one");
+      stage1.add("and a two");
+      holyMoley.contain.add(stage1);
+      stage1 = new ArrayList<String>(3);
+      stage1.add("then a one, two");
+      stage1.add("three!");
+      stage1.add("FOUR!!");
+      holyMoley.contain.add(stage1);
+      stage1 = new ArrayList<String>(0);
+      holyMoley.contain.add(stage1);
+
+      ArrayList<Bonk> stage2 = new ArrayList<Bonk>();
+      holyMoley.bonks = new HashMap<String, List<Bonk>>();
+      // one empty
+      holyMoley.bonks.put("nothing", stage2);
+      
+      // one with two
+      stage2 = new ArrayList<Bonk>();
+      Bonk b = new Bonk();
+      b.type = 1;
+      b.message = "Wait.";
+      stage2.add(b);
+      b = new Bonk();
+      b.type = 2;
+      b.message = "What?";
+      stage2.add(b);      
+      holyMoley.bonks.put("something", stage2);
+      
+      // one with three
+      stage2 = new ArrayList<Bonk>();
+      b = new Bonk();
+      b.type = 3;
+      b.message = "quoth";
+      b = new Bonk();
+      b.type = 4;
+      b.message = "the raven";
+      b = new Bonk();
+      b.type = 5;
+      b.message = "nevermore";
+      holyMoley.bonks.put("poe", stage2);
+      
+      // superhuge compact proto test struct
+      compactProtoTestStruct = new CompactProtoTestStruct();
+      compactProtoTestStruct.a_binary = new byte[]{0,1,2,3,4,5,6,7,8};
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+  
+}
\ No newline at end of file
diff --git a/lib/java/test/org/apache/thrift/test/TCompactProtocolTest.java b/lib/java/test/org/apache/thrift/test/TCompactProtocolTest.java
new file mode 100755
index 0000000..b8df71d
--- /dev/null
+++ b/lib/java/test/org/apache/thrift/test/TCompactProtocolTest.java
@@ -0,0 +1,421 @@
+
+package org.apache.thrift.test;
+
+import java.util.*;
+
+import org.apache.thrift.transport.*;
+import org.apache.thrift.server.*;
+import org.apache.thrift.protocol.*;
+import org.apache.thrift.*;
+
+import thrift.test.*;
+
+public class TCompactProtocolTest {
+
+  static TProtocolFactory factory = new TCompactProtocol.Factory();
+
+  public static void main(String[] args) throws Exception {
+    testNakedByte();
+    for (int i = 0; i < 128; i++) {
+      testByteField((byte)i);
+      testByteField((byte)-i);
+    }
+    
+    testNakedI16((short)0);
+    testNakedI16((short)1);
+    testNakedI16((short)15000);
+    testNakedI16((short)0x7fff);
+    testNakedI16((short)-1);
+    testNakedI16((short)-15000);
+    testNakedI16((short)-0x7fff);
+    
+    testI16Field((short)0);
+    testI16Field((short)1);
+    testI16Field((short)7);
+    testI16Field((short)150);
+    testI16Field((short)15000);
+    testI16Field((short)0x7fff);
+    testI16Field((short)-1);
+    testI16Field((short)-7);
+    testI16Field((short)-150);
+    testI16Field((short)-15000);
+    testI16Field((short)-0x7fff);
+    
+    testNakedI32(0);
+    testNakedI32(1);
+    testNakedI32(15000);
+    testNakedI32(0xffff);
+    testNakedI32(-1);
+    testNakedI32(-15000);
+    testNakedI32(-0xffff);
+    
+    testI32Field(0);
+    testI32Field(1);
+    testI32Field(7);
+    testI32Field(150);
+    testI32Field(15000);
+    testI32Field(31337);
+    testI32Field(0xffff);
+    testI32Field(0xffffff);
+    testI32Field(-1);
+    testI32Field(-7);
+    testI32Field(-150);
+    testI32Field(-15000);
+    testI32Field(-0xffff);
+    testI32Field(-0xffffff);
+    
+    testNakedI64(0);
+    testNakedI64(1);
+    testNakedI64(0xff);
+    testNakedI64(0xffff);
+    testNakedI64(0xffffff);
+    testNakedI64(6000000000L);
+    testNakedI64(Long.MAX_VALUE);
+    testNakedI64(-1);
+    testNakedI64(-0xff);
+    testNakedI64(-0xffff);
+    testNakedI64(-0xffffff);
+    testNakedI64(-6000000000L);
+    
+    testI64Field(0);
+    testI64Field(1);
+    testI64Field(0xff);
+    testI64Field(0xffff);
+    testI64Field(0xffffff);
+    testI64Field(6000000000L);
+    testI64Field(1L << 32);
+    testI64Field(1L << 48);
+    testI64Field(1L << 55);
+    testI64Field(Long.MAX_VALUE);
+    testI64Field(-1);
+    testI64Field(-0xff);
+    testI64Field(-0xffff);
+    testI64Field(-0xffffff);
+    testI64Field(-6000000000L);
+    testI64Field(-1*Long.MAX_VALUE);
+    
+    testDouble();
+    
+    testNakedString("");
+    testNakedString("short");
+    testNakedString("borderlinetiny");
+    testNakedString("a bit longer than the smallest possible");
+    
+    testStringField("");
+    testStringField("short");
+    testStringField("borderlinetiny");
+    testStringField("a bit longer than the smallest possible");
+    
+    testNakedBinary(new byte[]{});
+    testNakedBinary(new byte[]{0,1,2,3,4,5,6,7,8,9,10});
+    testNakedBinary(new byte[]{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14});
+    testNakedBinary(new byte[128]);
+    
+    testBinaryField(new byte[]{});
+    testBinaryField(new byte[]{0,1,2,3,4,5,6,7,8,9,10});
+    testBinaryField(new byte[]{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14});
+    testBinaryField(new byte[128]);
+    
+    testSerialization(OneOfEach.class, Fixtures.oneOfEach);
+    testSerialization(Nesting.class, Fixtures.nesting);
+    testSerialization(HolyMoley.class, Fixtures.holyMoley);
+    testSerialization(CompactProtoTestStruct.class, Fixtures.compactProtoTestStruct);
+    
+    testMessage();
+    
+    testServerRequest();
+  }
+  
+  public static void testNakedByte() throws Exception {
+    TMemoryBuffer buf = new TMemoryBuffer(0);
+    TProtocol proto = factory.getProtocol(buf);
+    proto.writeByte((byte)123);
+    byte out = proto.readByte();
+    if (out != 123) {
+      throw new RuntimeException("Byte was supposed to be " + (byte)123 + " but was " + out);
+    }
+  }
+  
+  public static void testByteField(final byte b) throws Exception {
+    testStructField(new StructFieldTestCase(TType.BYTE, (short)15) {
+      public void writeMethod(TProtocol proto) throws TException {
+        proto.writeByte(b);
+      }
+      
+      public void readMethod(TProtocol proto) throws TException {
+        byte result = proto.readByte();
+        if (result != b) {
+          throw new RuntimeException("Byte was supposed to be " + (byte)b + " but was " + result);
+        }
+      }
+    });
+  }
+
+  public static void testNakedI16(short n) throws Exception {
+    TMemoryBuffer buf = new TMemoryBuffer(0);
+    TProtocol proto = factory.getProtocol(buf);
+    proto.writeI16(n);
+    // System.out.println(buf.inspect());
+    int out = proto.readI16();
+    if (out != n) {
+      throw new RuntimeException("I16 was supposed to be " + n + " but was " + out);
+    }
+  }
+
+  public static void testI16Field(final short n) throws Exception {
+    testStructField(new StructFieldTestCase(TType.I16, (short)15) {
+      public void writeMethod(TProtocol proto) throws TException {
+        proto.writeI16(n);
+      }
+      
+      public void readMethod(TProtocol proto) throws TException {
+        short result = proto.readI16();
+        if (result != n) {
+          throw new RuntimeException("I16 was supposed to be " + n + " but was " + result);
+        }
+      }
+    });
+  }
+  
+  public static void testNakedI32(int n) throws Exception {
+    TMemoryBuffer buf = new TMemoryBuffer(0);
+    TProtocol proto = factory.getProtocol(buf);
+    proto.writeI32(n);
+    // System.out.println(buf.inspect());
+    int out = proto.readI32();
+    if (out != n) {
+      throw new RuntimeException("I32 was supposed to be " + n + " but was " + out);
+    }
+  }
+  
+  public static void testI32Field(final int n) throws Exception {
+    testStructField(new StructFieldTestCase(TType.I32, (short)15) {
+      public void writeMethod(TProtocol proto) throws TException {
+        proto.writeI32(n);
+      }
+      
+      public void readMethod(TProtocol proto) throws TException {
+        int result = proto.readI32();
+        if (result != n) {
+          throw new RuntimeException("I32 was supposed to be " + n + " but was " + result);
+        }
+      }
+    });
+    
+  }
+
+  public static void testNakedI64(long n) throws Exception {
+    TMemoryBuffer buf = new TMemoryBuffer(0);
+    TProtocol proto = factory.getProtocol(buf);
+    proto.writeI64(n);
+    // System.out.println(buf.inspect());
+    long out = proto.readI64();
+    if (out != n) {
+      throw new RuntimeException("I64 was supposed to be " + n + " but was " + out);
+    }
+  }
+  
+  public static void testI64Field(final long n) throws Exception {
+    testStructField(new StructFieldTestCase(TType.I64, (short)15) {
+      public void writeMethod(TProtocol proto) throws TException {
+        proto.writeI64(n);
+      }
+      
+      public void readMethod(TProtocol proto) throws TException {
+        long result = proto.readI64();
+        if (result != n) {
+          throw new RuntimeException("I64 was supposed to be " + n + " but was " + result);
+        }
+      }
+    });
+  }
+    
+  public static void testDouble() throws Exception {
+    TMemoryBuffer buf = new TMemoryBuffer(1000);
+    TProtocol proto = factory.getProtocol(buf);
+    proto.writeDouble(123.456);
+    double out = proto.readDouble();
+    if (out != 123.456) {
+      throw new RuntimeException("Double was supposed to be " + 123.456 + " but was " + out);
+    }
+  }
+    
+  public static void testNakedString(String str) throws Exception {
+    TMemoryBuffer buf = new TMemoryBuffer(0);
+    TProtocol proto = factory.getProtocol(buf);
+    proto.writeString(str);
+    // System.out.println(buf.inspect());
+    String out = proto.readString();
+    if (!str.equals(out)) {
+      throw new RuntimeException("String was supposed to be '" + str + "' but was '" + out + "'");
+    }
+  }
+  
+  public static void testStringField(final String str) throws Exception {
+    testStructField(new StructFieldTestCase(TType.STRING, (short)15) {
+      public void writeMethod(TProtocol proto) throws TException {
+        proto.writeString(str);
+      }
+      
+      public void readMethod(TProtocol proto) throws TException {
+        String result = proto.readString();
+        if (!result.equals(str)) {
+          throw new RuntimeException("String was supposed to be " + str + " but was " + result);
+        }
+      }
+    });
+  }
+
+  public static void testNakedBinary(byte[] data) throws Exception {
+    TMemoryBuffer buf = new TMemoryBuffer(0);
+    TProtocol proto = factory.getProtocol(buf);
+    proto.writeBinary(data);
+    // System.out.println(buf.inspect());
+    byte[] out = proto.readBinary();
+    if (!Arrays.equals(data, out)) {
+      throw new RuntimeException("Binary was supposed to be '" + data + "' but was '" + out + "'");
+    }
+  }
+
+  public static void testBinaryField(final byte[] data) throws Exception {
+    testStructField(new StructFieldTestCase(TType.STRING, (short)15) {
+      public void writeMethod(TProtocol proto) throws TException {
+        proto.writeBinary(data);
+      }
+      
+      public void readMethod(TProtocol proto) throws TException {
+        byte[] result = proto.readBinary();
+        if (!Arrays.equals(data, result)) {
+          throw new RuntimeException("Binary was supposed to be '" + bytesToString(data) + "' but was '" + bytesToString(result) + "'");
+        }
+      }
+    });
+    
+  }
+
+  public static <T extends TBase> void testSerialization(Class<T> klass, T obj) throws Exception {
+    TMemoryBuffer buf = new TMemoryBuffer(0);
+    TBinaryProtocol binproto = new TBinaryProtocol(buf);
+    
+    try {
+      obj.write(binproto);
+      // System.out.println("Size in binary protocol: " + buf.length());
+    
+      buf = new TMemoryBuffer(0);
+      TProtocol proto = factory.getProtocol(buf);
+    
+      obj.write(proto);
+      System.out.println("Size in compact protocol: " + buf.length());
+      // System.out.println(buf.inspect());
+    
+      T objRead = klass.newInstance();
+      objRead.read(proto);
+      if (!obj.equals(objRead)) {
+        System.out.println("Expected: " + obj.toString());
+        System.out.println("Actual: " + objRead.toString());
+        // System.out.println(buf.inspect());
+        throw new RuntimeException("Objects didn't match!");
+      }
+    } catch (Exception e) {
+      System.out.println(buf.inspect());
+      throw e;
+    }
+  }
+
+  public static void testMessage() throws Exception {
+    List<TMessage> msgs = Arrays.asList(new TMessage[]{
+      new TMessage("short message name", TMessageType.CALL, 0),
+      new TMessage("1", TMessageType.REPLY, 12345),
+      new TMessage("loooooooooooooooooooooooooooooooooong", TMessageType.EXCEPTION, 1 << 16),
+      new TMessage("Janky", TMessageType.CALL, 0),
+    });
+    
+    for (TMessage msg : msgs) {
+      TMemoryBuffer buf = new TMemoryBuffer(0);
+      TProtocol proto = factory.getProtocol(buf);
+      TMessage output = null;
+      
+      proto.writeMessageBegin(msg);
+      proto.writeMessageEnd();
+
+      output = proto.readMessageBegin();
+
+      if (!msg.equals(output)) {
+        throw new RuntimeException("Message was supposed to be " + msg + " but was " + output);
+      }
+    }
+  }
+
+  public static void testServerRequest() throws Exception {
+    Srv.Iface handler = new Srv.Iface() {
+      public int Janky(int i32arg) throws TException {
+        return i32arg * 2;
+      }
+    };
+    
+    Srv.Processor testProcessor = new Srv.Processor(handler);
+
+    TMemoryBuffer clientOutTrans = new TMemoryBuffer(0);
+    TProtocol clientOutProto = factory.getProtocol(clientOutTrans);
+    TMemoryBuffer clientInTrans = new TMemoryBuffer(0);
+    TProtocol clientInProto = factory.getProtocol(clientInTrans);
+    
+    Srv.Client testClient = new Srv.Client(clientInProto, clientOutProto);
+    
+    testClient.send_Janky(1);
+    // System.out.println(clientOutTrans.inspect());
+    testProcessor.process(clientOutProto, clientInProto);
+    // System.out.println(clientInTrans.inspect());
+    int result = testClient.recv_Janky();
+    if (result != 2) {
+      throw new RuntimeException("Got an unexpected result: " + result);
+    }
+  }
+
+  //
+  // Helper methods
+  //
+  
+  private static String bytesToString(byte[] bytes) {
+    String s = "";
+    for (int i = 0; i < bytes.length; i++) {
+      s += Integer.toHexString((int)bytes[i]) + " ";
+    }
+    return s;
+  }
+
+  private static void testStructField(StructFieldTestCase testCase) throws Exception {
+    TMemoryBuffer buf = new TMemoryBuffer(0);
+    TProtocol proto = factory.getProtocol(buf);
+    
+    TField field = new TField("test_field", testCase.type_, testCase.id_);
+    proto.writeStructBegin(new TStruct("test_struct"));
+    proto.writeFieldBegin(field);
+    testCase.writeMethod(proto);
+    proto.writeFieldEnd();
+    proto.writeStructEnd();
+    
+    // System.out.println(buf.inspect());
+
+    proto.readStructBegin();
+    TField readField = proto.readFieldBegin();
+    // TODO: verify the field is as expected
+    if (!field.equals(readField)) {
+      throw new RuntimeException("Expected " + field + " but got " + readField);
+    }
+    testCase.readMethod(proto);
+    proto.readStructEnd();
+  }
+  
+  public static abstract class StructFieldTestCase {
+    byte type_;
+    short id_;
+    public StructFieldTestCase(byte type, short id) {
+      type_ = type;
+      id_ = id;
+    }
+    
+    public abstract void writeMethod(TProtocol proto) throws TException;
+    public abstract void readMethod(TProtocol proto) throws TException;
+  }
+}
\ No newline at end of file