THRIFT-1469. java: Java isset space optimization

This patch gives the generated code some variable-sized options for the isset bit vector. The compiler will attempt to use byte, short, int and long types before reverting to a BitSet for structs with a LOT of optional fields. This should save a fair amount of memory in a lot of cases.

Patch: Brian Bloniarz

git-svn-id: https://svn.apache.org/repos/asf/thrift/trunk@1221828 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/generate/t_java_generator.cc b/compiler/cpp/src/generate/t_java_generator.cc
index 49f81be..9f2bbd3 100644
--- a/compiler/cpp/src/generate/t_java_generator.cc
+++ b/compiler/cpp/src/generate/t_java_generator.cc
@@ -240,7 +240,8 @@
   void generate_deep_copy_container(std::ofstream& out, std::string source_name_p1, std::string source_name_p2, std::string result_name, t_type* type);
   void generate_deep_copy_non_container(std::ofstream& out, std::string source_name, std::string dest_name, t_type* type);
 
-  bool has_bit_vector(t_struct* tstruct);
+  enum isset_type { ISSET_NONE, ISSET_PRIMITIVE, ISSET_BITSET };
+  isset_type needs_isset(t_struct* tstruct, std::string *outPrimitiveType = NULL);
 
   /**
    * Helper rendering functions
@@ -352,6 +353,7 @@
     "import org.apache.thrift.scheme.StandardScheme;\n\n" +
     "import org.apache.thrift.scheme.TupleScheme;\n" +
     "import org.apache.thrift.protocol.TTupleProtocol;\n" +
+    "import org.apache.thrift.EncodingUtils;\n" +
     "import java.util.List;\n" +
     "import java.util.ArrayList;\n" +
     "import java.util.Map;\n" +
@@ -1296,9 +1298,18 @@
       }
     }
 
-    if (i > 0) {
+    std::string primitiveType;
+    switch(needs_isset(tstruct, &primitiveType)) {
+    case ISSET_NONE:
+      break;
+    case ISSET_PRIMITIVE:
+      indent(out) << "private " << primitiveType << " __isset_bitfield = 0;" << endl;
+      break;
+    case ISSET_BITSET:
       indent(out) << "private BitSet __isset_bit_vector = new BitSet(" << i << ");" << endl;
+      break;
     }
+
     if (optionals > 0) {
       std::string output_string = "private _Fields optionals[] = {";
       for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
@@ -1370,9 +1381,16 @@
   indent(out) << "public " << tstruct->get_name() << "(" << tstruct->get_name() << " other) {" << endl;
   indent_up();
 
-  if (has_bit_vector(tstruct)) {
+  switch(needs_isset(tstruct)) {
+  case ISSET_NONE:
+    break;
+  case ISSET_PRIMITIVE:
+    indent(out) << "__isset_bitfield = other.__isset_bitfield;" << endl;
+    break;
+  case ISSET_BITSET:
     indent(out) << "__isset_bit_vector.clear();" << endl;
     indent(out) << "__isset_bit_vector.or(other.__isset_bit_vector);" << endl;    
+    break;
   }
 
   for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
@@ -1788,6 +1806,7 @@
  */
 void t_java_generator::generate_java_bean_boilerplate(ofstream& out,
                                                       t_struct* tstruct) {
+  isset_type issetType = needs_isset(tstruct);
   const vector<t_field*>& fields = tstruct->get_members();
   vector<t_field*>::const_iterator f_iter;
   for (f_iter = fields.begin(); f_iter != fields.end(); ++f_iter) {
@@ -1928,6 +1947,8 @@
     indent_up();
     if (type_can_be_null(type)) {
       indent(out) << "this." << field_name << " = null;" << endl;
+    } else if(issetType == ISSET_PRIMITIVE) {
+      indent(out) << "__isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, " << isset_field_id(field) << ");" << endl;
     } else {
       indent(out) << "__isset_bit_vector.clear(" << isset_field_id(field) << ");" << endl;
     }
@@ -1940,6 +1961,8 @@
     indent_up();
     if (type_can_be_null(type)) {
       indent(out) << "return this." << field_name << " != null;" << endl;
+    } else if(issetType == ISSET_PRIMITIVE) {
+      indent(out) << "return EncodingUtils.testBit(__isset_bitfield, " << isset_field_id(field) << ");" << endl;
     } else {
       indent(out) << "return __isset_bit_vector.get(" << isset_field_id(field) << ");" << endl;
     }
@@ -1952,6 +1975,8 @@
       indent(out) << "if (!value) {" << endl;
       indent(out) << "  this." << field_name << " = null;" << endl;
       indent(out) << "}" << endl;
+    } else if(issetType == ISSET_PRIMITIVE) {
+      indent(out) << "__isset_bitfield = EncodingUtils.setBit(__isset_bitfield, " << isset_field_id(field) << ", value);" << endl;
     } else {
       indent(out) << "__isset_bit_vector.set(" << isset_field_id(field) << ", value);" << endl;
     }
@@ -3759,16 +3784,33 @@
   indent(out) << "}" << endl;
 }
 
-bool t_java_generator::has_bit_vector(t_struct* tstruct) {
+t_java_generator::isset_type t_java_generator::needs_isset(t_struct* tstruct, std::string *outPrimitiveType) {
   const vector<t_field*>& members = tstruct->get_members();
   vector<t_field*>::const_iterator m_iter;
 
+  int count = 0;
   for (m_iter = members.begin(); m_iter != members.end(); ++m_iter) {
     if (!type_can_be_null(get_true_type((*m_iter)->get_type()))) {
-      return true;
+      count++;
     }
   }
-  return false;
+  if(count == 0) {
+    return ISSET_NONE;
+  } else if(count <= 64) {
+    if(outPrimitiveType != NULL) {
+      if(count <= 8)
+        *outPrimitiveType = "byte";
+      else if(count <= 16)
+        *outPrimitiveType = "short";
+      else if(count <= 32)
+        *outPrimitiveType = "int";
+      else if(count <= 64)
+        *outPrimitiveType = "long";
+    }
+    return ISSET_PRIMITIVE;
+  } else {
+    return ISSET_BITSET;
+  }
 }
 
 void t_java_generator::generate_java_struct_clear(std::ofstream& out, t_struct* tstruct) {
@@ -3838,9 +3880,19 @@
 void t_java_generator::generate_java_struct_read_object(ofstream& out, t_struct* tstruct) {
   indent(out) << "private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {" << endl;
   indent(out) << "  try {" << endl;
-  if (!tstruct->is_union() && has_bit_vector(tstruct)) {
-    indent(out) << "    // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor." << endl;
-    indent(out) << "    __isset_bit_vector = new BitSet(1);" << endl;
+  if (!tstruct->is_union()) {
+    switch(needs_isset(tstruct)) {
+      case ISSET_NONE:
+        break;
+      case ISSET_PRIMITIVE:
+        indent(out) << "    // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor." << endl;
+        indent(out) << "    __isset_bitfield = 0;" << endl;
+        break;
+      case ISSET_BITSET:
+        indent(out) << "    // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor." << endl;
+        indent(out) << "    __isset_bit_vector = new BitSet(1);" << endl;
+        break;
+    }
   }
   indent(out) << "    read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in)));" << endl;
   indent(out) << "  } catch (org.apache.thrift.TException te) {" << endl;
diff --git a/lib/java/build.xml b/lib/java/build.xml
index c0c2374..eb74125 100644
--- a/lib/java/build.xml
+++ b/lib/java/build.xml
@@ -242,6 +242,9 @@
     <exec executable="../../compiler/cpp/thrift" failonerror="true">
       <arg line="--gen java:beans,nocamel ${test.thrift.home}/JavaBeansTest.thrift"/>
     </exec>
+    <exec executable="../../compiler/cpp/thrift" failonerror="true">
+      <arg line="--gen java:hashcode ${test.thrift.home}/ManyOptionals.thrift"/>
+    </exec>
   </target>
 
   <target name="proxy" if="proxy.enabled">
diff --git a/lib/java/src/org/apache/thrift/EncodingUtils.java b/lib/java/src/org/apache/thrift/EncodingUtils.java
index 072de93..bf14ef5 100644
--- a/lib/java/src/org/apache/thrift/EncodingUtils.java
+++ b/lib/java/src/org/apache/thrift/EncodingUtils.java
@@ -82,4 +82,67 @@
         | ((buf[offset + 2] & 0xff) << 8) | ((buf[offset + 3] & 0xff));
   }
 
+  /**
+   * Bitfield utilities.
+   * Returns true if the bit at position is set in v.
+   */
+  public static final boolean testBit(byte v, int position) {
+    return testBit((int)v, position);
+  }
+
+  public static final boolean testBit(short v, int position) {
+    return testBit((int)v, position);
+  }
+
+  public static final boolean testBit(int v, int position) {
+    return (v & (1 << position)) != 0;
+  }
+
+  public static final boolean testBit(long v, int position) {
+    return (v & (1L << position)) != 0L;
+  }
+
+  /**
+   * Returns v, with the bit at position set to zero.
+   */
+  public static final byte clearBit(byte v, int position) {
+    return (byte)clearBit((int)v, position);
+  }
+
+  public static final short clearBit(short v, int position) {
+    return (short)clearBit((int)v, position);
+  }
+
+  public static final int clearBit(int v, int position) {
+    return v & ~(1 << position);
+  }
+
+  public static final long clearBit(long v, int position) {
+    return v & ~(1L << position);
+  }
+
+  /**
+   * Returns v, with the bit at position set to 1 or 0 depending on value.
+   */
+  public static final byte setBit(byte v, int position, boolean value) {
+    return (byte)setBit((int)v, position, value);
+  }
+
+  public static final short setBit(short v, int position, boolean value) {
+    return (short)setBit((int)v, position, value);
+  }
+
+  public static final int setBit(int v, int position, boolean value) {
+    if(value)
+      return v | (1 << position);
+    else
+      return clearBit(v, position);
+  }
+
+  public static final long setBit(long v, int position, boolean value) {
+    if(value)
+      return v | (1L << position);
+    else
+      return clearBit(v, position);
+  }
 }
diff --git a/lib/java/test/org/apache/thrift/TestOptionals.java b/lib/java/test/org/apache/thrift/TestOptionals.java
new file mode 100644
index 0000000..d1591ee
--- /dev/null
+++ b/lib/java/test/org/apache/thrift/TestOptionals.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.thrift;
+
+import junit.framework.TestCase;
+
+import thrift.test.Opt4;
+import thrift.test.Opt30;
+import thrift.test.Opt64;
+import thrift.test.Opt80;
+
+// Exercises the isSet methods using structs from from ManyOptionals.thrift
+public class TestOptionals extends TestCase {
+  public void testEncodingUtils() throws Exception {
+    assertEquals((short)0x8, EncodingUtils.setBit((short)0, 3, true));
+    assertEquals((short)0, EncodingUtils.setBit((short)0x8, 3, false));
+    assertEquals(true, EncodingUtils.testBit((short)0x8, 3));
+    assertEquals(false, EncodingUtils.testBit((short)0x8, 4));
+
+    assertEquals((short)Short.MIN_VALUE, EncodingUtils.setBit((short)0, 15, true));
+    assertEquals((short)0, EncodingUtils.setBit((short)Short.MIN_VALUE, 15, false));
+    assertEquals(true, EncodingUtils.testBit(Short.MIN_VALUE, 15));
+    assertEquals(false, EncodingUtils.testBit(Short.MIN_VALUE, 14));
+  }
+
+  public void testOpt4() throws Exception {
+    Opt4 x = new Opt4();
+    assertEquals(false, x.isSetDef1());
+    x.setDef1(3);
+    assertEquals(true, x.isSetDef1());
+    assertEquals(false, x.isSetDef2());
+
+    Opt4 copy = new Opt4(x);
+    assertEquals(true, copy.isSetDef1());
+    copy.unsetDef1();
+    assertEquals(false, copy.isSetDef1());
+    assertEquals(true, x.isSetDef1());
+  }
+
+  public void testOpt30() throws Exception {
+    Opt30 x = new Opt30();
+    assertEquals(false, x.isSetDef1());
+    x.setDef1(3);
+    assertEquals(true, x.isSetDef1());
+    assertEquals(false, x.isSetDef2());
+  }
+
+  public void testOpt64() throws Exception {
+    Opt64 x = new Opt64();
+    assertEquals(false, x.isSetDef1());
+    x.setDef1(3);
+    assertEquals(true, x.isSetDef1());
+    assertEquals(false, x.isSetDef2());
+    x.setDef64(22);
+    assertEquals(true, x.isSetDef64());
+    assertEquals(false, x.isSetDef63());
+  }
+
+  public void testOpt80() throws Exception {
+    Opt80 x = new Opt80();
+    assertEquals(false, x.isSetDef1());
+    x.setDef1(3);
+    assertEquals(true, x.isSetDef1());
+    assertEquals(false, x.isSetDef2());
+
+    Opt80 copy = new Opt80(x);
+    copy.unsetDef1();
+    assertEquals(false, copy.isSetDef1());
+    assertEquals(true, x.isSetDef1());
+  }
+}
diff --git a/test/ManyOptionals.thrift b/test/ManyOptionals.thrift
new file mode 100644
index 0000000..3fb1d68
--- /dev/null
+++ b/test/ManyOptionals.thrift
@@ -0,0 +1,231 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// The java codegenerator has a few different codepaths depending
+// on how many optionals the struct has; this attempts to exercise
+// them.
+
+namespace java thrift.test
+
+struct Opt4 {
+  1: i32 def1;
+  2: i32 def2;
+  3: i32 def3;
+  4: i32 def4;
+}
+
+struct Opt13 {
+  1: i32 def1;
+  2: i32 def2;
+  3: i32 def3;
+  4: i32 def4;
+  5: i32 def5;
+  6: i32 def6;
+  7: i32 def7;
+  8: i32 def8;
+  9: i32 def9;
+  10: i32 def10;
+  11: i32 def11;
+  12: i32 def12;
+  13: i32 def13;
+}
+
+struct Opt30 {
+  1: i32 def1;
+  2: i32 def2;
+  3: i32 def3;
+  4: i32 def4;
+  5: i32 def5;
+  6: i32 def6;
+  7: i32 def7;
+  8: i32 def8;
+  9: i32 def9;
+  10: i32 def10;
+  11: i32 def11;
+  12: i32 def12;
+  13: i32 def13;
+  14: i32 def14;
+  15: i32 def15;
+  16: i32 def16;
+  17: i32 def17;
+  18: i32 def18;
+  19: i32 def19;
+  20: i32 def20;
+  21: i32 def21;
+  22: i32 def22;
+  23: i32 def23;
+  24: i32 def24;
+  25: i32 def25;
+  26: i32 def26;
+  27: i32 def27;
+  28: i32 def28;
+  29: i32 def29;
+  30: i32 def30;
+}
+
+struct Opt64 {
+  1: i32 def1;
+  2: i32 def2;
+  3: i32 def3;
+  4: i32 def4;
+  5: i32 def5;
+  6: i32 def6;
+  7: i32 def7;
+  8: i32 def8;
+  9: i32 def9;
+  10: i32 def10;
+  11: i32 def11;
+  12: i32 def12;
+  13: i32 def13;
+  14: i32 def14;
+  15: i32 def15;
+  16: i32 def16;
+  17: i32 def17;
+  18: i32 def18;
+  19: i32 def19;
+  20: i32 def20;
+  21: i32 def21;
+  22: i32 def22;
+  23: i32 def23;
+  24: i32 def24;
+  25: i32 def25;
+  26: i32 def26;
+  27: i32 def27;
+  28: i32 def28;
+  29: i32 def29;
+  30: i32 def30;
+  31: i32 def31;
+  32: i32 def32;
+  33: i32 def33;
+  34: i32 def34;
+  35: i32 def35;
+  36: i32 def36;
+  37: i32 def37;
+  38: i32 def38;
+  39: i32 def39;
+  40: i32 def40;
+  41: i32 def41;
+  42: i32 def42;
+  43: i32 def43;
+  44: i32 def44;
+  45: i32 def45;
+  46: i32 def46;
+  47: i32 def47;
+  48: i32 def48;
+  49: i32 def49;
+  50: i32 def50;
+  51: i32 def51;
+  52: i32 def52;
+  53: i32 def53;
+  54: i32 def54;
+  55: i32 def55;
+  56: i32 def56;
+  57: i32 def57;
+  58: i32 def58;
+  59: i32 def59;
+  60: i32 def60;
+  61: i32 def61;
+  62: i32 def62;
+  63: i32 def63;
+  64: i32 def64;
+}
+
+struct Opt80 {
+  1: i32 def1;
+  2: i32 def2;
+  3: i32 def3;
+  4: i32 def4;
+  5: i32 def5;
+  6: i32 def6;
+  7: i32 def7;
+  8: i32 def8;
+  9: i32 def9;
+  10: i32 def10;
+  11: i32 def11;
+  12: i32 def12;
+  13: i32 def13;
+  14: i32 def14;
+  15: i32 def15;
+  16: i32 def16;
+  17: i32 def17;
+  18: i32 def18;
+  19: i32 def19;
+  20: i32 def20;
+  21: i32 def21;
+  22: i32 def22;
+  23: i32 def23;
+  24: i32 def24;
+  25: i32 def25;
+  26: i32 def26;
+  27: i32 def27;
+  28: i32 def28;
+  29: i32 def29;
+  30: i32 def30;
+  31: i32 def31;
+  32: i32 def32;
+  33: i32 def33;
+  34: i32 def34;
+  35: i32 def35;
+  36: i32 def36;
+  37: i32 def37;
+  38: i32 def38;
+  39: i32 def39;
+  40: i32 def40;
+  41: i32 def41;
+  42: i32 def42;
+  43: i32 def43;
+  44: i32 def44;
+  45: i32 def45;
+  46: i32 def46;
+  47: i32 def47;
+  48: i32 def48;
+  49: i32 def49;
+  50: i32 def50;
+  51: i32 def51;
+  52: i32 def52;
+  53: i32 def53;
+  54: i32 def54;
+  55: i32 def55;
+  56: i32 def56;
+  57: i32 def57;
+  58: i32 def58;
+  59: i32 def59;
+  60: i32 def60;
+  61: i32 def61;
+  62: i32 def62;
+  63: i32 def63;
+  64: i32 def64;
+  65: i32 def65;
+  66: i32 def66;
+  67: i32 def67;
+  68: i32 def68;
+  69: i32 def69;
+  70: i32 def70;
+  71: i32 def71;
+  72: i32 def72;
+  73: i32 def73;
+  74: i32 def74;
+  75: i32 def75;
+  76: i32 def76;
+  77: i32 def77;
+  78: i32 def78;
+  79: i32 def79;
+  80: i32 def80;
+}
+