THRIFT-318. java: Performance of HashSet for enumeration VALID_VALUES seems poor
Instead of a HashSet, enums will now use the special IntRangeSet implementation.
git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@743037 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/generate/t_java_generator.cc b/compiler/cpp/src/generate/t_java_generator.cc
index d6d7aea..a9ad54f 100644
--- a/compiler/cpp/src/generate/t_java_generator.cc
+++ b/compiler/cpp/src/generate/t_java_generator.cc
@@ -314,7 +314,8 @@
f_enum << string() +
"import java.util.Set;\n" +
"import java.util.HashSet;\n" +
- "import java.util.Collections;\n" << endl;
+ "import java.util.Collections;\n" +
+ "import org.apache.thrift.IntRangeSet;\n"<< endl;
f_enum <<
"public class " << tenum->get_name() << " ";
@@ -337,15 +338,18 @@
// Create a static Set with all valid values for this enum
f_enum << endl;
- indent(f_enum) << "public static final Set<Integer> VALID_VALUES = Collections.unmodifiableSet(new HashSet<Integer>(){{" << endl;
+ indent(f_enum) << "public static final IntRangeSet VALID_VALUES = new IntRangeSet(";
indent_up();
+ bool first = true;
for (c_iter = constants.begin(); c_iter != constants.end(); ++c_iter) {
- // populate set
- if ((*c_iter)->has_value())
- indent(f_enum) << "add(" << (*c_iter)->get_value() << ");" << endl;
+ // populate set
+ if ((*c_iter)->has_value()) {
+ f_enum << (first ? "" : ", ") << (*c_iter)->get_name();
+ first = false;
+ }
}
indent_down();
- indent(f_enum) << "}});" << endl;
+ f_enum << ");" << endl;
scope_down(f_enum);
f_enum.close();
diff --git a/lib/java/src/org/apache/thrift/IntRangeSet.java b/lib/java/src/org/apache/thrift/IntRangeSet.java
new file mode 100644
index 0000000..a12a627
--- /dev/null
+++ b/lib/java/src/org/apache/thrift/IntRangeSet.java
@@ -0,0 +1,152 @@
+package org.apache.thrift;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * IntRangeSet is a specialized Set<Integer> implementation designed
+ * specifically to make the generated validate() method calls faster. It groups
+ * the set values into ranges, and in the contains() call, it does
+ * num ranges * 2 comparisons max. For the common case, which is a single,
+ * contiguous range, this approach is about 60% faster than using a HashSet. If
+ * you had a very ragged value set, like all the odd numbers, for instance,
+ * then you would end up with pretty poor running time.
+ */
+public class IntRangeSet implements Set<Integer> {
+ /**
+ * This array keeps the bounds of each extent in alternating cells, always
+ * increasing. Example: [0,5,10,15], which corresponds to 0-5, 10-15.
+ */
+ private int[] extents;
+
+ /**
+ * We'll keep a duplicate, real HashSet around internally to satisfy some of
+ * the other set operations.
+ */
+ private Set<Integer> realSet = new HashSet<Integer>();
+
+ public IntRangeSet(int... values) {
+ Arrays.sort(values);
+
+ List<Integer> extent_list = new ArrayList<Integer>();
+
+ int ext_start = values[0];
+ int ext_end_so_far = values[0];
+ for (int i = 1; i < values.length; i++) {
+ realSet.add(values[i]);
+
+ if (values[i] == ext_end_so_far + 1) {
+ // advance the end so far
+ ext_end_so_far = values[i];
+ } else {
+ // create an extent for everything we saw so far, move on to the next one
+ extent_list.add(ext_start);
+ extent_list.add(ext_end_so_far);
+ ext_start = values[i];
+ ext_end_so_far = values[i];
+ }
+ }
+ extent_list.add(ext_start);
+ extent_list.add(ext_end_so_far);
+
+ extents = new int[extent_list.size()];
+ for (int i = 0; i < extent_list.size(); i++) {
+ extents[i] = extent_list.get(i);
+ }
+ }
+
+ public boolean add(Integer i) {
+ throw new UnsupportedOperationException();
+ }
+
+ public void clear() {
+ throw new UnsupportedOperationException();
+ }
+
+ public boolean addAll(Collection<? extends Integer> arg0) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * While this method is here for Set interface compatibility, you should avoid
+ * using it. It incurs boxing overhead! Use the int method directly, instead.
+ */
+ public boolean contains(Object arg0) {
+ return contains(((Integer)arg0).intValue());
+ }
+
+ /**
+ * This is much faster, since it doesn't stop at Integer on the way through.
+ * @param val the value you want to check set membership for
+ * @return true if val was found, false otherwise
+ */
+ public boolean contains(int val) {
+ for (int i = 0; i < extents.length / 2; i++) {
+ if (val < extents[i*2]) {
+ return false;
+ } else if (val <= extents[i*2+1]) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ public boolean containsAll(Collection<?> arg0) {
+ for (Object o : arg0) {
+ if (!contains(o)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public boolean isEmpty() {
+ return realSet.isEmpty();
+ }
+
+ public Iterator<Integer> iterator() {
+ return realSet.iterator();
+ }
+
+ public boolean remove(Object arg0) {
+ throw new UnsupportedOperationException();
+ }
+
+ public boolean removeAll(Collection<?> arg0) {
+ throw new UnsupportedOperationException();
+ }
+
+ public boolean retainAll(Collection<?> arg0) {
+ throw new UnsupportedOperationException();
+ }
+
+ public int size() {
+ return realSet.size();
+ }
+
+ public Object[] toArray() {
+ return realSet.toArray();
+ }
+
+ public <T> T[] toArray(T[] arg0) {
+ return realSet.toArray(arg0);
+ }
+
+ @Override
+ public String toString() {
+ String buf = "";
+ for (int i = 0; i < extents.length / 2; i++) {
+ if (i != 0) {
+ buf += ", ";
+ }
+ buf += "[" + extents[i*2] + "," + extents[i*2+1] + "]";
+ }
+ return buf;
+ }
+}