THRIFT-4393: renumber GeneratorInput t_{type,etc...}_id
Client: compiler

This closes #1419
diff --git a/compiler/cpp/src/thrift/plugin/plugin.cc b/compiler/cpp/src/thrift/plugin/plugin.cc
index 0bac135..49c0525 100644
--- a/compiler/cpp/src/thrift/plugin/plugin.cc
+++ b/compiler/cpp/src/thrift/plugin/plugin.cc
@@ -124,6 +124,11 @@
 
   std::map<int64_t, S> const* source;
 
+  void clear() {
+    source = nullptr ;
+    cache.clear() ;
+  }
+
 protected:
   std::map<int64_t, C*> cache;
 
@@ -141,6 +146,12 @@
 TypeCache< ::t_const, t_const> g_const_cache;
 TypeCache< ::t_service, t_service> g_service_cache;
 
+void clear_global_cache() {
+  g_type_cache.clear();
+  g_const_cache.clear();
+  g_service_cache.clear();
+}
+
 void set_global_cache(const TypeRegistry& from) {
   g_type_cache.source = &from.types;
   g_const_cache.source = &from.constants;
diff --git a/compiler/cpp/src/thrift/plugin/plugin_output.cc b/compiler/cpp/src/thrift/plugin/plugin_output.cc
index 75725a1..76662dd 100644
--- a/compiler/cpp/src/thrift/plugin/plugin_output.cc
+++ b/compiler/cpp/src/thrift/plugin/plugin_output.cc
@@ -99,29 +99,62 @@
 
 #define THRIFT_ASSIGN_METADATA() convert(reinterpret_cast<t_type*>(from), to.metadata)
 
+// a generator of sequential unique identifiers for addresses -- so
+// that the TypeCache below can use those IDs instead of
+// addresses. This allows GeneratorInput's various
+// t_{program,type,etc}_id types to be dense consecutively-numbered
+// integers, instead of large random-seeming integers.
+//
+// Furthermore, this allows GeneratorInput to be deterministic (no
+// addresses, so no pseudo-randomness) and that means reproducibility
+// of output.
+const int64_t ONE_MILLION = 1000 * 1000;
+class id_generator {
+public:
+  id_generator() : addr2id_(), next_id_(ONE_MILLION) {}
+
+  void clear() {
+    addr2id_.clear() ;
+    next_id_ = ONE_MILLION ;
+  }
+
+  int64_t gensym(const int64_t addr) {
+    if (!addr) return 0L ;
+    std::map<int64_t, int64_t>::iterator it = addr2id_.find(addr);
+    if (it != addr2id_.end()) return it->second ;
+    int64_t id = next_id_++ ;
+    addr2id_.insert(std::make_pair(addr, id)) ;
+    return id ;
+  }
+
+  std::map<int64_t, int64_t> addr2id_ ;
+  int64_t next_id_ ;
+} ;
+
 // To avoid multiple instances of same type, t_type, t_const and t_service are stored in one place
 // and referenced by ID.
 template <typename T>
 struct TypeCache {
   typedef typename plugin::ToType<T>::type to_type;
+  id_generator idgen ;
   std::map<int64_t, to_type> cache;
 
   template <typename T2>
   int64_t store(T2* t) {
-    intptr_t id = reinterpret_cast<intptr_t>(t);
-    if (id) {
-      typename std::map<int64_t, to_type>::iterator it = cache.find(id);
-      if (it == cache.end()) {
-        // HACK: fake resolve for recursive type
-        cache.insert(std::make_pair(id, to_type()));
-        // overwrite with true value
-        cache[id] = convert(t);
-      }
-    }
-    return static_cast<int64_t>(id);
+    intptr_t addr = reinterpret_cast<intptr_t>(t);
+    if (!addr) return 0L ;
+
+    int64_t id = idgen.gensym(addr) ;
+    if (cache.end() != cache.find(id)) return id ;
+
+    // HACK: fake resolve for recursive type
+    cache.insert(std::make_pair(id, to_type()));
+    // overwrite with true value
+    cache[id] = convert(t);
+    return id ;
   }
 
-  void clear() { cache.clear(); }
+  void clear() { cache.clear() ; idgen.clear(); }
 };
 
 template <typename T>
@@ -137,6 +170,8 @@
 T_STORE(const)
 T_STORE(service)
 #undef T_STORE
+// this id_generator is for gensymm-ing t_program_id
+id_generator program_cache ;
 
 #define THRIFT_ASSIGN_ID_N(t, from_name, to_name)                                                  \
   do {                                                                                             \
@@ -157,7 +192,7 @@
   } while (0)
 
 THRIFT_CONVERSION_N(::t_type, plugin::TypeMetadata) {
-  to.program_id = reinterpret_cast<int64_t>(from->get_program());
+  to.program_id = program_cache.gensym(reinterpret_cast<int64_t>(from->get_program()));
   THRIFT_ASSIGN_N(annotations_, annotations, );
   if (from->has_doc()) {
     to.__set_doc(from->get_doc());
@@ -341,6 +376,7 @@
   type_cache.clear();
   const_cache.clear();
   service_cache.clear();
+  program_cache.clear() ;
 }
 
 THRIFT_CONVERSION(t_program) {
@@ -360,7 +396,7 @@
   THRIFT_ASSIGN_LIST_ID(t_const, const);
   THRIFT_ASSIGN_LIST_ID(t_service, service);
   THRIFT_ASSIGN_LIST_N(t_program, get_includes(), includes);
-  to.program_id = reinterpret_cast<plugin::t_program_id>(from);
+  to.program_id = program_cache.gensym(reinterpret_cast<plugin::t_program_id>(from));
 }
 
 PluginDelegateResult delegateToPlugin(t_program* program, const std::string& options) {
@@ -410,3 +446,4 @@
   return PLUGIN_NOT_FOUND;
 }
 }
+
diff --git a/compiler/cpp/src/thrift/plugin/type_util.h b/compiler/cpp/src/thrift/plugin/type_util.h
index 508b741..996b5c6 100644
--- a/compiler/cpp/src/thrift/plugin/type_util.h
+++ b/compiler/cpp/src/thrift/plugin/type_util.h
@@ -38,6 +38,7 @@
 
 class TypeRegistry;
 void set_global_cache(const TypeRegistry&);
+void clear_global_cache();
 }
 }
 }
diff --git a/compiler/cpp/test/Makefile.am b/compiler/cpp/test/Makefile.am
index 5a23202..b7fc91d 100644
--- a/compiler/cpp/test/Makefile.am
+++ b/compiler/cpp/test/Makefile.am
@@ -32,6 +32,8 @@
 
 noinst_PROGRAMS = thrift-gen-mycpp
 
+all-local: thrift-gen-bincat
+
 AM_CPPFLAGS += -I$(top_srcdir)/lib/cpp/src -I$(top_builddir)/lib/cpp/src
 
 plugintest_SOURCES = plugin/conversion_test.cc
@@ -43,9 +45,16 @@
 thrift_gen_mycpp_LDADD = $(top_builddir)/compiler/cpp/libthriftc.la
 
 cpp_plugin_test.sh: thrift-gen-mycpp
-TESTS = $(check_PROGRAMS) cpp_plugin_test.sh
+
+thrift-gen-bincat:
+	cp bincat.sh $@
+	chmod 755 $@
+
+plugin_stability_test.sh: thrift-gen-bincat
+
+TESTS = $(check_PROGRAMS) cpp_plugin_test.sh plugin_stability_test.sh
 
 clean-local:
-	$(RM) -rf gen-cpp gen-mycpp
+	$(RM) -rf gen-cpp gen-mycpp gen-bincat thrift-gen-bincat
 
 endif
diff --git a/compiler/cpp/test/bincat.sh b/compiler/cpp/test/bincat.sh
new file mode 100755
index 0000000..c7f9078
--- /dev/null
+++ b/compiler/cpp/test/bincat.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+exec /bin/cat
diff --git a/compiler/cpp/test/plugin/conversion_test.cc b/compiler/cpp/test/plugin/conversion_test.cc
index 5159ba4..e73aa16 100644
--- a/compiler/cpp/test/plugin/conversion_test.cc
+++ b/compiler/cpp/test/plugin/conversion_test.cc
@@ -234,6 +234,8 @@
 template <typename T>
 T* round_trip(T* t) {
   typename plugin::ToType<T>::type p;
+  plugin::clear_global_cache();
+  plugin_output::clear_global_cache();
   plugin_output::convert(t, p);
   migrate_global_cache();
   return plugin::convert(p);
diff --git a/compiler/cpp/test/plugin_stability_test.sh b/compiler/cpp/test/plugin_stability_test.sh
new file mode 100755
index 0000000..eb7c93d
--- /dev/null
+++ b/compiler/cpp/test/plugin_stability_test.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# this file is intended to be invoked by make.
+#
+# This file runs the compiler twice, using a plugin that just invokes
+# /bin/cat, and compares the output.  If GeneratorInput is
+# nondeterminsitic, you'd expect the output to differ from run-to-run.
+# So this tests that in fact, the output is stable from run-to-run.
+set -e
+mkdir -p gen-bincat
+PATH=.:"$PATH" ../thrift -r -gen bincat ../../../test/Include.thrift > gen-bincat/1.ser
+PATH=.:"$PATH" ../thrift -r -gen bincat ../../../test/Include.thrift > gen-bincat/2.ser
+diff --binary gen-bincat/1.ser gen-bincat/2.ser