THRIFT-2200: nested structs make generate_fingerprint() extremely slow and cause excessive CPU load

Patch: Jens Geyer
diff --git a/compiler/cpp/src/generate/t_cpp_generator.cc b/compiler/cpp/src/generate/t_cpp_generator.cc
index c7cdd26..1faa2c3 100644
--- a/compiler/cpp/src/generate/t_cpp_generator.cc
+++ b/compiler/cpp/src/generate/t_cpp_generator.cc
@@ -1079,6 +1079,9 @@
     comment = "; // ";
   }
 
+  if (! tstruct->has_fingerprint()) {
+    tstruct->generate_fingerprint();  // lazy fingerprint generation
+  }
   if (tstruct->has_fingerprint()) {
     out <<
       indent() << stat << "const char* " << nspace
@@ -1105,8 +1108,9 @@
     return;
   }
   ttype = get_true_type(ttype);
-  assert(ttype->has_fingerprint());
   string key = ttype->get_ascii_fingerprint() + (is_definition ? "-defn" : "-decl");
+  assert(ttype->has_fingerprint());  // test AFTER get due to lazy fingerprint generation
+
   // Note that we have generated this fingerprint.  If we already did, bail out.
   if (!reflected_fingerprints_.insert(key).second) {
     return;
diff --git a/compiler/cpp/src/main.cc b/compiler/cpp/src/main.cc
index 54c23f5..b9f7513 100755
--- a/compiler/cpp/src/main.cc
+++ b/compiler/cpp/src/main.cc
@@ -958,8 +958,8 @@
   try {
     pverbose("Program: %s\n", program->get_path().c_str());
 
-    // Compute fingerprints.
-    generate_all_fingerprints(program);
+    // Fingerprints are no longer computed up front; they are generated lazily on first use.
+    //generate_all_fingerprints(program);
 
     if (dump_docs) {
       dump_docstrings(program);
diff --git a/compiler/cpp/src/parse/parse.cc b/compiler/cpp/src/parse/parse.cc
index a655652..4b42f66 100644
--- a/compiler/cpp/src/parse/parse.cc
+++ b/compiler/cpp/src/parse/parse.cc
@@ -2,13 +2,17 @@
 #include "t_typedef.h"
 
 #include "md5.h"
+#include "main.h"
 
 void t_type::generate_fingerprint() {
-  std::string material = get_fingerprint_material();
-  md5_state_t ctx;
-  md5_init(&ctx);
-  md5_append(&ctx, (md5_byte_t*)(material.data()), (int)material.size());
-  md5_finish(&ctx, (md5_byte_t*)fingerprint_);
+  if (! has_fingerprint()) {
+    pdebug("generating fingerprint for %s", get_name().c_str());
+    std::string material = get_fingerprint_material();
+    md5_state_t ctx;
+    md5_init(&ctx);
+    md5_append(&ctx, (md5_byte_t*)(material.data()), (int)material.size());
+    md5_finish(&ctx, (md5_byte_t*)fingerprint_);
+  }
 }
 
 t_type* t_type::get_true_type() {
diff --git a/compiler/cpp/src/parse/t_struct.h b/compiler/cpp/src/parse/t_struct.h
index 1d03542..621f42e 100644
--- a/compiler/cpp/src/parse/t_struct.h
+++ b/compiler/cpp/src/parse/t_struct.h
@@ -147,10 +147,18 @@
 
   virtual std::string get_fingerprint_material() const {
     std::string rv = "{";
+    bool do_reserve = (members_in_id_order_.size() > 1);
+    size_t estimation = 0;
     members_type::const_iterator m_iter;
     for (m_iter = members_in_id_order_.begin(); m_iter != members_in_id_order_.end(); ++m_iter) {
       rv += (*m_iter)->get_fingerprint_material();
       rv += ";";
+      
+      if( do_reserve) {
+        estimation = members_in_id_order_.size() * rv.size() + 16;
+        rv.reserve( estimation);
+        do_reserve = false;
+      }
     }
     rv += "}";
     return rv;
diff --git a/compiler/cpp/src/parse/t_type.h b/compiler/cpp/src/parse/t_type.h
index f024279..74686b0 100644
--- a/compiler/cpp/src/parse/t_type.h
+++ b/compiler/cpp/src/parse/t_type.h
@@ -91,11 +91,13 @@
     return false;
   }
 
-  const uint8_t* get_binary_fingerprint() const {
+  const uint8_t* get_binary_fingerprint()  {
+    if(! has_fingerprint())  // lazy fingerprint generation, right now only used with the c++ generator
+      generate_fingerprint();
     return fingerprint_;
   }
 
-  std::string get_ascii_fingerprint() const {
+  std::string get_ascii_fingerprint() {
     std::string rv;
     const uint8_t* fp = get_binary_fingerprint();
     for (int i = 0; i < fingerprint_len; i++) {