Thrift: Generate structural fingerprints for thrift structs.

Summary:
We are going to write a dense protocol soon that eliminates some metadata.
To prevent version conflicts, we want each structure to have a
structural fingerprint that will change whenever the struct changes
in a way that will affect the dense protocol.
This change computes those fingerprints and puts them in
the generated C++ code.

Reviewed By: aditya, mcslee

Test Plan:
Recompiled thrift.
Thrifted DebugProtoTest with old and new compilers.
Compared output.
Also ran thrift with those "cout"s uncommented,
examined the fingerprint material,
and verified the hashes.

Revert Plan: ok


git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@665227 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/parse/t_base_type.h b/compiler/cpp/src/parse/t_base_type.h
index c996cf5..c08c449 100644
--- a/compiler/cpp/src/parse/t_base_type.h
+++ b/compiler/cpp/src/parse/t_base_type.h
@@ -7,6 +7,7 @@
 #ifndef T_BASE_TYPE_H
 #define T_BASE_TYPE_H
 
+#include <cstdlib>
 #include "t_type.h"
 
 /**
@@ -84,7 +85,22 @@
   bool is_base_type() const {
     return true;
   }
-    
+
+  virtual std::string get_fingerprint_material() const {  // returns the base type's name, e.g. "i32"
+    switch (base_) {
+      case TYPE_VOID   : return   "void"; break;
+      case TYPE_STRING : return "string"; break;
+      case TYPE_BOOL   : return   "bool"; break;
+      case TYPE_BYTE   : return   "byte"; break;
+      case TYPE_I16    : return    "i16"; break;
+      case TYPE_I32    : return    "i32"; break;
+      case TYPE_I64    : return    "i64"; break;
+      case TYPE_DOUBLE : return "double"; break;
+      default:  // any other base_ value is reported as a compiler bug
+        throw "BUG: Can't get fingerprint material for this base type.";
+    }
+  }
+
  private:
   t_base base_;
 
diff --git a/compiler/cpp/src/parse/t_enum.h b/compiler/cpp/src/parse/t_enum.h
index bc46017..766c100 100644
--- a/compiler/cpp/src/parse/t_enum.h
+++ b/compiler/cpp/src/parse/t_enum.h
@@ -36,6 +36,10 @@
     return true;
   }
 
+  virtual std::string get_fingerprint_material() const {  // always the literal "enum"; constants_ is not consulted
+    return "enum";
+  }
+
  private:
   std::vector<t_enum_value*> constants_;
 };
diff --git a/compiler/cpp/src/parse/t_field.h b/compiler/cpp/src/parse/t_field.h
index a4a2cc7..25c50a5 100644
--- a/compiler/cpp/src/parse/t_field.h
+++ b/compiler/cpp/src/parse/t_field.h
@@ -8,6 +8,7 @@
 #define T_FIELD_H
 
 #include <string>
+#include <boost/lexical_cast.hpp>
 
 // Forward declare for xsd_attrs
 class t_struct;
@@ -112,6 +113,14 @@
     has_doc_ = true;                                          
   }                                                           
 
+  // Separate function from t_type::get_fingerprint_material, but same purpose.
+  // Yields "<key_>:" + "opt-" (only when req_ == OPTIONAL) + the type's material.
+  std::string get_fingerprint_material() const {
+    return boost::lexical_cast<std::string>(key_) + ":" +
+      (req_ == OPTIONAL ? "opt-" : "") +
+      type_->get_fingerprint_material();
+  }
+
  private:
   t_type* type_;
   std::string name_;
diff --git a/compiler/cpp/src/parse/t_list.h b/compiler/cpp/src/parse/t_list.h
index 71978ef..32dc90b 100644
--- a/compiler/cpp/src/parse/t_list.h
+++ b/compiler/cpp/src/parse/t_list.h
@@ -27,6 +27,10 @@
     return true;
   }
 
+  virtual std::string get_fingerprint_material() const {  // "list<" + element type's material + ">"
+    return "list<" + elem_type_->get_fingerprint_material() + ">";
+  }
+
  private:
   t_type* elem_type_;
 };
diff --git a/compiler/cpp/src/parse/t_map.h b/compiler/cpp/src/parse/t_map.h
index 02c671e..8876858 100644
--- a/compiler/cpp/src/parse/t_map.h
+++ b/compiler/cpp/src/parse/t_map.h
@@ -33,6 +33,11 @@
     return true;
   }
 
+  virtual std::string get_fingerprint_material() const {  // "map<" + key material + "," + value material + ">"
+    return "map<" + key_type_->get_fingerprint_material() +
+      "," + val_type_->get_fingerprint_material() + ">";
+  }
+
  private:
   t_type* key_type_;
   t_type* val_type_;
diff --git a/compiler/cpp/src/parse/t_service.h b/compiler/cpp/src/parse/t_service.h
index c10ee11..f5665b6 100644
--- a/compiler/cpp/src/parse/t_service.h
+++ b/compiler/cpp/src/parse/t_service.h
@@ -43,6 +43,11 @@
     return extends_;
   }
 
+  virtual std::string get_fingerprint_material() const {  // never returns: always throws a string literal
+    // Services should never be used in fingerprints, so reaching this is a bug.
+    throw "BUG: Can't get fingerprint material for service.";
+  }
+
  private:
   std::vector<t_function*> functions_;
   t_service* extends_;
diff --git a/compiler/cpp/src/parse/t_set.h b/compiler/cpp/src/parse/t_set.h
index 6e505ad..68a0029 100644
--- a/compiler/cpp/src/parse/t_set.h
+++ b/compiler/cpp/src/parse/t_set.h
@@ -27,6 +27,10 @@
     return true;
   }
 
+  virtual std::string get_fingerprint_material() const {  // "set<" + element type's material + ">"
+    return "set<" + elem_type_->get_fingerprint_material() + ">";
+  }
+
  private:
   t_type* elem_type_;
 };
diff --git a/compiler/cpp/src/parse/t_struct.h b/compiler/cpp/src/parse/t_struct.h
index 256e82c..ad12677 100644
--- a/compiler/cpp/src/parse/t_struct.h
+++ b/compiler/cpp/src/parse/t_struct.h
@@ -9,10 +9,14 @@
 
 #include <vector>
 #include <string>
+#include <cstring>
 
 #include "t_type.h"
 #include "t_field.h"
 
+// What's worse?  This, or making a src/parse/non_inlined.cc?
+#include "md5.h"
+
 // Forward declare that puppy
 class t_program;
 
@@ -27,12 +31,18 @@
   t_struct(t_program* program) :
     t_type(program),
     is_xception_(false),
-    xsd_all_(false) {}
+    xsd_all_(false)
+  {
+    memset(fingerprint_, 0, sizeof(fingerprint_));
+  }
 
   t_struct(t_program* program, const std::string& name) :
     t_type(program, name),
     is_xception_(false),
-    xsd_all_(false) {}
+    xsd_all_(false)
+  {
+    memset(fingerprint_, 0, sizeof(fingerprint_));
+  }
 
   void set_name(const std::string& name) {
     name_ = name;
@@ -66,12 +76,80 @@
     return is_xception_;
   }
 
+  virtual std::string get_fingerprint_material() const {  // "{" + (field material + ";") per member + "}"
+    std::string rv = "{";
+    std::vector<t_field*>::const_iterator m_iter;
+    for (m_iter = members_.begin(); m_iter != members_.end(); ++m_iter) {  // in members_ order
+      rv += (**m_iter).get_fingerprint_material();
+      rv += ";";
+    }
+    rv += "}";
+    return rv;
+  }
+
+  // Fingerprint should change whenever (and only when)
+  // the encoding via TDenseProtocol changes.
+  static const int fingerprint_len = 16;
+
+  // Call this before trying get_*_fingerprint(); it fills fingerprint_ with the MD5 of the material.
+  void generate_fingerprint() {
+    std::string material = get_fingerprint_material();
+    MD5_CTX ctx;
+    MD5Init(&ctx);
+    MD5Update(&ctx, (unsigned char*)(material.data()), material.size());
+    MD5Final(fingerprint_, &ctx);
+    // Debug output (uncomment to inspect name, material and hash):
+    //std::cout << material << std::endl;
+    //std::cout << get_ascii_fingerprint() << std::endl << std::endl;
+  }
+
+  bool has_fingerprint() const {  // true once any byte is nonzero; the constructors zero fingerprint_
+    for (int i = 0; i < fingerprint_len; i++) {
+      if (fingerprint_[i] != 0) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  const uint8_t* get_binary_fingerprint() const {  // pointer to the fingerprint_len raw bytes held by this struct
+    return fingerprint_;
+  }
+
+  std::string get_ascii_fingerprint() const {  // uppercase hex, two characters per fingerprint byte
+    std::string rv;
+    const uint8_t* fp = get_binary_fingerprint();
+    for (int i = 0; i < fingerprint_len; i++) {
+      rv += byte_to_hex(fp[i]);
+    }
+    return rv;
+  }
+
+  // This function will break (maybe badly) unless 0 <= num <= 15.
+  static char nybble_to_xdigit(int num) {
+    if (num < 10) {
+      return '0' + num;
+    } else {
+      return 'A' + num - 10;
+    }
+  }
+
+  static std::string byte_to_hex(uint8_t byte) {  // two hex digits, high nybble first
+    std::string rv;
+    rv += nybble_to_xdigit(byte >> 4);
+    rv += nybble_to_xdigit(byte & 0x0f);
+    return rv;
+  }
+
+
  private:
+
   std::vector<t_field*> members_;
   bool is_xception_;
 
   bool xsd_all_;
 
+  uint8_t fingerprint_[fingerprint_len];
 };
 
 #endif
diff --git a/compiler/cpp/src/parse/t_type.h b/compiler/cpp/src/parse/t_type.h
index e210a6b..44f7cc8 100644
--- a/compiler/cpp/src/parse/t_type.h
+++ b/compiler/cpp/src/parse/t_type.h
@@ -50,6 +50,11 @@
     return program_;
   }
 
+  // Return a string that uniquely identifies this type
+  // from any other thrift type in the world, as far as
+  // TDenseProtocol is concerned.
+  virtual std::string get_fingerprint_material() const = 0;
+
  protected:
   t_type() {}
 
diff --git a/compiler/cpp/src/parse/t_typedef.h b/compiler/cpp/src/parse/t_typedef.h
index 417b964..341a6ff 100644
--- a/compiler/cpp/src/parse/t_typedef.h
+++ b/compiler/cpp/src/parse/t_typedef.h
@@ -39,6 +39,10 @@
     return true;
   }
 
+  virtual std::string get_fingerprint_material() const {  // delegates to type_, so the typedef adds nothing of its own
+    return type_->get_fingerprint_material();
+  }
+
  private:
   t_type* type_;
   std::string symbolic_;