Thrift: Generate structural fingerprints for thrift structs.
Summary:
We are going to write a dense protocol soon that eliminates some metadata.
To prevent version conflicts, we want each structure to have a
structural fingerprint that will change whenever the struct changes
in a way that will affect the dense protocol.
This change computes those fingerprints and puts them in
the generated C++ code.
Reviewed By: aditya, mcslee
Test Plan:
Recompiled thrift.
Thrifted DebugProtoTest with old and new compilers.
Compared output.
Also ran thrift with those "cout"s uncommented,
examined the fingerprint material,
and verified the hashes.
Revert Plan: ok
git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@665227 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/parse/t_base_type.h b/compiler/cpp/src/parse/t_base_type.h
index c996cf5..c08c449 100644
--- a/compiler/cpp/src/parse/t_base_type.h
+++ b/compiler/cpp/src/parse/t_base_type.h
@@ -7,6 +7,7 @@
#ifndef T_BASE_TYPE_H
#define T_BASE_TYPE_H
+#include <cstdlib>
#include "t_type.h"
/**
@@ -84,7 +85,22 @@
bool is_base_type() const {
return true;
}
-
+
+  // Returns the short name used for this base type inside fingerprint
+  // material ("void", "string", "bool", "byte", "i16", "i32", "i64",
+  // "double"). Container, field and struct material embed these strings,
+  // so changing one changes every fingerprint that involves the type.
+  // Throws a const char* message if base_ matches none of the cases.
+  virtual std::string get_fingerprint_material() const {
+    switch (base_) {
+      case TYPE_VOID   : return "void";
+      case TYPE_STRING : return "string";
+      case TYPE_BOOL   : return "bool";
+      case TYPE_BYTE   : return "byte";
+      case TYPE_I16    : return "i16";
+      case TYPE_I32    : return "i32";
+      // Was "164" (digit one), inconsistent with "i16"/"i32" above.
+      case TYPE_I64    : return "i64";
+      case TYPE_DOUBLE : return "double";
+      default:
+        throw "BUG: Can't get fingerprint material for this base type.";
+    }
+  }
+
private:
t_base base_;
diff --git a/compiler/cpp/src/parse/t_enum.h b/compiler/cpp/src/parse/t_enum.h
index bc46017..766c100 100644
--- a/compiler/cpp/src/parse/t_enum.h
+++ b/compiler/cpp/src/parse/t_enum.h
@@ -36,6 +36,10 @@
return true;
}
+  // Every enum yields the same fixed material, "enum"; nothing specific to
+  // the individual enum is included.
+  virtual std::string get_fingerprint_material() const {
+    return std::string("enum");
+  }
+
private:
std::vector<t_enum_value*> constants_;
};
diff --git a/compiler/cpp/src/parse/t_field.h b/compiler/cpp/src/parse/t_field.h
index a4a2cc7..25c50a5 100644
--- a/compiler/cpp/src/parse/t_field.h
+++ b/compiler/cpp/src/parse/t_field.h
@@ -8,6 +8,7 @@
#define T_FIELD_H
#include <string>
+#include <boost/lexical_cast.hpp>
// Forward declare for xsd_attrs
class t_struct;
@@ -112,6 +113,14 @@
has_doc_ = true;
}
+  // Fingerprint material for a single field: "<key>:", then "opt-" when
+  // the field is OPTIONAL, then the material of the field's type.
+  // This is a separate function from t_type::get_fingerprint_material,
+  // but it serves the same purpose.
+  std::string get_fingerprint_material() const {
+    std::string material = boost::lexical_cast<std::string>(key_);
+    material += ":";
+    if (req_ == OPTIONAL) {
+      material += "opt-";
+    }
+    material += type_->get_fingerprint_material();
+    return material;
+  }
+
private:
t_type* type_;
std::string name_;
diff --git a/compiler/cpp/src/parse/t_list.h b/compiler/cpp/src/parse/t_list.h
index 71978ef..32dc90b 100644
--- a/compiler/cpp/src/parse/t_list.h
+++ b/compiler/cpp/src/parse/t_list.h
@@ -27,6 +27,10 @@
return true;
}
+  // Material for a list: "list<" + element type material + ">".
+  virtual std::string get_fingerprint_material() const {
+    std::string material("list<");
+    material += elem_type_->get_fingerprint_material();
+    material += ">";
+    return material;
+  }
+
private:
t_type* elem_type_;
};
diff --git a/compiler/cpp/src/parse/t_map.h b/compiler/cpp/src/parse/t_map.h
index 02c671e..8876858 100644
--- a/compiler/cpp/src/parse/t_map.h
+++ b/compiler/cpp/src/parse/t_map.h
@@ -33,6 +33,11 @@
return true;
}
+  // Material for a map: "map<" + key type material + "," + value type
+  // material + ">".
+  virtual std::string get_fingerprint_material() const {
+    const std::string key_material = key_type_->get_fingerprint_material();
+    const std::string val_material = val_type_->get_fingerprint_material();
+    std::string material("map<");
+    material += key_material;
+    material += ",";
+    material += val_material;
+    material += ">";
+    return material;
+  }
+
private:
t_type* key_type_;
t_type* val_type_;
diff --git a/compiler/cpp/src/parse/t_service.h b/compiler/cpp/src/parse/t_service.h
index c10ee11..f5665b6 100644
--- a/compiler/cpp/src/parse/t_service.h
+++ b/compiler/cpp/src/parse/t_service.h
@@ -43,6 +43,11 @@
return extends_;
}
+  // Services should never take part in a fingerprint, so a call here is
+  // treated as a compiler bug: this always throws a const char* message.
+  virtual std::string get_fingerprint_material() const {
+    const char* const complaint =
+      "BUG: Can't get fingerprint material for service.";
+    throw complaint;
+  }
+
private:
std::vector<t_function*> functions_;
t_service* extends_;
diff --git a/compiler/cpp/src/parse/t_set.h b/compiler/cpp/src/parse/t_set.h
index 6e505ad..68a0029 100644
--- a/compiler/cpp/src/parse/t_set.h
+++ b/compiler/cpp/src/parse/t_set.h
@@ -27,6 +27,10 @@
return true;
}
+  // Material for a set: "set<" + element type material + ">".
+  virtual std::string get_fingerprint_material() const {
+    std::string material("set<");
+    material += elem_type_->get_fingerprint_material();
+    material += ">";
+    return material;
+  }
+
private:
t_type* elem_type_;
};
diff --git a/compiler/cpp/src/parse/t_struct.h b/compiler/cpp/src/parse/t_struct.h
index 256e82c..ad12677 100644
--- a/compiler/cpp/src/parse/t_struct.h
+++ b/compiler/cpp/src/parse/t_struct.h
@@ -9,10 +9,14 @@
#include <vector>
#include <string>
+#include <cstring>
#include "t_type.h"
#include "t_field.h"
+// What's worse? This, or making a src/parse/non_inlined.cc?
+#include "md5.h"
+
// Forward declare that puppy
class t_program;
@@ -27,12 +31,18 @@
t_struct(t_program* program) :
t_type(program),
is_xception_(false),
- xsd_all_(false) {}
+ xsd_all_(false)
+ {
+ memset(fingerprint_, 0, sizeof(fingerprint_)); // all-zero makes has_fingerprint() return false
+ }
t_struct(t_program* program, const std::string& name) :
t_type(program, name),
is_xception_(false),
- xsd_all_(false) {}
+ xsd_all_(false)
+ {
+ memset(fingerprint_, 0, sizeof(fingerprint_)); // all-zero makes has_fingerprint() return false
+ }
void set_name(const std::string& name) {
name_ = name;
@@ -66,12 +76,80 @@
return is_xception_;
}
+  // Material for a struct: "{", then each member's field material followed
+  // by ";" (in the order the members are stored in members_), then "}".
+  virtual std::string get_fingerprint_material() const {
+    typedef std::vector<t_field*>::const_iterator field_iter;
+    std::string material("{");
+    const field_iter last = members_.end();
+    for (field_iter it = members_.begin(); it != last; ++it) {
+      const t_field* field = *it;
+      material += field->get_fingerprint_material();
+      material += ";";
+    }
+    material += "}";
+    return material;
+  }
+
+ // Size in bytes of fingerprint_. The fingerprint should change whenever
+ // (and only when) the encoding via TDenseProtocol changes.
+ static const int fingerprint_len = 16;
+
+  // Hashes get_fingerprint_material() with MD5 and stores the digest in
+  // fingerprint_. Call this before using get_*_fingerprint(); until then
+  // the buffer still holds the zeros written by the constructors.
+  void generate_fingerprint() {
+    std::string input = get_fingerprint_material();
+    unsigned char* bytes = (unsigned char*)(input.data());
+    MD5_CTX md5;
+    MD5Init(&md5);
+    MD5Update(&md5, bytes, input.size());
+    MD5Final(fingerprint_, &md5);
+    // Debugging aid: uncomment to print the name, material and hash.
+    //std::cout << get_name() << std::endl;
+    //std::cout << input << std::endl;
+    //std::cout << get_ascii_fingerprint() << std::endl << std::endl;
+  }
+
+  // Returns true when fingerprint_ contains at least one non-zero byte.
+  // A freshly constructed struct has an all-zero buffer, so this is false
+  // for it.
+  bool has_fingerprint() const {
+    int pos = 0;
+    // Skip leading zero bytes; stopping early means a non-zero byte exists.
+    while (pos < fingerprint_len && fingerprint_[pos] == 0) {
+      ++pos;
+    }
+    return pos != fingerprint_len;
+  }
+
+  // Returns a pointer to the first of the fingerprint_len raw fingerprint
+  // bytes held by this object.
+  const uint8_t* get_binary_fingerprint() const {
+    return &fingerprint_[0];
+  }
+
+  // Returns the fingerprint as text: two upper-case hex digits per byte,
+  // in byte order, 2 * fingerprint_len characters in total.
+  std::string get_ascii_fingerprint() const {
+    const uint8_t* cur = get_binary_fingerprint();
+    const uint8_t* const stop = cur + fingerprint_len;
+    std::string hex;
+    while (cur != stop) {
+      hex += byte_to_hex(*cur);
+      ++cur;
+    }
+    return hex;
+  }
+
+  // Converts a value in the range 0 <= num <= 15 to its upper-case
+  // hexadecimal digit ('0'-'9', 'A'-'F').
+  // The input is not validated: anything outside that range produces a
+  // character that is not a hex digit (16, for instance, gives 'G').
+  static char nybble_to_xdigit(int num) {
+    return (num < 10) ? ('0' + num) : ('A' + num - 10);
+  }
+
+  // Formats one byte as exactly two upper-case hex digits, high nybble
+  // first.
+  static std::string byte_to_hex(uint8_t byte) {
+    const char hi = nybble_to_xdigit(byte >> 4);
+    const char lo = nybble_to_xdigit(byte & 0x0f);
+    std::string hex(2, hi);
+    hex[1] = lo;
+    return hex;
+  }
+
+
private:
+
std::vector<t_field*> members_;
bool is_xception_;
bool xsd_all_;
+ uint8_t fingerprint_[fingerprint_len];
};
#endif
diff --git a/compiler/cpp/src/parse/t_type.h b/compiler/cpp/src/parse/t_type.h
index e210a6b..44f7cc8 100644
--- a/compiler/cpp/src/parse/t_type.h
+++ b/compiler/cpp/src/parse/t_type.h
@@ -50,6 +50,11 @@
return program_;
}
+ // Return a string that distinguishes this type from
+ // every other thrift type in the world, as far as
+ // TDenseProtocol is concerned.
+ virtual std::string get_fingerprint_material() const = 0;
+
protected:
t_type() {}
diff --git a/compiler/cpp/src/parse/t_typedef.h b/compiler/cpp/src/parse/t_typedef.h
index 417b964..341a6ff 100644
--- a/compiler/cpp/src/parse/t_typedef.h
+++ b/compiler/cpp/src/parse/t_typedef.h
@@ -39,6 +39,10 @@
return true;
}
+  // A typedef adds nothing of its own to the fingerprint: it returns the
+  // material of the type held in type_ unchanged.
+  virtual std::string get_fingerprint_material() const {
+    const t_type* underlying = type_;
+    return underlying->get_fingerprint_material();
+  }
+
private:
t_type* type_;
std::string symbolic_;