Add explicit binary type to Thrift
Summary: The new binary type is identical to string in all languages except Java. A Java String is NOT binary-safe, so we need to use a raw byte[] in that case. PHP/Ruby/Python strings are all binary-safe, and C++ std::string works fine and manages memory for you, so it's the safest route. Only Java needs this tweak.
Reviewed By: aditya
Test Plan: Use "binary" as a type instead of "string".
git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@665099 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/generate/t_java_generator.cc b/compiler/cpp/src/generate/t_java_generator.cc
index cc24610..3736ceb 100644
--- a/compiler/cpp/src/generate/t_java_generator.cc
+++ b/compiler/cpp/src/generate/t_java_generator.cc
@@ -1220,7 +1220,11 @@
           name;
         break;
       case t_base_type::TYPE_STRING:        
-        out << "readString();";
+        if (((t_base_type*)type)->is_binary()) {
+          out << "readBinary();";
+        } else {
+          out << "readString();";
+        }
         break;
       case t_base_type::TYPE_BOOL:
         out << "readBool();";
@@ -1431,7 +1435,11 @@
           "compiler error: cannot serialize void field in a struct: " + name;
         break;
       case t_base_type::TYPE_STRING:
-        out << "writeString(" << name << ");";
+        if (((t_base_type*)type)->is_binary()) {
+          out << "writeBinary(" << name << ");";
+        } else {
+          out << "writeString(" << name << ");";
+        }
         break;
       case t_base_type::TYPE_BOOL:
         out << "writeBool(" << name << ");";
@@ -1602,7 +1610,7 @@
   }
 
   if (ttype->is_base_type()) {
-    return base_type_name(((t_base_type*)ttype)->get_base(), in_container);
+    return base_type_name((t_base_type*)ttype, in_container);
   } else if (ttype->is_enum()) {
     return (in_container ? "Integer" : "int");
   } else if (ttype->is_map()) {
@@ -1642,13 +1650,19 @@
  * @param tbase The base type
  * @param container Is it going in a Java container?
  */
-string t_java_generator::base_type_name(t_base_type::t_base tbase,
+string t_java_generator::base_type_name(t_base_type* type,
                                         bool in_container) {
+  t_base_type::t_base tbase = type->get_base();
+
   switch (tbase) {
   case t_base_type::TYPE_VOID:
     return "void";
   case t_base_type::TYPE_STRING:
-    return "String";
+    if (type->is_binary()) {
+      return "byte[]";
+    } else {
+      return "String";
+    }
   case t_base_type::TYPE_BOOL:
     return "boolean";
   case t_base_type::TYPE_BYTE:
diff --git a/compiler/cpp/src/generate/t_java_generator.h b/compiler/cpp/src/generate/t_java_generator.h
index 7afffdb..077000e 100644
--- a/compiler/cpp/src/generate/t_java_generator.h
+++ b/compiler/cpp/src/generate/t_java_generator.h
@@ -124,7 +124,7 @@
   std::string java_type_imports();
   std::string java_thrift_imports();
   std::string type_name(t_type* ttype, bool in_container=false, bool in_init=false);
-  std::string base_type_name(t_base_type::t_base tbase, bool in_container=false);
+  std::string base_type_name(t_base_type* tbase, bool in_container=false);
   std::string declare_field(t_field* tfield, bool init=false);
   std::string function_signature(t_function* tfunction, std::string prefix="");
   std::string argument_list(t_struct* tstruct);
diff --git a/compiler/cpp/src/globals.h b/compiler/cpp/src/globals.h
index dd1067f..afc41da 100644
--- a/compiler/cpp/src/globals.h
+++ b/compiler/cpp/src/globals.h
@@ -47,6 +47,7 @@
 
 extern t_type* g_type_void;
 extern t_type* g_type_string;
+extern t_type* g_type_binary;
 extern t_type* g_type_slist;
 extern t_type* g_type_bool;
 extern t_type* g_type_byte;
diff --git a/compiler/cpp/src/main.cc b/compiler/cpp/src/main.cc
index eb48cd0..51de7fd 100644
--- a/compiler/cpp/src/main.cc
+++ b/compiler/cpp/src/main.cc
@@ -42,6 +42,7 @@
 
 t_type* g_type_void;
 t_type* g_type_string;
+t_type* g_type_binary;
 t_type* g_type_slist;
 t_type* g_type_bool;
 t_type* g_type_byte;
@@ -644,6 +645,8 @@
   // Initialize global types
   g_type_void   = new t_base_type("void",   t_base_type::TYPE_VOID);
   g_type_string = new t_base_type("string", t_base_type::TYPE_STRING);
+  g_type_binary = new t_base_type("string", t_base_type::TYPE_STRING);
+  ((t_base_type*)g_type_binary)->set_binary(true);
   g_type_slist  = new t_base_type("string", t_base_type::TYPE_STRING);
   ((t_base_type*)g_type_slist)->set_string_list(true);
   g_type_bool   = new t_base_type("bool",   t_base_type::TYPE_BOOL);
diff --git a/compiler/cpp/src/parse/t_base_type.h b/compiler/cpp/src/parse/t_base_type.h
index c9548bb..f3c9d37 100644
--- a/compiler/cpp/src/parse/t_base_type.h
+++ b/compiler/cpp/src/parse/t_base_type.h
@@ -48,7 +48,15 @@
   }
 
   bool is_string_list() const {
-    return base_ == TYPE_STRING && string_list_;
+    return (base_ == TYPE_STRING) && string_list_;
+  }
+
+  void set_binary(bool val) {
+    binary_ = val;
+  }
+  
+  bool is_binary() const {
+    return (base_ == TYPE_STRING) && binary_;
   }
 
   void set_string_enum(bool val) {
@@ -75,6 +83,7 @@
   t_base base_;
 
   bool string_list_;
+  bool binary_;
   bool string_enum_;
   std::vector<std::string> string_enum_vals_;
 };
diff --git a/compiler/cpp/src/thriftl.ll b/compiler/cpp/src/thriftl.ll
index abda14f..ff1b201 100644
--- a/compiler/cpp/src/thriftl.ll
+++ b/compiler/cpp/src/thriftl.ll
@@ -76,6 +76,7 @@
 "i64"           { return tok_i64;           }
 "double"        { return tok_double;        }
 "string"        { return tok_string;        }
+"binary"        { return tok_binary;        }
 "slist"         { return tok_slist;         }
 "senum"         { return tok_senum;         }
 "map"           { return tok_map;           }
diff --git a/compiler/cpp/src/thrifty.yy b/compiler/cpp/src/thrifty.yy
index 019fb9d..c6f38a2 100644
--- a/compiler/cpp/src/thrifty.yy
+++ b/compiler/cpp/src/thrifty.yy
@@ -82,6 +82,7 @@
 %token tok_bool
 %token tok_byte
 %token tok_string
+%token tok_binary
 %token tok_slist
 %token tok_senum
 %token tok_i16
@@ -792,6 +793,11 @@
       pdebug("BaseType -> tok_string");
       $$ = g_type_string;
     }
+| tok_binary
+    {
+      pdebug("BaseType -> tok_binary");
+      $$ = g_type_binary;
+    }
 | tok_slist
     {
       pdebug("BaseType -> tok_slist");
diff --git a/lib/java/src/protocol/TBinaryProtocol.java b/lib/java/src/protocol/TBinaryProtocol.java
index 4ba0941..be02a3e 100644
--- a/lib/java/src/protocol/TBinaryProtocol.java
+++ b/lib/java/src/protocol/TBinaryProtocol.java
@@ -126,6 +126,11 @@
     trans_.write(dat, 0, dat.length);
   }
 
+  public void writeBinary(byte[] bin) throws TException {
+    writeI32(bin.length);
+    trans_.write(bin, 0, bin.length);
+  }
+
   /**
    * Reading methods.
    */
@@ -238,4 +243,12 @@
     trans_.readAll(buf, 0, size);
     return new String(buf);
   }
+
+  public byte[] readBinary() throws TException {
+    int size = readI32();
+    byte[] buf = new byte[size];
+    trans_.readAll(buf, 0, size);
+    return buf;
+  }
+
 }
diff --git a/lib/java/src/protocol/TProtocol.java b/lib/java/src/protocol/TProtocol.java
index 23829cf..08a7ef3 100644
--- a/lib/java/src/protocol/TProtocol.java
+++ b/lib/java/src/protocol/TProtocol.java
@@ -84,6 +84,8 @@
 
   public abstract void writeString(String str) throws TException;
 
+  public abstract void writeBinary(byte[] bin) throws TException;
+
   /**
    * Reading methods.
    */
@@ -126,4 +128,6 @@
 
   public abstract String readString() throws TException;
 
+  public abstract byte[] readBinary() throws TException;
+
 }