THRIFT-395. python: Add option to treat strings as UTF-8 unicode
Add the "utf8strings" option to the Python generator.  If set, all
Thrift strings (not binary) will be expected to be unicode objects, not
str.  They will be encoded as UTF-8 before serialization and decoded as
UTF-8 after deserialization.
The accelerator module for TBinaryProtocol is not affected.
git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@959516 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/generate/t_py_generator.cc b/compiler/cpp/src/generate/t_py_generator.cc
index 8c54502..c6f84b8 100644
--- a/compiler/cpp/src/generate/t_py_generator.cc
+++ b/compiler/cpp/src/generate/t_py_generator.cc
@@ -52,6 +52,9 @@
     iter = parsed_options.find("twisted");
     gen_twisted_ = (iter != parsed_options.end());
 
+    iter = parsed_options.find("utf8strings");
+    gen_utf8strings_ = (iter != parsed_options.end());
+
     if (gen_twisted_){
       out_dir_base_ = "gen-py.twisted";
     } else {
@@ -206,6 +209,11 @@
   bool gen_twisted_;
 
   /**
+   * True iff non-binary strings are encoded to / decoded from UTF-8.
+   */
+  bool gen_utf8strings_;
+
+  /**
    * File streams
    */
 
@@ -1752,7 +1760,11 @@
           name;
         break;
       case t_base_type::TYPE_STRING:
-        out << "readString();";
+        if (((t_base_type*)type)->is_binary() || !gen_utf8strings_) {
+          out << "readString();";
+        } else {
+          out << "readString().decode('utf-8')";
+        }
         break;
       case t_base_type::TYPE_BOOL:
         out << "readBool();";
@@ -1946,7 +1958,11 @@
           "compiler error: cannot serialize void field in a struct: " + name;
         break;
       case t_base_type::TYPE_STRING:
-        out << "writeString(" << name << ")";
+        if (((t_base_type*)type)->is_binary() || !gen_utf8strings_) {
+          out << "writeString(" << name << ")";
+        } else {
+          out << "writeString(" << name << ".encode('utf-8'))";
+        }
         break;
       case t_base_type::TYPE_BOOL:
         out << "writeBool(" << name << ")";