THRIFT-3350 Python JSON protocol does not encode binary as Base64
Client: Python
Patch: Nobuaki Sukegawa

This closes #697
diff --git a/compiler/cpp/src/generate/t_py_generator.cc b/compiler/cpp/src/generate/t_py_generator.cc
index 74904ac..31936b7 100644
--- a/compiler/cpp/src/generate/t_py_generator.cc
+++ b/compiler/cpp/src/generate/t_py_generator.cc
@@ -1944,7 +1944,9 @@
         throw "compiler error: cannot serialize void field in a struct: " + name;
         break;
       case t_base_type::TYPE_STRING:
-        if (((t_base_type*)type)->is_binary() || !gen_utf8strings_) {
+        if (((t_base_type*)type)->is_binary()) {
+          out << "readBinary()";
+        } else if(!gen_utf8strings_) {
           out << "readString()";
         } else {
           out << "readString().decode('utf-8')";
@@ -2117,7 +2119,9 @@
         throw "compiler error: cannot serialize void field in a struct: " + name;
         break;
       case t_base_type::TYPE_STRING:
-        if (((t_base_type*)type)->is_binary() || !gen_utf8strings_) {
+        if (((t_base_type*)type)->is_binary()) {
+          out << "writeBinary(" << name << ")";
+        } else if (!gen_utf8strings_) {
           out << "writeString(" << name << ")";
         } else {
           out << "writeString(" << name << ".encode('utf-8'))";
@@ -2455,8 +2459,10 @@
     ttype = ((t_typedef*)ttype)->get_type();
   }
 
-  if (ttype->is_base_type() || ttype->is_enum()) {
-    return "None";
+  if (ttype->is_base_type() && reinterpret_cast<t_base_type*>(ttype)->is_binary()) {
+    return  "'BINARY'";
+  } else if (ttype->is_base_type() || ttype->is_enum()) {
+    return  "None";
   } else if (ttype->is_struct() || ttype->is_xception()) {
     return "(" + type_name(ttype) + ", " + type_name(ttype) + ".thrift_spec)";
   } else if (ttype->is_map()) {
diff --git a/lib/py/src/protocol/TJSONProtocol.py b/lib/py/src/protocol/TJSONProtocol.py
index aada3fc..7d56545 100644
--- a/lib/py/src/protocol/TJSONProtocol.py
+++ b/lib/py/src/protocol/TJSONProtocol.py
@@ -362,6 +362,12 @@
 
   def readJSONBase64(self):
     string = self.readJSONString(False)
+    size = len(string)
+    m = size % 4
+    # Force padding since b64encode method does not allow it
+    if m != 0:
+      for i in range(4 - m):
+        string += '='
     return base64.b64decode(string)
 
   def readJSONObjectStart(self):
diff --git a/lib/py/src/protocol/TProtocol.py b/lib/py/src/protocol/TProtocol.py
index 22339c0..ca22c48 100644
--- a/lib/py/src/protocol/TProtocol.py
+++ b/lib/py/src/protocol/TProtocol.py
@@ -108,6 +108,9 @@
   def writeBinary(self, str_val):
     pass
 
+  def writeBinary(self, str_val):
+    return self.writeString(str_val)
+
   def readMessageBegin(self):
     pass
 
@@ -168,6 +171,9 @@
   def readBinary(self):
     pass
 
+  def readBinary(self):
+    return self.readString()
+
   def skip(self, ttype):
     if ttype == TType.STOP:
       return
@@ -216,7 +222,7 @@
        (None, None, False),  # 0 TType.STOP
        (None, None, False),  # 1 TType.VOID # TODO: handle void?
        ('readBool', 'writeBool', False),  # 2 TType.BOOL
-       ('readByte',  'writeByte', False),  # 3 TType.BYTE and I08
+       ('readByte', 'writeByte', False),  # 3 TType.BYTE and I08
        ('readDouble', 'writeDouble', False),  # 4 TType.DOUBLE
        (None, None, False),  # 5 undefined
        ('readI16', 'writeI16', False),  # 6 TType.I16
@@ -233,9 +239,17 @@
        (None, None, False)  # 17 TType.UTF16 # TODO: handle utf16 types?
       )
 
+  def _ttype_handlers(self, ttype, spec):
+    if spec == 'BINARY':
+      if ttype != TType.STRING:
+        raise TProtocolException(type=TProtocolException.INVALID_DATA,
+                                 message='Invalid binary field type %d' % ttype)
+      return ('readBinary', 'writeBinary', False)
+    return self._TTYPE_HANDLERS[ttype]
+
   def readFieldByTType(self, ttype, spec):
     try:
-      (r_handler, w_handler, is_container) = self._TTYPE_HANDLERS[ttype]
+      (r_handler, w_handler, is_container) = self._ttype_handlers(ttype, spec)
     except IndexError:
       raise TProtocolException(type=TProtocolException.INVALID_DATA,
                                message='Invalid field type %d' % (ttype))
@@ -250,7 +264,7 @@
   def readContainerList(self, spec):
     results = []
     ttype, tspec = spec[0], spec[1]
-    r_handler = self._TTYPE_HANDLERS[ttype][0]
+    r_handler = self._ttype_handlers(ttype, spec)[0]
     reader = getattr(self, r_handler)
     (list_type, list_len) = self.readListBegin()
     if tspec is None:
@@ -259,7 +273,7 @@
         results.append(reader())
     else:
       # this is like an inlined readFieldByTType
-      container_reader = self._TTYPE_HANDLERS[list_type][0]
+      container_reader = self._ttype_handlers(list_type, tspec)[0]
       val_reader = getattr(self, container_reader)
       for idx in range(list_len):
         val = val_reader(tspec)
@@ -270,7 +284,7 @@
   def readContainerSet(self, spec):
     results = set()
     ttype, tspec = spec[0], spec[1]
-    r_handler = self._TTYPE_HANDLERS[ttype][0]
+    r_handler = self._ttype_handlers(ttype, spec)[0]
     reader = getattr(self, r_handler)
     (set_type, set_len) = self.readSetBegin()
     if tspec is None:
@@ -278,7 +292,7 @@
       for idx in range(set_len):
         results.add(reader())
     else:
-      container_reader = self._TTYPE_HANDLERS[set_type][0]
+      container_reader = self._ttype_handlers(set_type, tspec)[0]
       val_reader = getattr(self, container_reader)
       for idx in range(set_len):
         results.add(val_reader(tspec))
@@ -298,8 +312,8 @@
     (map_ktype, map_vtype, map_len) = self.readMapBegin()
     # TODO: compare types we just decoded with thrift_spec and
     # abort/skip if types disagree
-    key_reader = getattr(self, self._TTYPE_HANDLERS[key_ttype][0])
-    val_reader = getattr(self, self._TTYPE_HANDLERS[val_ttype][0])
+    key_reader = getattr(self, self._ttype_handlers(key_ttype, key_spec)[0])
+    val_reader = getattr(self, self._ttype_handlers(val_ttype, val_spec)[0])
     # list values are simple types
     for idx in range(map_len):
       if key_spec is None:
@@ -342,7 +356,7 @@
 
   def writeContainerList(self, val, spec):
     self.writeListBegin(spec[0], len(val))
-    r_handler, w_handler, is_container = self._TTYPE_HANDLERS[spec[0]]
+    r_handler, w_handler, is_container = self._ttype_handlers(spec[0], spec)
     e_writer = getattr(self, w_handler)
     if not is_container:
       for elem in val:
@@ -354,7 +368,7 @@
 
   def writeContainerSet(self, val, spec):
     self.writeSetBegin(spec[0], len(val))
-    r_handler, w_handler, is_container = self._TTYPE_HANDLERS[spec[0]]
+    r_handler, w_handler, is_container = self._ttype_handlers(spec[0], spec)
     e_writer = getattr(self, w_handler)
     if not is_container:
       for elem in val:
@@ -367,8 +381,8 @@
   def writeContainerMap(self, val, spec):
     k_type = spec[0]
     v_type = spec[2]
-    ignore, ktype_name, k_is_container = self._TTYPE_HANDLERS[k_type]
-    ignore, vtype_name, v_is_container = self._TTYPE_HANDLERS[v_type]
+    ignore, ktype_name, k_is_container = self._ttype_handlers(k_type, spec)
+    ignore, vtype_name, v_is_container = self._ttype_handlers(v_type, spec)
     k_writer = getattr(self, ktype_name)
     v_writer = getattr(self, vtype_name)
     self.writeMapBegin(k_type, v_type, len(val))
@@ -404,7 +418,7 @@
     self.writeStructEnd()
 
   def writeFieldByTType(self, ttype, val, spec):
-    r_handler, w_handler, is_container = self._TTYPE_HANDLERS[ttype]
+    r_handler, w_handler, is_container = self._ttype_handlers(ttype, spec)
     writer = getattr(self, w_handler)
     if is_container:
       writer(val, spec)
diff --git a/test/known_failures_Linux.json b/test/known_failures_Linux.json
index d326c59..b98e6c1 100644
--- a/test/known_failures_Linux.json
+++ b/test/known_failures_Linux.json
@@ -1,8 +1,4 @@
 [
-  "c_glib-py_binary-accel_buffered-ip",
-  "c_glib-py_binary-accel_framed-ip",
-  "c_glib-py_binary_buffered-ip",
-  "c_glib-py_binary_framed-ip",
   "c_glib-rb_binary-accel_buffered-ip",
   "c_glib-rb_binary-accel_framed-ip",
   "c_glib-rb_binary_buffered-ip",
diff --git a/test/py/TestClient.py b/test/py/TestClient.py
index 5b858ef..051890b 100755
--- a/test/py/TestClient.py
+++ b/test/py/TestClient.py
@@ -126,8 +126,6 @@
     self.assertEqual(self.client.testDouble(-0.000341012439638598279), -0.000341012439638598279)
 
   def testBinary(self):
-    if isinstance(self, JSONTest):
-      self.skipTest('JSON protocol does not handle binary correctly.')
     print('testBinary')
     val = bytearray([i for i in range(0, 256)])
     self.assertEqual(bytearray(self.client.testBinary(bytes(val))), val)