THRIFT-2409 UTF-8 sent by PHP as JSON is not understood by TJSONProtocol
Client: Java
Patch: Phongphan Phuttha <phongphan@acm.org>

This closes #667
diff --git a/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java b/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java
index 9876e13..12341ab 100644
--- a/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java
+++ b/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java
@@ -19,8 +19,10 @@
 
 package org.apache.thrift.protocol;
 
+import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Stack;
 
 import org.apache.thrift.TByteArrayOutputStream;
@@ -640,6 +642,7 @@
   private TByteArrayOutputStream readJSONString(boolean skipContext)
     throws TException {
     TByteArrayOutputStream arr = new TByteArrayOutputStream(DEF_STRING_SIZE);
+    ArrayList<Character> codeunits = new ArrayList<Character>();
     if (!skipContext) {
       context_.read();
     }
@@ -652,10 +655,49 @@
       if (ch == ESCSEQ[0]) {
         ch = reader_.read();
         if (ch == ESCSEQ[1]) {
-          readJSONSyntaxChar(ZERO);
-          readJSONSyntaxChar(ZERO);
-          trans_.readAll(tmpbuf_, 0, 2);
-          ch = (byte)((hexVal((byte)tmpbuf_[0]) << 4) + hexVal(tmpbuf_[1]));
+          // JSON unicode escape: the four hex digits that follow encode one UTF-16 code unit.
+          trans_.readAll(tmpbuf_, 0, 4);
+          // Keep the code unit in a char (unsigned 16 bits). A short would sign-extend
+          // values >= 0x8000 when widened, and String(int[], int, int) throws
+          // IllegalArgumentException for a negative code point.
+          char cu = (char)(
+              (hexVal(tmpbuf_[0]) << 12) +
+              (hexVal(tmpbuf_[1]) << 8) +
+              (hexVal(tmpbuf_[2]) << 4) +
+              hexVal(tmpbuf_[3]));
+          try {
+            // A buffered high surrogate must be completed by a low surrogate next.
+            if (codeunits.size() > 0 && !Character.isLowSurrogate(cu)) {
+              throw new TProtocolException(TProtocolException.INVALID_DATA,
+                  "Expected low surrogate char");
+            }
+            if (Character.isHighSurrogate(cu)) {
+              // First half of a pair: hold it until its partner arrives.
+              codeunits.add(cu);
+            }
+            else if (Character.isLowSurrogate(cu)) {
+              if (codeunits.size() == 0) {
+                throw new TProtocolException(TProtocolException.INVALID_DATA,
+                    "Expected high surrogate char");
+              }
+              // Pair complete: emit the supplementary character as UTF-8.
+              arr.write(new String(new char[] { codeunits.get(0), cu }).getBytes("UTF-8"));
+              codeunits.clear();
+            }
+            else {
+              // Ordinary non-surrogate character.
+              arr.write(String.valueOf(cu).getBytes("UTF-8"));
+            }
+            continue;
+          }
+          catch (UnsupportedEncodingException ex) {
+            throw new TProtocolException(TProtocolException.NOT_IMPLEMENTED,
+                "JVM does not support UTF-8");
+          }
+          catch (IOException ex) {
+            throw new TProtocolException(TProtocolException.INVALID_DATA,
+                "Invalid unicode sequence");
+          }
         }
         else {
           int off = ESCAPE_CHARS.indexOf(ch);
diff --git a/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java b/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java
index d7376ac..1320749 100644
--- a/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java
+++ b/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java
@@ -18,6 +18,12 @@
  */
 package org.apache.thrift.protocol;
 
+import java.io.IOException;
+
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TJSONProtocol;
+import org.apache.thrift.transport.TMemoryBuffer;
+
 public class TestTJSONProtocol extends ProtocolTestBase {
   @Override
   protected TProtocolFactory getFactory() {
@@ -28,4 +34,15 @@
   protected boolean canBeUsedNaked() {
     return false;
   }
+
+  /** Escaped BMP char U+0E01 and escaped surrogate pair for U+1D11E must decode to text. */
+  public void testEscapedUnicode() throws TException, IOException {
+    final String wireJson = "\"hello unicode \\u0e01\\ud834\\udd1e world\"";
+    final String decoded = "hello unicode \u0e01\ud834\udd1e world";
+    // Build the transport and protocol first, then load the raw JSON bytes to be read.
+    final TMemoryBuffer transport = new TMemoryBuffer(1000);
+    final TJSONProtocol jsonProtocol = new TJSONProtocol(transport);
+    transport.write(wireJson.getBytes("UTF-8"));
+    assertEquals(decoded, jsonProtocol.readString());
+  }
 }