THRIFT-3396 DART: UTF-8 sent by PHP as JSON is not understood by TJsonProtocol
Client: Dart
Patch: Phongphan Phuttha <phongphan@acm.org>

This closes #658
diff --git a/lib/dart/lib/src/protocol/t_json_protocol.dart b/lib/dart/lib/src/protocol/t_json_protocol.dart
index 4fa6499..ca91c8b 100644
--- a/lib/dart/lib/src/protocol/t_json_protocol.dart
+++ b/lib/dart/lib/src/protocol/t_json_protocol.dart
@@ -41,6 +41,7 @@
   TJsonProtocol(TTransport transport) : super(transport) {
     _rootContext = new _BaseContext(this);
     _reader = new _LookaheadReader(this);
+    _resetContext(); // presumably sets _context before first use; confirm
   }
 
   void _pushContext(_BaseContext c) {
@@ -284,10 +285,15 @@
     _writeJsonBase64(bytes);
   }
 
+  bool _isHighSurrogate(int cu) => 0xD800 <= cu && cu <= 0xDBFF; // UTF-16 lead
+
+  bool _isLowSurrogate(int cu) => 0xDC00 <= cu && cu <= 0xDFFF; // UTF-16 trail
+
   /// read
 
   Uint8List _readJsonString({bool skipContext: false}) {
     List<int> bytes = [];
+    List<int> codeunits = [];
 
     if (!skipContext) {
       _context.read();
@@ -308,7 +314,7 @@
 
       byte = _reader.read();
 
-      // distinguish between \u00XX and control chars like \n
+      // distinguish between \uXXXX and control chars like \n
       if (byte != _Constants.ESCSEQ_BYTES[1]) {
         String char = new String.fromCharCode(byte);
         int offset = _Constants.ESCAPE_CHARS.indexOf(char);
@@ -321,12 +327,36 @@
         continue;
       }
 
-      // it's \u00XX
-      _readJsonSyntaxChar(_Constants.HEX_0_BYTES[0]);
-      _readJsonSyntaxChar(_Constants.HEX_0_BYTES[0]);
-      transport.readAll(_tempBuffer, 0, 2);
-      byte = _hexVal(_tempBuffer[0]) << 4 + _hexVal(_tempBuffer[1]);
-      bytes.add(byte);
+      // it's \uXXXX
+      transport.readAll(_tempBuffer, 0, 4);
+      byte = (_hexVal(_tempBuffer[0]) << 12)
+        + (_hexVal(_tempBuffer[1]) << 8)
+        + (_hexVal(_tempBuffer[2]) << 4)
+        + _hexVal(_tempBuffer[3]);
+      if (_isHighSurrogate(byte)) {
+        if (codeunits.isNotEmpty) {
+          throw new TProtocolError(
+              TProtocolErrorType.INVALID_DATA, "Expected low surrogate");
+        }
+        codeunits.add(byte);
+      }
+      else if (_isLowSurrogate(byte)) {
+        if (codeunits.isEmpty) {
+          throw new TProtocolError(
+              TProtocolErrorType.INVALID_DATA, "Expected high surrogate");
+        }
+        codeunits.add(byte);
+        bytes.addAll(utf8Codec.encode(new String.fromCharCodes(codeunits)));
+        codeunits.clear();
+      }
+      else {
+        bytes.addAll(utf8Codec.encode(new String.fromCharCode(byte)));
+      }
+    }
+
+    if (codeunits.isNotEmpty) {
+      throw new TProtocolError(
+          TProtocolErrorType.INVALID_DATA, "Expected low surrogate");
     }
 
     return new Uint8List.fromList(bytes);
diff --git a/lib/dart/test/protocol/t_protocol_test.dart b/lib/dart/test/protocol/t_protocol_test.dart
index 88ddd4f..7362884 100644
--- a/lib/dart/test/protocol/t_protocol_test.dart
+++ b/lib/dart/test/protocol/t_protocol_test.dart
@@ -18,6 +18,7 @@
 library thrift.test.transport.t_json_protocol_test;
 
 import 'dart:async';
+import 'dart:convert' show UTF8;
 import 'dart:typed_data' show Uint8List;
 
 import 'package:test/test.dart';
@@ -352,6 +353,28 @@
       protocol.writeMessageBegin(message);
     });
 
+
+    test('Test escaped unicode', () async {
+      // Escapes in the JSON payload and the UTF-8 bytes expected for each:
+      //   \u0001        control char (U+0001)    -> 01
+      //   \u0e01        Thai KO KAI (U+0E01)     -> E0 B8 81
+      //   ' '           unescaped space          -> 20
+      //   \ud834\udd1e  G clef (U+1D11E, a pair) -> F0 9D 84 9E
+      var expectedUtf8 = [
+        0x01,
+        0xE0, 0xB8, 0x81,
+        0x20,
+        0xF0, 0x9D, 0x84, 0x9E
+      ];
+      var payload = UTF8.encode(r'"\u0001\u0e01 \ud834\udd1e"');
+
+      var transport = new TBufferedTransport()..writeAll(payload);
+      var protocol = new TJsonProtocol(transport);
+      await protocol.transport.flush();
+
+      expect(protocol.readString(), UTF8.decode(expectedUtf8));
+    });
+
     group('shared tests', sharedTests);
   });