THRIFT-3396 DART: UTF-8 sent by PHP as JSON is not understood by TJsonProtocol
Client: Dart
Patch: Phongphan Phuttha <phongphan@acm.org>
This closes #658
diff --git a/lib/dart/lib/src/protocol/t_json_protocol.dart b/lib/dart/lib/src/protocol/t_json_protocol.dart
index 4fa6499..ca91c8b 100644
--- a/lib/dart/lib/src/protocol/t_json_protocol.dart
+++ b/lib/dart/lib/src/protocol/t_json_protocol.dart
@@ -41,6 +41,7 @@
TJsonProtocol(TTransport transport) : super(transport) {
_rootContext = new _BaseContext(this);
_reader = new _LookaheadReader(this);
+ _resetContext(); // runs last, after _rootContext and _reader have been assigned
}
void _pushContext(_BaseContext c) {
@@ -284,10 +285,15 @@
_writeJsonBase64(bytes);
}
+ bool _isHighSurrogate(int b) => 0xD800 <= b && b <= 0xDBFF; // UTF-16 high (lead) surrogate range
+
+ bool _isLowSurrogate(int b) => 0xDC00 <= b && b <= 0xDFFF; // UTF-16 low (trail) surrogate range
+
/// read
Uint8List _readJsonString({bool skipContext: false}) {
List<int> bytes = [];
+ List<int> codeunits = [];
if (!skipContext) {
_context.read();
@@ -308,7 +314,7 @@
byte = _reader.read();
- // distinguish between \u00XX and control chars like \n
+ // distinguish between \uXXXX and control chars like \n
if (byte != _Constants.ESCSEQ_BYTES[1]) {
String char = new String.fromCharCode(byte);
int offset = _Constants.ESCAPE_CHARS.indexOf(char);
@@ -321,12 +327,36 @@
continue;
}
- // it's \u00XX
- _readJsonSyntaxChar(_Constants.HEX_0_BYTES[0]);
- _readJsonSyntaxChar(_Constants.HEX_0_BYTES[0]);
- transport.readAll(_tempBuffer, 0, 2);
- byte = _hexVal(_tempBuffer[0]) << 4 + _hexVal(_tempBuffer[1]);
- bytes.add(byte);
+ // it's \uXXXX
+ transport.readAll(_tempBuffer, 0, 4);
+ byte = (_hexVal(_tempBuffer[0]) << 12)
+ + (_hexVal(_tempBuffer[1]) << 8)
+ + (_hexVal(_tempBuffer[2]) << 4)
+ + _hexVal(_tempBuffer[3]);
+ // A low surrogate is only valid directly after a pending high surrogate;
+ // the completed pair is then UTF-8 encoded as a single code point.
+ if (_isLowSurrogate(byte)) {
+ if (codeunits.isEmpty) {
+ throw new TProtocolError(
+ TProtocolErrorType.INVALID_DATA, "Expected high surrogate");
+ }
+ codeunits.add(byte);
+ bytes.addAll(utf8Codec.encode(new String.fromCharCodes(codeunits)));
+ codeunits.clear();
+ } else if (codeunits.isNotEmpty) {
+ // Any other escape after a high surrogate (a second high or a BMP char) is malformed.
+ throw new TProtocolError(
+ TProtocolErrorType.INVALID_DATA, "Expected low surrogate");
+ } else if (_isHighSurrogate(byte)) {
+ codeunits.add(byte); // hold it until the matching low surrogate arrives
+ } else {
+ bytes.addAll(utf8Codec.encode(new String.fromCharCode(byte)));
+ }
+ }
+
+ if (codeunits.isNotEmpty) {
+ throw new TProtocolError(
+ TProtocolErrorType.INVALID_DATA, "Expected low surrogate");
}
return new Uint8List.fromList(bytes);
diff --git a/lib/dart/test/protocol/t_protocol_test.dart b/lib/dart/test/protocol/t_protocol_test.dart
index 88ddd4f..7362884 100644
--- a/lib/dart/test/protocol/t_protocol_test.dart
+++ b/lib/dart/test/protocol/t_protocol_test.dart
@@ -18,6 +18,7 @@
library thrift.test.transport.t_json_protocol_test;
import 'dart:async';
+import 'dart:convert' show UTF8;
import 'dart:typed_data' show Uint8List;
import 'package:test/test.dart';
@@ -352,6 +353,28 @@
protocol.writeMessageBegin(message);
});
+
+ test('Test escaped unicode', () async {
+ /*
+ Escapes in the input and the UTF-8 bytes of the expected decoded string:
+ \u0001 and the unescaped space: 0x01 and 0x20
+ KOR_KAI (THAI CHARACTER KO KAI), UTF-16: 0x0E01
+ UTF-8: 0xE0 0xB8 0x81
+ G clef (U+1D11E), UTF-16 surrogate pair: 0xD834 0xDD1E
+ UTF-8: 0xF0 0x9D 0x84 0x9E
+ */
+ var buffer = UTF8.encode(r'"\u0001\u0e01 \ud834\udd1e"');
+ var transport = new TBufferedTransport();
+ transport.writeAll(buffer);
+
+ var protocol = new TJsonProtocol(transport);
+
+ await protocol.transport.flush();
+
+ var subject = protocol.readString();
+ expect(subject, UTF8.decode([0x01, 0xE0, 0xB8, 0x81, 0x20, 0xF0, 0x9D, 0x84, 0x9E]));
+ });
+
group('shared tests', sharedTests);
});