THRIFT-3404 Fix: JSON string reader doesn't recognize UTF-16 surrogate pairs.
Client: Delphi
Patch: Phongphan Phuttha <phongphan@acm.org>
This closes #671
diff --git a/lib/delphi/src/Thrift.Protocol.JSON.pas b/lib/delphi/src/Thrift.Protocol.JSON.pas
index f491f53..36c3d72 100644
--- a/lib/delphi/src/Thrift.Protocol.JSON.pas
+++ b/lib/delphi/src/Thrift.Protocol.JSON.pas
@@ -24,6 +24,7 @@
interface
uses
+ Character,
Classes,
SysUtils,
Math,
@@ -821,9 +822,12 @@
var buffer : TMemoryStream;
ch : Byte;
wch : Word;
+ highSurogate: Char;
+ surrogatePairs: Array[0..1] of Char;
off : Integer;
tmp : TBytes;
begin
+ highSurogate := #0;
buffer := TMemoryStream.Create;
try
if not skipContext
@@ -862,11 +866,31 @@
+ (HexVal(tmp[1]) shl 8)
+ (HexVal(tmp[2]) shl 4)
+ HexVal(tmp[3]);
+
// we need to make UTF8 bytes from it, to be decoded later
- tmp := SysUtils.TEncoding.UTF8.GetBytes(Char(wch));
- buffer.Write( tmp[0], length(tmp));
+ if Character.IsHighSurrogate(char(wch)) then begin
+ if highSurogate <> #0  // two high surrogates in a row
+ then raise TProtocolException.Create( TProtocolException.INVALID_DATA, 'Expected low surrogate char');
+ highSurogate := char(wch);  // hold back until the matching low surrogate arrives
+ end
+ else if Character.IsLowSurrogate(char(wch)) then begin
+ if highSurogate = #0  // low surrogate without a preceding high surrogate
+ then raise TProtocolException.Create( TProtocolException.INVALID_DATA, 'Expected high surrogate char');
+ surrogatePairs[0] := highSurogate;
+ surrogatePairs[1] := char(wch);
+ tmp := TEncoding.UTF8.GetBytes(surrogatePairs);  // encode the complete pair at once
+ buffer.Write( tmp[0], Length(tmp));
+ highSurogate := #0;
+ end
+ else begin
+ tmp := SysUtils.TEncoding.UTF8.GetBytes(Char(wch));
+ buffer.Write( tmp[0], Length(tmp));
+ end;
end;
+ if highSurogate <> #0
+ then raise TProtocolException.Create( TProtocolException.INVALID_DATA, 'Expected low surrogate char');
+
SetLength( result, buffer.Size);
if buffer.Size > 0 then Move( buffer.Memory^, result[0], Length(result));
diff --git a/lib/delphi/test/TestClient.pas b/lib/delphi/test/TestClient.pas
index 5f375ef..144334b 100644
--- a/lib/delphi/test/TestClient.pas
+++ b/lib/delphi/test/TestClient.pas
@@ -1028,9 +1028,9 @@
TEST_DOUBLE = -1.234e-56;
DELTA_DOUBLE = TEST_DOUBLE * 1e-14;
TEST_STRING = 'abc-'#$00E4#$00f6#$00fc; // german umlauts (en-us: "funny chars")
- // Test THRIFT-2336 with 'Русское Название';
- RUSSIAN_TEXT = #$0420#$0443#$0441#$0441#$043a#$043e#$0435' '#$041d#$0430#$0437#$0432#$0430#$043d#$0438#$0435;
- RUSSIAN_JSON = '"\u0420\u0443\u0441\u0441\u043a\u043e\u0435 \u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435"';
+ // Test THRIFT-2336 and THRIFT-3404 with U+1D11E (G Clef symbol) and 'Русское Название';
+ G_CLEF_AND_CYRILLIC_TEXT = #$1d11e' '#$0420#$0443#$0441#$0441#$043a#$043e#$0435' '#$041d#$0430#$0437#$0432#$0430#$043d#$0438#$0435;
+ G_CLEF_AND_CYRILLIC_JSON = '"\ud834\udd1e \u0420\u0443\u0441\u0441\u043a\u043e\u0435 \u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435"';
// test both possible solidus encodings
SOLIDUS_JSON_DATA = '"one/two\/three"';
SOLIDUS_EXCPECTED = 'one/two/three';
@@ -1117,22 +1117,22 @@
prot := TJSONProtocolImpl.Create(
TStreamTransportImpl.Create(
nil, TThriftStreamAdapterDelphi.Create( stm, FALSE)));
- prot.WriteString( RUSSIAN_TEXT);
+ prot.WriteString( G_CLEF_AND_CYRILLIC_TEXT);
stm.Position := 0;
prot := TJSONProtocolImpl.Create(
TStreamTransportImpl.Create(
TThriftStreamAdapterDelphi.Create( stm, FALSE), nil));
- Expect( prot.ReadString = RUSSIAN_TEXT, 'Writing JSON with chars > 8 bit');
+ Expect( prot.ReadString = G_CLEF_AND_CYRILLIC_TEXT, 'Writing JSON with chars > 8 bit');
// Widechars should work with hex-encoding too. Do they?
stm.Position := 0;
stm.Size := 0;
- stm.WriteString( RUSSIAN_JSON);
+ stm.WriteString( G_CLEF_AND_CYRILLIC_JSON);
stm.Position := 0;
prot := TJSONProtocolImpl.Create(
TStreamTransportImpl.Create(
TThriftStreamAdapterDelphi.Create( stm, FALSE), nil));
- Expect( prot.ReadString = RUSSIAN_TEXT, 'Reading JSON with chars > 8 bit');
+ Expect( prot.ReadString = G_CLEF_AND_CYRILLIC_TEXT, 'Reading JSON with chars > 8 bit');
finally