THRIFT-2345 Delphi: UTF-8 sent by PHP as JSON is not understood by TJsonProtocol
Patch: Jens Geyer
diff --git a/lib/delphi/src/Thrift.Protocol.JSON.pas b/lib/delphi/src/Thrift.Protocol.JSON.pas
index cce6c3c..6d305d8 100644
--- a/lib/delphi/src/Thrift.Protocol.JSON.pas
+++ b/lib/delphi/src/Thrift.Protocol.JSON.pas
@@ -254,7 +254,6 @@
RBRACKET : TBytes;
QUOTE : TBytes;
BACKSLASH : TBytes;
- ZERO : TBytes;
ESCSEQ : TBytes;
const
@@ -815,7 +814,8 @@
function TJSONProtocolImpl.ReadJSONString( skipContext : Boolean) : TBytes;
var buffer : TMemoryStream;
- ch : Byte;
+ ch : Byte;
+ wch : Word;
off : Integer;
tmp : TBytes;
begin
@@ -832,25 +832,34 @@
if (ch = QUOTE[0])
then Break;
- if (ch = ESCSEQ[0])
- then begin
- ch := FReader.Read;
- if (ch = ESCSEQ[1])
- then begin
- ReadJSONSyntaxChar( ZERO[0]);
- ReadJSONSyntaxChar( ZERO[0]);
- SetLength( tmp, 2);
- Transport.ReadAll( tmp, 0, 2);
- ch := (HexVal(tmp[0]) shl 4) + HexVal(tmp[1]);
- end
- else begin
- off := Pos( Char(ch), ESCAPE_CHARS);
- if off < 1
- then raise TProtocolException.Create( TProtocolException.INVALID_DATA, 'Expected control char');
- ch := Byte( ESCAPE_CHAR_VALS[off]);
- end;
+ // check for escapes
+ if (ch <> ESCSEQ[0]) then begin
+ buffer.Write( ch, 1);
+ Continue;
end;
- buffer.Write( ch, 1);
+
+ // distinguish between \uNNNN and \?
+ ch := FReader.Read;
+ if (ch <> ESCSEQ[1])
+ then begin
+ off := Pos( Char(ch), ESCAPE_CHARS);
+ if off < 1
+ then raise TProtocolException.Create( TProtocolException.INVALID_DATA, 'Expected control char');
+ ch := Byte( ESCAPE_CHAR_VALS[off]);
+ buffer.Write( ch, 1);
+ Continue;
+ end;
+
+ // it is \uXXXX
+ SetLength( tmp, 4);
+ Transport.ReadAll( tmp, 0, 4);
+ wch := (HexVal(tmp[0]) shl 12)
+ + (HexVal(tmp[1]) shl 8)
+ + (HexVal(tmp[2]) shl 4)
+ + HexVal(tmp[3]);
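+ // turn the UTF-16 code unit into UTF-8 bytes, to be decoded later; NOTE(review): each \uXXXX is converted on its own, so surrogate pairs are presumably not recombined - verify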
+ tmp := SysUtils.TEncoding.UTF8.GetBytes(Char(wch));
+ buffer.Write( tmp[0], length(tmp));
end;
SetLength( result, buffer.Size);
@@ -1174,6 +1183,5 @@
InitBytes( RBRACKET, [Byte(']')]);
InitBytes( QUOTE, [Byte('"')]);
InitBytes( BACKSLASH, [Byte('\')]);
- InitBytes( ZERO, [Byte('0')]);
InitBytes( ESCSEQ, [Byte('\'),Byte('u'),Byte('0'),Byte('0')]);
end.
diff --git a/lib/delphi/test/TestClient.pas b/lib/delphi/test/TestClient.pas
index 0f09489..9fb0b7a 100644
--- a/lib/delphi/test/TestClient.pas
+++ b/lib/delphi/test/TestClient.pas
@@ -1,4 +1,4 @@
-(*
+(*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -898,6 +898,9 @@
TEST_DOUBLE = -1.234e-56;
DELTA_DOUBLE = TEST_DOUBLE * 1e-14;
TEST_STRING = 'abc-'#$00E4#$00f6#$00fc; // german umlauts (en-us: "funny chars")
+ // Test THRIFT-2336 with 'Русское Название';
+ RUSSIAN_TEXT = #$0420#$0443#$0441#$0441#$043a#$043e#$0435' '#$041d#$0430#$0437#$0432#$0430#$043d#$0438#$0435;
+ RUSSIAN_JSON = '"\u0420\u0443\u0441\u0441\u043a\u043e\u0435 \u041d\u0430\u0437\u0432\u0430\u043d\u0438\u0435"';
// test both possible solidus encodings
SOLIDUS_JSON_DATA = '"one/two\/three"';
SOLIDUS_EXCPECTED = 'one/two/three';
@@ -965,6 +968,7 @@
Expect( stm.Position = stm.Size, 'Stream position after read');
+
// Solidus can be encoded in two ways. Make sure we can read both
stm.Position := 0;
stm.Size := 0;
@@ -976,6 +980,32 @@
Expect( prot.ReadString = SOLIDUS_EXCPECTED, 'Solidus encoding');
+ // Widechars should work too. Do they?
+ // After writing, we ensure that we are able to read it back
+ // We can't assume hex-encoding, since (nearly) any Unicode char is valid JSON
+ stm.Position := 0;
+ stm.Size := 0;
+ prot := TJSONProtocolImpl.Create(
+ TStreamTransportImpl.Create(
+ nil, TThriftStreamAdapterDelphi.Create( stm, FALSE)));
+ prot.WriteString( RUSSIAN_TEXT);
+ stm.Position := 0;
+ prot := TJSONProtocolImpl.Create(
+ TStreamTransportImpl.Create(
+ TThriftStreamAdapterDelphi.Create( stm, FALSE), nil));
+ Expect( prot.ReadString = RUSSIAN_TEXT, 'Writing JSON with chars > 8 bit');
+
+ // Widechars should work with hex-encoding too. Do they?
+ stm.Position := 0;
+ stm.Size := 0;
+ stm.WriteString( RUSSIAN_JSON);
+ stm.Position := 0;
+ prot := TJSONProtocolImpl.Create(
+ TStreamTransportImpl.Create(
+ TThriftStreamAdapterDelphi.Create( stm, FALSE), nil));
+ Expect( prot.ReadString = RUSSIAN_TEXT, 'Reading JSON with chars > 8 bit');
+
+
finally
stm.Free;
prot := nil; //-> Release
@@ -1068,10 +1098,10 @@
begin
// perform all tests
try
+ JSONProtocolReadWriteTest;
for i := 0 to FNumIteration - 1 do
begin
ClientTest;
- JSONProtocolReadWriteTest;
end;
except
on e:Exception do Expect( FALSE, 'unexpected exception: "'+e.message+'"');