THRIFT-2890 binary data may lose bytes with JSON transport under specific circumstances
Client: Delphi
Patch: Jens Geyer
This closes #319
This patch consists of a ported version of the base64 encoding/decoding used in C#. It handles the above case correctly, decodes data more efficiently in-place, and removes the dependency to Indy (IdCoderMIME).
diff --git a/lib/delphi/src/Thrift.Protocol.JSON.pas b/lib/delphi/src/Thrift.Protocol.JSON.pas
index 6d305d8..9ec3871 100644
--- a/lib/delphi/src/Thrift.Protocol.JSON.pas
+++ b/lib/delphi/src/Thrift.Protocol.JSON.pas
@@ -27,10 +27,10 @@
Classes,
SysUtils,
Math,
- IdCoderMIME,
Generics.Collections,
Thrift.Transport,
- Thrift.Protocol;
+ Thrift.Protocol,
+ Thrift.Utils;
type
IJSONProtocol = interface( IProtocol)
@@ -622,24 +622,29 @@
procedure TJSONProtocolImpl.WriteJSONBase64( const b : TBytes);
-var str : string;
- tmp : TBytes;
- i : Integer;
+var len, off, cnt : Integer;
+ tmpBuf : TBytes;
begin
FContext.Write;
Transport.Write( QUOTE);
- // First base64-encode b, then write the resulting 8-bit chars
- // Unfortunately, EncodeBytes() returns a string of 16-bit (wide) chars
- // And for the sake of efficiency, we want to write everything at once
- str := TIdEncoderMIME.EncodeBytes(b);
- ASSERT( SizeOf(str[1]) = SizeOf(Word));
- SetLength( tmp, Length(str));
- for i := 1 to Length(str) do begin
- ASSERT( Hi(Word(str[i])) = 0); // base64 consists of a well-defined set of 8-bit chars only
- tmp[i-1] := Lo(Word(str[i])); // extract the lower byte
+ len := Length(b);
+ off := 0;
+ SetLength( tmpBuf, 4);
+
+ while len >= 3 do begin
+ // Encode 3 bytes at a time
+ Base64Utils.Encode( b, off, 3, tmpBuf, 0);
+ Transport.Write( tmpBuf, 0, 4);
+ Inc( off, 3);
+ Dec( len, 3);
end;
- Transport.Write( tmp); // now write all the data
+
+ // Encode remainder, if any
+ if len > 0 then begin
+ cnt := Base64Utils.Encode( b, off, len, tmpBuf, 0);
+ Transport.Write( tmpBuf, 0, cnt);
+ end;
Transport.Write( QUOTE);
end;
@@ -960,12 +965,37 @@
function TJSONProtocolImpl.ReadJSONBase64 : TBytes;
var b : TBytes;
- str : string;
+ len, off, size, cnt : Integer;
begin
b := ReadJSONString(false);
- SetString( str, PAnsiChar(b), Length(b));
- result := TIdDecoderMIME.DecodeBytes( str);
+ len := Length(b);
+ off := 0;
+ size := 0;
+
+ // reduce len to ignore fill bytes
+ Dec(len);
+ while (len >= 0) and (b[len] = Byte('=')) do Dec(len);
+ Inc(len);
+
+ // read & decode full byte triplets = 4 source bytes
+ while (len >= 4) do begin
+ // Decode 4 bytes at a time
+ Inc( size, Base64Utils.Decode( b, off, 4, b, size)); // decoded in place
+ Inc( off, 4);
+ Dec( len, 4);
+ end;
+
+ // Don't decode if we hit the end or got a single leftover byte (invalid
+ // base64 but legal for skip of regular string type)
+ if len > 1 then begin
+ // Decode remainder
+ Inc( size, Base64Utils.Decode( b, off, len, b, size)); // decoded in place
+ end;
+
+ // resize to final size and return the data
+ SetLength( b, size);
+ result := b;
end;
diff --git a/lib/delphi/src/Thrift.Utils.pas b/lib/delphi/src/Thrift.Utils.pas
index 5c3d8a5..05fb964 100644
--- a/lib/delphi/src/Thrift.Utils.pas
+++ b/lib/delphi/src/Thrift.Utils.pas
@@ -49,6 +49,13 @@
end;
+ Base64Utils = class sealed
+ public
+ class function Encode( const src : TBytes; srcOff, len : Integer; dst : TBytes; dstOff : Integer) : Integer; static;
+ class function Decode( const src : TBytes; srcOff, len : Integer; dst : TBytes; dstOff : Integer) : Integer; static;
+ end;
+
+
implementation
{ TOverlappedHelperImpl }
@@ -100,6 +107,82 @@
end;
+{ Base64Utils }
+
+class function Base64Utils.Encode( const src : TBytes; srcOff, len : Integer; dst : TBytes; dstOff : Integer) : Integer;
+const ENCODE_TABLE : PAnsiChar = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
+begin
+ ASSERT( len in [1..3]);
+ dst[dstOff] := Byte( ENCODE_TABLE[ (src[srcOff] shr 2) and $3F]);
+ case len of
+ 3 : begin
+ Inc(dstOff);
+ dst[dstOff] := Byte( ENCODE_TABLE[ ((src[srcOff] shl 4) and $30) or ((src[srcOff + 1] shr 4) and $0F)]);
+ Inc(dstOff);
+ dst[dstOff] := Byte( ENCODE_TABLE[ ((src[srcOff + 1] shl 2) and $3C) or ((src[srcOff + 2] shr 6) and $03)]);
+ Inc(dstOff);
+ dst[dstOff] := Byte( ENCODE_TABLE[ src[srcOff + 2] and $3F]);
+ result := 4;
+ end;
+
+ 2 : begin
+ Inc(dstOff);
+ dst[dstOff] := Byte( ENCODE_TABLE[ ((src[srcOff] shl 4) and $30) or ((src[srcOff + 1] shr 4) and $0F)]);
+ Inc(dstOff);
+ dst[dstOff] := Byte( ENCODE_TABLE[ (src[srcOff + 1] shl 2) and $3C]);
+ result := 3;
+ end;
+
+ 1 : begin
+ Inc(dstOff);
+ dst[dstOff] := Byte( ENCODE_TABLE[ (src[srcOff] shl 4) and $30]);
+ result := 2;
+ end;
+
+ else
+ ASSERT( FALSE);
+ end;
+end;
+
+
+class function Base64Utils.Decode( const src : TBytes; srcOff, len : Integer; dst : TBytes; dstOff : Integer) : Integer;
+const DECODE_TABLE : array[0..$FF] of Integer
+ = ( -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,
+ 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
+ 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,
+ -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
+ 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 );
+begin
+ ASSERT( len in [1..4]);
+ result := 1;
+ dst[dstOff] := ((DECODE_TABLE[src[srcOff] and $0FF] shl 2)
+ or (DECODE_TABLE[src[srcOff + 1] and $0FF] shr 4));
+
+ if (len > 2) then begin
+ Inc( result);
+ Inc( dstOff);
+ dst[dstOff] := (((DECODE_TABLE[src[srcOff + 1] and $0FF] shl 4) and $F0)
+ or (DECODE_TABLE[src[srcOff + 2] and $0FF] shr 2));
+
+ if (len > 3) then begin
+ Inc( result);
+ Inc( dstOff);
+ dst[dstOff] := (((DECODE_TABLE[src[srcOff + 2] and $0FF] shl 6) and $C0)
+ or DECODE_TABLE[src[srcOff + 3] and $0FF]);
+ end;
+ end;
+end;
end.