THRIFT-2890 binary data may lose bytes with JSON transport under specific circumstances
Client: Delphi
Patch: Jens Geyer

This closes #319

This patch consists of a ported version of the base64 encoding/decoding used in C#. It handles the above case correctly, decodes data more efficiently in-place, and removes the dependency to Indy (IdCoderMIME).
diff --git a/lib/delphi/src/Thrift.Protocol.JSON.pas b/lib/delphi/src/Thrift.Protocol.JSON.pas
index 6d305d8..9ec3871 100644
--- a/lib/delphi/src/Thrift.Protocol.JSON.pas
+++ b/lib/delphi/src/Thrift.Protocol.JSON.pas
@@ -27,10 +27,10 @@
   Classes,
   SysUtils,
   Math,
-  IdCoderMIME,
   Generics.Collections,
   Thrift.Transport,
-  Thrift.Protocol;
+  Thrift.Protocol,
+  Thrift.Utils;
 
 type
   IJSONProtocol = interface( IProtocol)
@@ -622,24 +622,29 @@
 
 
 procedure TJSONProtocolImpl.WriteJSONBase64( const b : TBytes);
-var str : string;
-    tmp : TBytes;
-    i   : Integer;
+var len, off, cnt : Integer;
+    tmpBuf : TBytes;
 begin
   FContext.Write;
   Transport.Write( QUOTE);
 
-  // First base64-encode b, then write the resulting 8-bit chars
-  // Unfortunately, EncodeBytes() returns a string of 16-bit (wide) chars
-  // And for the sake of efficiency, we want to write everything at once
-  str := TIdEncoderMIME.EncodeBytes(b);
-  ASSERT( SizeOf(str[1]) = SizeOf(Word));
-  SetLength( tmp, Length(str));
-  for i := 1 to Length(str) do begin
-    ASSERT( Hi(Word(str[i])) = 0);   // base64 consists of a well-defined set of 8-bit chars only
-    tmp[i-1] := Lo(Word(str[i]));    // extract the lower byte
+  len := Length(b);
+  off := 0;
+  SetLength( tmpBuf, 4);
+
+  while len >= 3 do begin
+    // Encode 3 bytes at a time
+    Base64Utils.Encode( b, off, 3, tmpBuf, 0);
+    Transport.Write( tmpBuf, 0, 4);
+    Inc( off, 3);
+    Dec( len, 3);
   end;
-  Transport.Write( tmp);  // now write all the data
+
+  // Encode remainder, if any
+  if len > 0 then begin
+    cnt := Base64Utils.Encode( b, off, len, tmpBuf, 0);
+    Transport.Write( tmpBuf, 0, cnt);
+  end;
 
   Transport.Write( QUOTE);
 end;
@@ -960,12 +965,37 @@
 
 function TJSONProtocolImpl.ReadJSONBase64 : TBytes;
 var b : TBytes;
-    str : string;
+    len, off, size, cnt : Integer;
 begin
   b := ReadJSONString(false);
 
-  SetString( str, PAnsiChar(b), Length(b));
-  result := TIdDecoderMIME.DecodeBytes( str);
+  len := Length(b);
+  off := 0;
+  size := 0;
+
+  // reduce len to ignore fill bytes
+  Dec(len);
+  while (len >= 0) and (b[len] = Byte('=')) do Dec(len);
+  Inc(len);
+
+  // read & decode full byte triplets = 4 source bytes
+  while (len >= 4) do begin
+    // Decode 4 bytes at a time
+    Inc( size, Base64Utils.Decode( b, off, 4, b, size)); // decoded in place
+    Inc( off, 4);
+    Dec( len, 4);
+  end;
+
+  // Don't decode if we hit the end or got a single leftover byte (invalid
+  // base64 but legal for skip of regular string type)
+  if len > 1 then begin
+    // Decode remainder
+    Inc( size, Base64Utils.Decode( b, off, len, b, size)); // decoded in place
+  end;
+
+  // resize to final size and return the data
+  SetLength( b, size);
+  result := b;
 end;
 
 
diff --git a/lib/delphi/src/Thrift.Utils.pas b/lib/delphi/src/Thrift.Utils.pas
index 5c3d8a5..05fb964 100644
--- a/lib/delphi/src/Thrift.Utils.pas
+++ b/lib/delphi/src/Thrift.Utils.pas
@@ -49,6 +49,13 @@
   end;
 
 
+  Base64Utils = class sealed
+  public
+    class function Encode( const src : TBytes; srcOff, len : Integer; dst : TBytes; dstOff : Integer) : Integer; static;
+    class function Decode( const src : TBytes; srcOff, len : Integer; dst : TBytes; dstOff : Integer) : Integer; static;
+  end;
+
+
 implementation
 
 { TOverlappedHelperImpl }
@@ -100,6 +107,82 @@
 end;
 
 
+{ Base64Utils }
+
+class function Base64Utils.Encode( const src : TBytes; srcOff, len : Integer; dst : TBytes; dstOff : Integer) : Integer;
+const ENCODE_TABLE : PAnsiChar = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
+begin
+  ASSERT( len in [1..3]);
+  dst[dstOff] := Byte( ENCODE_TABLE[ (src[srcOff] shr 2) and $3F]);
+  case len of
+    3 : begin
+      Inc(dstOff);
+      dst[dstOff] := Byte( ENCODE_TABLE[ ((src[srcOff] shl 4) and $30) or ((src[srcOff + 1] shr 4) and $0F)]);
+      Inc(dstOff);
+      dst[dstOff] := Byte( ENCODE_TABLE[ ((src[srcOff + 1] shl 2) and $3C) or ((src[srcOff + 2] shr 6) and $03)]);
+      Inc(dstOff);
+      dst[dstOff] := Byte( ENCODE_TABLE[ src[srcOff + 2] and $3F]);
+      result := 4;
+    end;
+
+    2 : begin
+      Inc(dstOff);
+      dst[dstOff] := Byte( ENCODE_TABLE[ ((src[srcOff] shl 4) and $30) or ((src[srcOff + 1] shr 4) and $0F)]);
+      Inc(dstOff);
+      dst[dstOff] := Byte( ENCODE_TABLE[ (src[srcOff + 1] shl 2) and $3C]);
+      result := 3;
+    end;
+
+    1 : begin
+      Inc(dstOff);
+      dst[dstOff] := Byte( ENCODE_TABLE[ (src[srcOff] shl 4) and $30]);
+      result := 2;
+    end;
+
+  else
+    ASSERT( FALSE);
+  end;
+end;
+
+
+class function Base64Utils.Decode( const src : TBytes; srcOff, len : Integer; dst : TBytes; dstOff : Integer) : Integer;
+const DECODE_TABLE : array[0..$FF] of Integer
+                   = ( -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+                       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+                       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,
+                       52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,
+                       -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
+                       15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,
+                       -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
+                       41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,
+                       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+                       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+                       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+                       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+                       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+                       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+                       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+                       -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  );
+begin
+  ASSERT( len in [1..4]);
+  result := 1;
+  dst[dstOff] := ((DECODE_TABLE[src[srcOff]     and $0FF] shl 2)
+              or  (DECODE_TABLE[src[srcOff + 1] and $0FF] shr 4));
+
+  if (len > 2) then begin
+    Inc( result);
+    Inc( dstOff);
+    dst[dstOff] := (((DECODE_TABLE[src[srcOff + 1] and $0FF] shl 4) and $F0)
+                or   (DECODE_TABLE[src[srcOff + 2] and $0FF] shr 2));
+
+    if (len > 3) then begin
+      Inc( result);
+      Inc( dstOff);
+      dst[dstOff] := (((DECODE_TABLE[src[srcOff + 2] and $0FF] shl 6) and $C0)
+                  or    DECODE_TABLE[src[srcOff + 3] and $0FF]);
+    end;
+  end;
+end;
 
 
 end.