THRIFT-3403 Fix JSON string reader not recognizing UTF-16 surrogate pairs
Client: C#
Patch: Phongphan Phuttha <phongphan@acm.org>
This closes #668
diff --git a/lib/csharp/src/Protocol/TJSONProtocol.cs b/lib/csharp/src/Protocol/TJSONProtocol.cs
index 9d51c74..5e6589e 100644
--- a/lib/csharp/src/Protocol/TJSONProtocol.cs
+++ b/lib/csharp/src/Protocol/TJSONProtocol.cs
@@ -725,6 +725,7 @@
private byte[] ReadJSONString(bool skipContext)
{
MemoryStream buffer = new MemoryStream();
+ List<char> codeunits = new List<char>();
if (!skipContext)
@@ -769,9 +770,41 @@
(HexVal((byte)tempBuffer[1]) << 8) +
(HexVal((byte)tempBuffer[2]) << 4) +
HexVal(tempBuffer[3]));
- var tmp = utf8Encoding.GetBytes(new char[] { (char)wch });
- buffer.Write(tmp, 0, tmp.Length);
+ if (Char.IsHighSurrogate((char)wch))
+ {
+ if (codeunits.Count > 0)
+ {
+ throw new TProtocolException(TProtocolException.INVALID_DATA,
+ "Expected low surrogate char");
+ }
+ codeunits.Add((char)wch);
+ }
+ else if (Char.IsLowSurrogate((char)wch))
+ {
+ if (codeunits.Count == 0)
+ {
+ throw new TProtocolException(TProtocolException.INVALID_DATA,
+ "Expected high surrogate char");
+ }
+ codeunits.Add((char)wch);
+ var tmp = utf8Encoding.GetBytes(codeunits.ToArray());
+ buffer.Write(tmp, 0, tmp.Length);
+ codeunits.Clear();
+ }
+ else
+ {
+ var tmp = utf8Encoding.GetBytes(new char[] { (char)wch });
+ buffer.Write(tmp, 0, tmp.Length);
+ }
}
+
+
+ if (codeunits.Count > 0)
+ {
+ throw new TProtocolException(TProtocolException.INVALID_DATA,
+ "Expected low surrogate char");
+ }
+
return buffer.ToArray();
}