THRIFT-2779: Always write unescaped JSON unicode string.
Client: PHP
Patch: Phongphan Phuttha
This closes #666
diff --git a/lib/php/lib/Thrift/Protocol/TJSONProtocol.php b/lib/php/lib/Thrift/Protocol/TJSONProtocol.php
index 6c93b09..6d8e81f 100644
--- a/lib/php/lib/Thrift/Protocol/TJSONProtocol.php
+++ b/lib/php/lib/Thrift/Protocol/TJSONProtocol.php
@@ -215,6 +215,44 @@
return dechex($val);
}
+    /**
+     * Version gate for json_encode()'s JSON_UNESCAPED_UNICODE flag.
+     * @return bool true when running on PHP 5.4 or newer
+     */
+    private function hasJSONUnescapedUnicode()
+    {
+        return PHP_VERSION_ID >= 50400; // i.e. major > 5, or major == 5 with minor >= 4
+    }
+
+    /**
+     * JSON-encodes $str, keeping non-ASCII characters as raw UTF-8.
+     * PHP < 5.4 lacks the flag, so json_encode()'s \uXXXX escapes are decoded here.
+     * @param mixed $str value to encode
+     * @return string JSON text
+     */
+    private function unescapedUnicode($str)
+    {
+        if ($this->hasJSONUnescapedUnicode()) {
+            return json_encode($str, JSON_UNESCAPED_UNICODE);
+        }
+
+        // Single pass: "\\" is consumed first so a literal backslash followed by
+        // "u1234" is not decoded; then surrogate pairs; then single BMP escapes.
+        return preg_replace_callback(
+            '/\\\\(?:\\\\|u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})|u([0-9a-f]{4}))/i',
+            function ($m) {
+                $hex = isset($m[3]) ? $m[3] : (isset($m[2]) ? $m[1].$m[2] : '');
+                // Escaped backslashes, ASCII (control) escapes and lone surrogates stay as is.
+                if ($hex === '' || hexdec($hex) < 0x80 || preg_match('/^d[89a-f]..$/i', $hex)) {
+                    return $m[0];
+                }
+                return mb_convert_encoding(pack('H*', $hex), 'UTF-8', 'UTF-16BE');
+            },
+            json_encode($str)
+        );
+    }
+
+
private function writeJSONString($b)
{
$this->context_->write();
@@ -223,7 +261,7 @@
$this->trans_->write(self::QUOTE);
}
- $this->trans_->write(json_encode($b));
+ $this->trans_->write($this->unescapedUnicode($b));
if (is_numeric($b) && $this->context_->escapeNum()) {
$this->trans_->write(self::QUOTE);
diff --git a/lib/php/test/Test/Thrift/Fixtures.php b/lib/php/test/Test/Thrift/Fixtures.php
index d9d487f..2c60a08 100644
--- a/lib/php/test/Test/Thrift/Fixtures.php
+++ b/lib/php/test/Test/Thrift/Fixtures.php
@@ -46,6 +46,9 @@
self::$testArgs['testString3'] =
"string that ends in double-backslash \\\\";
+ self::$testArgs['testUnicodeStringWithNonBMP'] =
+ "สวัสดี/𝒯";
+
self::$testArgs['testDouble'] = 3.1415926535898;
// TODO: add testBinary() call
diff --git a/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php b/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php
index 7ba3441..a4ca9d5 100755
--- a/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php
+++ b/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php
@@ -200,7 +200,12 @@
$actual = $this->transport->read( BUFSIZ );
$expected = TestTJSONProtocol_Fixtures::$testArgsJSON['testStringMap'];
- $this->assertEquals( $expected, $actual );
+ /*
+ * $actual holds the unescaped string, so it must be
+ * decoded and then re-encoded to obtain the escaped
+ * Unicode form that $expected contains.
+ */
+ $this->assertEquals( $expected, json_encode(json_decode($actual)) );
}
public function testSet_Write()
@@ -308,6 +313,18 @@
$this->assertEquals( $expected, $actual );
}
+    // A string mixing BMP and non-BMP characters must be written as raw UTF-8.
+    public function testString4_Write()
+    {
+        $key = 'testUnicodeStringWithNonBMP';
+        $request = new \ThriftTest\ThriftTest_testString_args();
+        $request->thing = Fixtures::$testArgs[$key];
+        $request->write( $this->protocol );
+
+        $written = $this->transport->read( BUFSIZ );
+        $this->assertEquals( TestTJSONProtocol_Fixtures::$testArgsJSON[$key], $written );
+    }
+
public function testDouble_Read()
{
$this->transport->write(
@@ -528,6 +545,8 @@
self::$testArgsJSON['testString3'] = '{"1":{"str":"string that ends in double-backslash \\\\\\\\"}}';
+ self::$testArgsJSON['testUnicodeStringWithNonBMP'] = '{"1":{"str":"สวัสดี\/𝒯"}}';
+
self::$testArgsJSON['testDouble'] = '{"1":{"dbl":3.1415926535898}}';
self::$testArgsJSON['testByte'] = '{"1":{"i8":1}}';