THRIFT-5767: use string builder to parse strings with escaped quotes (#2946)
Client: Go
diff --git a/lib/go/test/Makefile.am b/lib/go/test/Makefile.am
index cb8928b..dc56963 100644
--- a/lib/go/test/Makefile.am
+++ b/lib/go/test/Makefile.am
@@ -62,7 +62,8 @@
ProcessorMiddlewareTest.thrift \
ClientMiddlewareExceptionTest.thrift \
ValidateTest.thrift \
- ForwardType.thrift
+ ForwardType.thrift \
+ StringParseAllocationTest.thrift
mkdir -p gopath/src
grep -v list.*map.*list.*map $(THRIFTTEST) | grep -v 'set<Insanity>' > ThriftTest.thrift
$(THRIFT) $(THRIFTARGS) -r IncludesTest.thrift
@@ -98,6 +99,7 @@
$(THRIFT) $(THRIFTARGS) ClientMiddlewareExceptionTest.thrift
$(THRIFT) $(THRIFTARGS) ValidateTest.thrift
$(THRIFT) $(THRIFTARGS) ForwardType.thrift
+ $(THRIFT) $(THRIFTARGS) StringParseAllocationTest.thrift
ln -nfs ../../tests gopath/src/tests
cp -r ./dontexportrwtest gopath/src
touch gopath
@@ -169,6 +171,7 @@
RefAnnotationFieldsTest.thrift \
RequiredFieldTest.thrift \
ServicesTest.thrift \
+ StringParseAllocationTest.thrift \
TypedefFieldTest.thrift \
UnionBinaryTest.thrift \
UnionDefaultValueTest.thrift \
diff --git a/lib/go/test/StringParseAllocationTest.thrift b/lib/go/test/StringParseAllocationTest.thrift
new file mode 100644
index 0000000..0ede9a5
--- /dev/null
+++ b/lib/go/test/StringParseAllocationTest.thrift
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+struct StringStruct {
+ 1: required string example
+}
diff --git a/lib/go/test/tests/string_parse_allocation_test.go b/lib/go/test/tests/string_parse_allocation_test.go
new file mode 100644
index 0000000..12790c4
--- /dev/null
+++ b/lib/go/test/tests/string_parse_allocation_test.go
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package tests
+
+import (
+ "context"
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/apache/thrift/lib/go/test/gopath/src/stringparseallocationtest"
+ "github.com/apache/thrift/lib/go/thrift"
+)
+
+func TestSimpleJsonStringParse_Allocations(t *testing.T) {
+ byteAllocationLimit := 100 * 1024 // 100 KB
+ res := testing.Benchmark(BenchmarkSimpleJsonStringParse_Allocations)
+ if res.AllocedBytesPerOp() > int64(byteAllocationLimit) {
+ t.Errorf("Total memory allocation size too high: %d (> %d)", res.AllocedBytesPerOp(), byteAllocationLimit)
+ }
+}
+
+func BenchmarkSimpleJsonStringParse_Allocations(b *testing.B) {
+ b.ReportAllocs()
+ b.StopTimer()
+ numEscapedQuotes := 1000
+ var sb strings.Builder
+ for i := 0; i < numEscapedQuotes; i++ {
+ sb.WriteString(`\"`)
+ }
+
+ testString := fmt.Sprintf(`{"1": {"str": "this is a test with %d of escaped quotes %s"}}`, numEscapedQuotes, sb.String())
+ stringStruct := stringparseallocationtest.NewStringStruct()
+ transport := thrift.NewTMemoryBuffer()
+ p := thrift.NewTJSONProtocol(transport)
+
+ for i := 0; i < b.N; i++ {
+ transport.Reset()
+ transport.WriteString(testString)
+ transport.Flush(context.Background())
+ b.StartTimer()
+ _ = stringStruct.Read(context.Background(), p)
+ b.StopTimer()
+ }
+}
diff --git a/lib/go/thrift/simple_json_protocol.go b/lib/go/thrift/simple_json_protocol.go
index 8b1284f..da12248 100644
--- a/lib/go/thrift/simple_json_protocol.go
+++ b/lib/go/thrift/simple_json_protocol.go
@@ -30,6 +30,7 @@
"io"
"math"
"strconv"
+ "strings"
)
type _ParseContext int
@@ -922,15 +923,7 @@
if err != nil {
return "", NewTProtocolException(err)
}
- l := len(line)
- // count number of escapes to see if we need to keep going
- i := 1
- for ; i < l; i++ {
- if line[l-i-1] != '\\' {
- break
- }
- }
- if i&0x01 == 1 {
+ if endsWithoutEscapedQuote(line) {
v, ok := jsonUnquote(string(JSON_QUOTE) + line)
if !ok {
return "", NewTProtocolException(err)
@@ -951,27 +944,29 @@
}
func (p *TSimpleJSONProtocol) ParseQuotedStringBody() (string, error) {
- line, err := p.reader.ReadString(JSON_QUOTE)
- if err != nil {
- return "", NewTProtocolException(err)
+ var sb strings.Builder
+
+ for {
+ line, err := p.reader.ReadString(JSON_QUOTE)
+ if err != nil {
+ return "", NewTProtocolException(err)
+ }
+ sb.WriteString(line)
+ if endsWithoutEscapedQuote(line) {
+ return sb.String(), nil
+ }
}
- l := len(line)
- // count number of escapes to see if we need to keep going
+}
+
+func endsWithoutEscapedQuote(s string) bool {
+ l := len(s)
i := 1
for ; i < l; i++ {
- if line[l-i-1] != '\\' {
+ if s[l-i-1] != '\\' {
break
}
}
- if i&0x01 == 1 {
- return line, nil
- }
- s, err := p.ParseQuotedStringBody()
- if err != nil {
- return "", NewTProtocolException(err)
- }
- v := line + s
- return v, nil
+ return i&0x01 == 1
}
func (p *TSimpleJSONProtocol) ParseBase64EncodedBody() ([]byte, error) {