go: Improve efficiency with zlib in THeaderTransport

When zlib is enabled in THeaderTransport we observed very high CPU
overhead. Use a pool of zlib writers to improve efficiency.
diff --git a/lib/go/thrift/header_transport.go b/lib/go/thrift/header_transport.go
index d81fb29..8148796 100644
--- a/lib/go/thrift/header_transport.go
+++ b/lib/go/thrift/header_transport.go
@@ -211,6 +211,25 @@
 	return nil
 }
 
+var zlibDefaultLevelWriterPool = newPool( // shared pool of reusable *zlib.Writer values built with zlib.NewWriter
+	func() *zlib.Writer {
+		return zlib.NewWriter(nil) // created without a destination; users Reset it onto a real writer after get()
+	},
+	nil, // NOTE(review): second newPool argument left nil; its meaning is defined by newPool (not shown here)
+)
+
+type zlibPoolCloser struct { // closer wrapper whose Close also returns the writer to zlibDefaultLevelWriterPool
+	writer *zlib.Writer // the pooled writer; its address is handed to the pool's put() on Close
+}
+
+func (z *zlibPoolCloser) Close() error { // closes the zlib writer, then recycles it; returns the error from writer.Close
+	defer func() { // deferred so recycling runs after writer.Close below, whatever it returns
+		z.writer.Reset(nil) // drop the reference to the destination writer before pooling
+		zlibDefaultLevelWriterPool.put(&z.writer) // NOTE(review): put gets the field's address and may modify it; confirm a second Close is safe
+	}()
+	return z.writer.Close()
+}
+
 // AddTransform adds a transform.
 func (tw *TransformWriter) AddTransform(id THeaderTransformID) error {
 	switch id {
@@ -222,9 +241,12 @@
 	case TransformNone:
 		// no-op
 	case TransformZlib:
-		writeCloser := zlib.NewWriter(tw.Writer)
+		writeCloser := zlibDefaultLevelWriterPool.get()
+		writeCloser.Reset(tw.Writer)
 		tw.Writer = writeCloser
-		tw.closers = append(tw.closers, writeCloser)
+		tw.closers = append(tw.closers, &zlibPoolCloser{
+			writer: writeCloser,
+		})
 	}
 	return nil
 }