THRIFT-1189. rb: Ruby deserializer speed improvements

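This patch gives the Ruby deserialization stack a healthy performance boost. It adds read_byte and read_into_buffer to the native MemoryBufferTransport, and the accelerated binary and compact protocols now read single bytes and fixed-width values through those methods, using a reusable @rbuf string, instead of going through READ for every value.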

Patch: Ilya Maykov

git-svn-id: https://svn.apache.org/repos/asf/thrift/trunk@1140780 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lib/rb/ext/binary_protocol_accelerated.c b/lib/rb/ext/binary_protocol_accelerated.c
index 3854887..bd1c2da 100644
--- a/lib/rb/ext/binary_protocol_accelerated.c
+++ b/lib/rb/ext/binary_protocol_accelerated.c
@@ -34,6 +34,7 @@
 static int VERSION_MASK;
 static int TYPE_MASK;
 static int BAD_VERSION;
+static ID rbuf_ivar_id;
 
 static void write_byte_direct(VALUE trans, int8_t b) {
   WRITE(trans, (char*)&b, 1);
@@ -226,26 +227,36 @@
 VALUE rb_thrift_binary_proto_read_i16(VALUE self);
 
 static char read_byte_direct(VALUE self) {
-  VALUE buf = READ(self, 1);
-  return RSTRING_PTR(buf)[0];
+  VALUE byte = rb_funcall(GET_TRANSPORT(self), read_byte_method_id, 0);
+  return (char)(FIX2INT(byte));
 }
 
 static int16_t read_i16_direct(VALUE self) {
-  VALUE buf = READ(self, 2);
-  return (int16_t)(((uint8_t)(RSTRING_PTR(buf)[1])) | ((uint16_t)((RSTRING_PTR(buf)[0]) << 8)));
+  VALUE rbuf = rb_ivar_get(self, rbuf_ivar_id);
+  rb_funcall(GET_TRANSPORT(self), read_into_buffer_method_id, 2, rbuf, INT2FIX(2));
+  return (int16_t)(((uint8_t)(RSTRING_PTR(rbuf)[1])) | ((uint16_t)((RSTRING_PTR(rbuf)[0]) << 8)));
 }
 
 static int32_t read_i32_direct(VALUE self) {
-  VALUE buf = READ(self, 4);
-  return ((uint8_t)(RSTRING_PTR(buf)[3])) | 
-    (((uint8_t)(RSTRING_PTR(buf)[2])) << 8) | 
-    (((uint8_t)(RSTRING_PTR(buf)[1])) << 16) | 
-    (((uint8_t)(RSTRING_PTR(buf)[0])) << 24);
+  VALUE rbuf = rb_ivar_get(self, rbuf_ivar_id);
+  rb_funcall(GET_TRANSPORT(self), read_into_buffer_method_id, 2, rbuf, INT2FIX(4));
+  return ((uint8_t)(RSTRING_PTR(rbuf)[3])) |
+    (((uint8_t)(RSTRING_PTR(rbuf)[2])) << 8) |
+    (((uint8_t)(RSTRING_PTR(rbuf)[1])) << 16) |
+    (((uint8_t)(RSTRING_PTR(rbuf)[0])) << 24);
 }
 
 static int64_t read_i64_direct(VALUE self) {
-  uint64_t hi = read_i32_direct(self);
-  uint32_t lo = read_i32_direct(self);
+  VALUE rbuf = rb_ivar_get(self, rbuf_ivar_id);
+  rb_funcall(GET_TRANSPORT(self), read_into_buffer_method_id, 2, rbuf, INT2FIX(8));
+  uint64_t hi = ((uint8_t)(RSTRING_PTR(rbuf)[3])) |
+    (((uint8_t)(RSTRING_PTR(rbuf)[2])) << 8) |
+    (((uint8_t)(RSTRING_PTR(rbuf)[1])) << 16) |
+    (((uint8_t)(RSTRING_PTR(rbuf)[0])) << 24);
+  uint32_t lo = ((uint8_t)(RSTRING_PTR(rbuf)[7])) |
+    (((uint8_t)(RSTRING_PTR(rbuf)[6])) << 8) |
+    (((uint8_t)(RSTRING_PTR(rbuf)[5])) << 16) |
+    (((uint8_t)(RSTRING_PTR(rbuf)[4])) << 24);
   return (hi << 32) | lo;
 }
 
@@ -425,4 +436,6 @@
   rb_define_method(bpa_class, "read_map_end", rb_thift_binary_proto_read_map_end, 0);
   rb_define_method(bpa_class, "read_list_end", rb_thift_binary_proto_read_list_end, 0);
   rb_define_method(bpa_class, "read_set_end", rb_thift_binary_proto_read_set_end, 0);
+
+  rbuf_ivar_id = rb_intern("@rbuf");
 }
diff --git a/lib/rb/ext/compact_protocol.c b/lib/rb/ext/compact_protocol.c
index 6c0123d..a47fe6c 100644
--- a/lib/rb/ext/compact_protocol.c
+++ b/lib/rb/ext/compact_protocol.c
@@ -34,6 +34,7 @@
 static ID last_field_id;
 static ID boolean_field_id;
 static ID bool_value_id;
+static ID rbuf_ivar_id;
 
 static int VERSION;
 static int VERSION_MASK;
@@ -354,8 +355,8 @@
 }
 
 static char read_byte_direct(VALUE self) {
-  VALUE buf = READ(self, 1);
-  return RSTRING_PTR(buf)[0];
+  VALUE byte = rb_funcall(GET_TRANSPORT(self), read_byte_method_id, 0);
+  return (char)(FIX2INT(byte));
 }
 
 static int64_t zig_zag_to_ll(int64_t n) {
@@ -528,15 +529,16 @@
     double f;
     int64_t l;
   } transfer;
-  VALUE bytes = READ(self, 8);
-  uint32_t lo = ((uint8_t)(RSTRING_PTR(bytes)[0]))
-    | (((uint8_t)(RSTRING_PTR(bytes)[1])) << 8)
-    | (((uint8_t)(RSTRING_PTR(bytes)[2])) << 16)
-    | (((uint8_t)(RSTRING_PTR(bytes)[3])) << 24);
-  uint64_t hi = (((uint8_t)(RSTRING_PTR(bytes)[4])))
-    | (((uint8_t)(RSTRING_PTR(bytes)[5])) << 8)
-    | (((uint8_t)(RSTRING_PTR(bytes)[6])) << 16)
-    | (((uint8_t)(RSTRING_PTR(bytes)[7])) << 24);
+  VALUE rbuf = rb_ivar_get(self, rbuf_ivar_id);
+  rb_funcall(GET_TRANSPORT(self), read_into_buffer_method_id, 2, rbuf, INT2FIX(8));
+  uint32_t lo = ((uint8_t)(RSTRING_PTR(rbuf)[0]))
+    | (((uint8_t)(RSTRING_PTR(rbuf)[1])) << 8)
+    | (((uint8_t)(RSTRING_PTR(rbuf)[2])) << 16)
+    | (((uint8_t)(RSTRING_PTR(rbuf)[3])) << 24);
+  uint64_t hi = (((uint8_t)(RSTRING_PTR(rbuf)[4])))
+    | (((uint8_t)(RSTRING_PTR(rbuf)[5])) << 8)
+    | (((uint8_t)(RSTRING_PTR(rbuf)[6])) << 16)
+    | (((uint8_t)(RSTRING_PTR(rbuf)[7])) << 24);
   transfer.l = (hi << 32) | lo;
 
   return rb_float_new(transfer.f);
@@ -559,6 +561,7 @@
   last_field_id = rb_intern("@last_field");
   boolean_field_id = rb_intern("@boolean_field");
   bool_value_id = rb_intern("@bool_value");
+  rbuf_ivar_id = rb_intern("@rbuf");
 }
 
 static void Init_rb_methods() {
diff --git a/lib/rb/ext/constants.h b/lib/rb/ext/constants.h
index 57df544..9ea00d2 100644
--- a/lib/rb/ext/constants.h
+++ b/lib/rb/ext/constants.h
@@ -74,6 +74,7 @@
 extern ID skip_method_id;
 extern ID write_method_id;
 extern ID read_all_method_id;
+extern ID read_into_buffer_method_id;
 extern ID native_qmark_method_id;
 
 extern ID fields_const_id;
diff --git a/lib/rb/ext/memory_buffer.c b/lib/rb/ext/memory_buffer.c
index 74efa2c..23cd9eb 100644
--- a/lib/rb/ext/memory_buffer.c
+++ b/lib/rb/ext/memory_buffer.c
@@ -30,6 +30,11 @@
 
 #define GET_BUF(self) rb_ivar_get(self, buf_ivar_id)
 
+VALUE rb_thrift_memory_buffer_write(VALUE self, VALUE str);
+VALUE rb_thrift_memory_buffer_read(VALUE self, VALUE length_value);
+VALUE rb_thrift_memory_buffer_read_byte(VALUE self);
+VALUE rb_thrift_memory_buffer_read_into_buffer(VALUE self, VALUE buffer_value, VALUE size_value);
+
 VALUE rb_thrift_memory_buffer_write(VALUE self, VALUE str) {
   VALUE buf = GET_BUF(self);
   rb_str_buf_cat(buf, RSTRING_PTR(str), RSTRING_LEN(str));
@@ -62,10 +67,59 @@
   return data;
 }
 
+/*
+ * Returns the byte at @index of @buf as a Fixnum and advances @index; raises
+ * EOFError if none remain. Consumed bytes are sliced off at GARBAGE_BUFFER_SIZE.
+ */
+VALUE rb_thrift_memory_buffer_read_byte(VALUE self) {
+  int index = FIX2INT(rb_ivar_get(self, index_ivar_id));
+  VALUE buf = GET_BUF(self);
+  if (index >= RSTRING_LEN(buf)) {
+    rb_raise(rb_eEOFError, "Not enough bytes remain in memory buffer");
+  }
+  char byte = RSTRING_PTR(buf)[index++];
+  if (index >= GARBAGE_BUFFER_SIZE) {
+    rb_ivar_set(self, buf_ivar_id, rb_funcall(buf, slice_method_id, 2, INT2FIX(index), INT2FIX(RSTRING_LEN(buf) - 1)));
+    index = 0;
+  }
+  rb_ivar_set(self, index_ivar_id, INT2FIX(index)); /* stored after compaction so the reset to 0 is saved */
+  return INT2FIX((int) byte);
+}
+
+/*
+ * Copies size_value bytes from @buf (starting at @index) into the String
+ * buffer_value and returns the count copied. Raises EOFError if @buf runs out
+ * or IndexError if buffer_value is too short; @index is then left unchanged.
+ */
+VALUE rb_thrift_memory_buffer_read_into_buffer(VALUE self, VALUE buffer_value, VALUE size_value) {
+  int i = 0;
+  int size = FIX2INT(size_value);
+  int index = FIX2INT(rb_ivar_get(self, index_ivar_id));
+  VALUE buf = GET_BUF(self);
+  while (i < size) {
+    if (index >= RSTRING_LEN(buf)) {
+      rb_raise(rb_eEOFError, "Not enough bytes remain in memory buffer");
+    }
+    if (i >= RSTRING_LEN(buffer_value)) {
+      rb_raise(rb_eIndexError, "index %d out of string", i);
+    }
+    ((char*)RSTRING_PTR(buffer_value))[i++] = RSTRING_PTR(buf)[index++];
+  }
+  /* Compact and store @index once, after the loop: buf stays valid while copying and a reset to 0 is saved. */
+  if (index >= GARBAGE_BUFFER_SIZE) {
+    rb_ivar_set(self, buf_ivar_id, rb_funcall(buf, slice_method_id, 2, INT2FIX(index), INT2FIX(RSTRING_LEN(buf) - 1)));
+    index = 0;
+  }
+  rb_ivar_set(self, index_ivar_id, INT2FIX(index));
+  return INT2FIX(i);
+}
+
 void Init_memory_buffer() {
   VALUE thrift_memory_buffer_class = rb_const_get(thrift_module, rb_intern("MemoryBufferTransport"));
   rb_define_method(thrift_memory_buffer_class, "write", rb_thrift_memory_buffer_write, 1);
   rb_define_method(thrift_memory_buffer_class, "read", rb_thrift_memory_buffer_read, 1);
+  rb_define_method(thrift_memory_buffer_class, "read_byte", rb_thrift_memory_buffer_read_byte, 0);
+  rb_define_method(thrift_memory_buffer_class, "read_into_buffer", rb_thrift_memory_buffer_read_into_buffer, 2);
   
   buf_ivar_id = rb_intern("@buf");
   index_ivar_id = rb_intern("@index");
diff --git a/lib/rb/ext/thrift_native.c b/lib/rb/ext/thrift_native.c
index 09b9fe4..2f6bb1a 100644
--- a/lib/rb/ext/thrift_native.c
+++ b/lib/rb/ext/thrift_native.c
@@ -88,6 +88,7 @@
 ID skip_method_id;
 ID write_method_id;
 ID read_all_method_id;
+ID read_into_buffer_method_id;
 ID native_qmark_method_id;
 
 // constant ids
@@ -170,6 +171,7 @@
   skip_method_id = rb_intern("skip");
   write_method_id = rb_intern("write");
   read_all_method_id = rb_intern("read_all");
+  read_into_buffer_method_id = rb_intern("read_into_buffer");
   native_qmark_method_id = rb_intern("native?");
 
   // constant ids