THRIFT-1189. rb: Ruby deserializer speed improvements
This patch gives the Ruby deserialization stack a healthy performance boost.
Patch: Ilya Maykov
git-svn-id: https://svn.apache.org/repos/asf/thrift/trunk@1140780 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lib/rb/ext/binary_protocol_accelerated.c b/lib/rb/ext/binary_protocol_accelerated.c
index 3854887..bd1c2da 100644
--- a/lib/rb/ext/binary_protocol_accelerated.c
+++ b/lib/rb/ext/binary_protocol_accelerated.c
@@ -34,6 +34,7 @@
static int VERSION_MASK;
static int TYPE_MASK;
static int BAD_VERSION;
+static ID rbuf_ivar_id;
static void write_byte_direct(VALUE trans, int8_t b) {
WRITE(trans, (char*)&b, 1);
@@ -226,26 +227,36 @@
VALUE rb_thrift_binary_proto_read_i16(VALUE self);
static char read_byte_direct(VALUE self) {
- VALUE buf = READ(self, 1);
- return RSTRING_PTR(buf)[0];
+ VALUE byte = rb_funcall(GET_TRANSPORT(self), read_byte_method_id, 0); /* zero-argument call on the transport; method is the one named by read_byte_method_id (presumably read_byte -- confirm) */
+ return (char)(FIX2INT(byte)); /* the transport's result is treated as a Fixnum and narrowed to a C char */
}
static int16_t read_i16_direct(VALUE self) {
- VALUE buf = READ(self, 2);
- return (int16_t)(((uint8_t)(RSTRING_PTR(buf)[1])) | ((uint16_t)((RSTRING_PTR(buf)[0]) << 8)));
+  /* Read a big-endian int16: ask the transport to place 2 bytes into the
+   * protocol's @rbuf string, then assemble them. */
+  VALUE rbuf = rb_ivar_get(self, rbuf_ivar_id);
+  rb_funcall(GET_TRANSPORT(self), read_into_buffer_method_id, 2, rbuf, INT2FIX(2));
+  /* Fetch the pointer only after the call, and view the bytes as uint8_t:
+   * plain char may be signed, and left-shifting a negative value is
+   * undefined behavior. */
+  const uint8_t *bytes = (const uint8_t *)RSTRING_PTR(rbuf);
+  return (int16_t)(uint16_t)(((uint16_t)bytes[0] << 8) | bytes[1]);
}
static int32_t read_i32_direct(VALUE self) {
- VALUE buf = READ(self, 4);
- return ((uint8_t)(RSTRING_PTR(buf)[3])) |
- (((uint8_t)(RSTRING_PTR(buf)[2])) << 8) |
- (((uint8_t)(RSTRING_PTR(buf)[1])) << 16) |
- (((uint8_t)(RSTRING_PTR(buf)[0])) << 24);
+  /* Read a big-endian int32: ask the transport to place 4 bytes into the
+   * protocol's @rbuf string, then assemble them. */
+  VALUE rbuf = rb_ivar_get(self, rbuf_ivar_id);
+  rb_funcall(GET_TRANSPORT(self), read_into_buffer_method_id, 2, rbuf, INT2FIX(4));
+  /* Fetch the pointer only after the call. Each byte is widened to uint32_t
+   * before shifting: a uint8_t promotes to signed int, so shifting a byte
+   * >= 0x80 left by 24 would overflow int (undefined behavior). */
+  const uint8_t *bytes = (const uint8_t *)RSTRING_PTR(rbuf);
+  uint32_t value = ((uint32_t)bytes[0] << 24) |
+                   ((uint32_t)bytes[1] << 16) |
+                   ((uint32_t)bytes[2] << 8) |
+                   (uint32_t)bytes[3];
+  return (int32_t)value;
}
static int64_t read_i64_direct(VALUE self) {
- uint64_t hi = read_i32_direct(self);
- uint32_t lo = read_i32_direct(self);
+  /* Read a big-endian int64 with a single 8-byte transport call into the
+   * protocol's @rbuf string, instead of two separate 4-byte reads. */
+  VALUE rbuf = rb_ivar_get(self, rbuf_ivar_id);
+  rb_funcall(GET_TRANSPORT(self), read_into_buffer_method_id, 2, rbuf, INT2FIX(8));
+  /* Fetch the pointer only after the call. Bytes are widened to an unsigned
+   * 32-bit type before shifting so that a byte >= 0x80 shifted by 24 neither
+   * overflows signed int nor sign-extends into the upper half of hi. */
+  const uint8_t *bytes = (const uint8_t *)RSTRING_PTR(rbuf);
+  uint64_t hi = ((uint32_t)bytes[0] << 24) |
+                ((uint32_t)bytes[1] << 16) |
+                ((uint32_t)bytes[2] << 8) |
+                (uint32_t)bytes[3];
+  uint32_t lo = ((uint32_t)bytes[4] << 24) |
+                ((uint32_t)bytes[5] << 16) |
+                ((uint32_t)bytes[6] << 8) |
+                (uint32_t)bytes[7];
return (hi << 32) | lo;
}
@@ -425,4 +436,6 @@
rb_define_method(bpa_class, "read_map_end", rb_thift_binary_proto_read_map_end, 0);
rb_define_method(bpa_class, "read_list_end", rb_thift_binary_proto_read_list_end, 0);
rb_define_method(bpa_class, "read_set_end", rb_thift_binary_proto_read_set_end, 0);
+
+ rbuf_ivar_id = rb_intern("@rbuf");
}
diff --git a/lib/rb/ext/compact_protocol.c b/lib/rb/ext/compact_protocol.c
index 6c0123d..a47fe6c 100644
--- a/lib/rb/ext/compact_protocol.c
+++ b/lib/rb/ext/compact_protocol.c
@@ -34,6 +34,7 @@
static ID last_field_id;
static ID boolean_field_id;
static ID bool_value_id;
+static ID rbuf_ivar_id;
static int VERSION;
static int VERSION_MASK;
@@ -354,8 +355,8 @@
}
static char read_byte_direct(VALUE self) {
- VALUE buf = READ(self, 1);
- return RSTRING_PTR(buf)[0];
+ VALUE byte = rb_funcall(GET_TRANSPORT(self), read_byte_method_id, 0); /* zero-argument call on the transport; method is the one named by read_byte_method_id (presumably read_byte -- confirm) */
+ return (char)(FIX2INT(byte)); /* the transport's result is treated as a Fixnum and narrowed to a C char */
}
static int64_t zig_zag_to_ll(int64_t n) {
@@ -528,15 +529,16 @@
double f;
int64_t l;
} transfer;
- VALUE bytes = READ(self, 8);
- uint32_t lo = ((uint8_t)(RSTRING_PTR(bytes)[0]))
- | (((uint8_t)(RSTRING_PTR(bytes)[1])) << 8)
- | (((uint8_t)(RSTRING_PTR(bytes)[2])) << 16)
- | (((uint8_t)(RSTRING_PTR(bytes)[3])) << 24);
- uint64_t hi = (((uint8_t)(RSTRING_PTR(bytes)[4])))
- | (((uint8_t)(RSTRING_PTR(bytes)[5])) << 8)
- | (((uint8_t)(RSTRING_PTR(bytes)[6])) << 16)
- | (((uint8_t)(RSTRING_PTR(bytes)[7])) << 24);
+ VALUE rbuf = rb_ivar_get(self, rbuf_ivar_id);
+ rb_funcall(GET_TRANSPORT(self), read_into_buffer_method_id, 2, rbuf, INT2FIX(8));
+ uint32_t lo = ((uint8_t)(RSTRING_PTR(rbuf)[0]))
+ | (((uint8_t)(RSTRING_PTR(rbuf)[1])) << 8)
+ | (((uint8_t)(RSTRING_PTR(rbuf)[2])) << 16)
+ | (((uint8_t)(RSTRING_PTR(rbuf)[3])) << 24);
+ uint64_t hi = (((uint8_t)(RSTRING_PTR(rbuf)[4])))
+ | (((uint8_t)(RSTRING_PTR(rbuf)[5])) << 8)
+ | (((uint8_t)(RSTRING_PTR(rbuf)[6])) << 16)
+ | (((uint8_t)(RSTRING_PTR(rbuf)[7])) << 24);
transfer.l = (hi << 32) | lo;
return rb_float_new(transfer.f);
@@ -559,6 +561,7 @@
last_field_id = rb_intern("@last_field");
boolean_field_id = rb_intern("@boolean_field");
bool_value_id = rb_intern("@bool_value");
+ rbuf_ivar_id = rb_intern("@rbuf");
}
static void Init_rb_methods() {
diff --git a/lib/rb/ext/constants.h b/lib/rb/ext/constants.h
index 57df544..9ea00d2 100644
--- a/lib/rb/ext/constants.h
+++ b/lib/rb/ext/constants.h
@@ -74,6 +74,7 @@
extern ID skip_method_id;
extern ID write_method_id;
extern ID read_all_method_id;
+extern ID read_into_buffer_method_id;
extern ID native_qmark_method_id;
extern ID fields_const_id;
diff --git a/lib/rb/ext/memory_buffer.c b/lib/rb/ext/memory_buffer.c
index 74efa2c..23cd9eb 100644
--- a/lib/rb/ext/memory_buffer.c
+++ b/lib/rb/ext/memory_buffer.c
@@ -30,6 +30,11 @@
#define GET_BUF(self) rb_ivar_get(self, buf_ivar_id)
+VALUE rb_thrift_memory_buffer_write(VALUE self, VALUE str); /* prototypes for the functions that Init_memory_buffer registers as write, read, read_byte and read_into_buffer */
+VALUE rb_thrift_memory_buffer_read(VALUE self, VALUE length_value);
+VALUE rb_thrift_memory_buffer_read_byte(VALUE self);
+VALUE rb_thrift_memory_buffer_read_into_buffer(VALUE self, VALUE buffer_value, VALUE size_value);
+
VALUE rb_thrift_memory_buffer_write(VALUE self, VALUE str) {
VALUE buf = GET_BUF(self);
rb_str_buf_cat(buf, RSTRING_PTR(str), RSTRING_LEN(str));
@@ -62,10 +67,59 @@
return data;
}
+/*
+ * Registered as MemoryBufferTransport#read_byte.
+ *
+ * Consumes one byte of @buf at position @index and returns it as a Fixnum.
+ * The byte passes through a C `char`, so the sign of values >= 0x80 follows
+ * the platform's char signedness.
+ * NOTE(review): a pure-Ruby read_byte may return 0..255 instead -- confirm
+ * that callers do not depend on the sign.
+ *
+ * Raises EOFError when @index is already at or beyond the end of @buf.
+ */
+VALUE rb_thrift_memory_buffer_read_byte(VALUE self) {
+  VALUE index_value = rb_ivar_get(self, index_ivar_id);
+  int index = FIX2INT(index_value);
+
+  VALUE buf = GET_BUF(self);
+  if (index >= RSTRING_LEN(buf)) {
+    rb_raise(rb_eEOFError, "Not enough bytes remain in memory buffer");
+  }
+  char byte = RSTRING_PTR(buf)[index++];
+
+  if (index >= GARBAGE_BUFFER_SIZE) {
+    /* Replace @buf with the slice starting at the read position, so the
+     * read position within the new buffer becomes 0. */
+    rb_ivar_set(self, buf_ivar_id, rb_funcall(buf, slice_method_id, 2, INT2FIX(index), INT2FIX(RSTRING_LEN(buf) - 1)));
+    index = 0;
+  }
+  /* Store @index only after the possible reset above; writing it earlier
+   * would leave a stale offset pointing into the freshly sliced buffer. */
+  rb_ivar_set(self, index_ivar_id, INT2FIX(index));
+
+  int result = (int) byte;
+  return INT2FIX(result);
+}
+
+/*
+ * Registered as MemoryBufferTransport#read_into_buffer(buffer, size).
+ *
+ * Copies `size` bytes from @buf, starting at @index, into the first `size`
+ * bytes of `buffer_value` (written in place) and returns the number of bytes
+ * copied as a Fixnum.
+ *
+ * Raises EOFError if @buf runs out before `size` bytes were copied, and
+ * IndexError if `buffer_value` is shorter than `size`. When either is raised
+ * @index is left unchanged, although `buffer_value` may already hold the
+ * bytes copied before the failure.
+ */
+VALUE rb_thrift_memory_buffer_read_into_buffer(VALUE self, VALUE buffer_value, VALUE size_value) {
+  int i = 0;
+  int size = FIX2INT(size_value);
+  VALUE buf = GET_BUF(self);
+  /* Track the read position locally; it is written back once at the end. */
+  int index = FIX2INT(rb_ivar_get(self, index_ivar_id));
+
+  while (i < size) {
+    if (index >= RSTRING_LEN(buf)) {
+      rb_raise(rb_eEOFError, "Not enough bytes remain in memory buffer");
+    }
+    char byte = RSTRING_PTR(buf)[index++];
+
+    if (i >= RSTRING_LEN(buffer_value)) {
+      rb_raise(rb_eIndexError, "index %d out of string", i);
+    }
+    ((char*)RSTRING_PTR(buffer_value))[i] = byte;
+    i++;
+  }
+
+  /* Compact once, after the whole copy: slicing inside the loop would swap
+   * @buf while this function keeps indexing the old string. */
+  if (index >= GARBAGE_BUFFER_SIZE) {
+    rb_ivar_set(self, buf_ivar_id, rb_funcall(buf, slice_method_id, 2, INT2FIX(index), INT2FIX(RSTRING_LEN(buf) - 1)));
+    index = 0;
+  }
+  /* Store @index after the possible reset so it always matches @buf. */
+  rb_ivar_set(self, index_ivar_id, INT2FIX(index));
+
+  return INT2FIX(i);
+}
+
void Init_memory_buffer() {
VALUE thrift_memory_buffer_class = rb_const_get(thrift_module, rb_intern("MemoryBufferTransport"));
rb_define_method(thrift_memory_buffer_class, "write", rb_thrift_memory_buffer_write, 1);
rb_define_method(thrift_memory_buffer_class, "read", rb_thrift_memory_buffer_read, 1);
+ rb_define_method(thrift_memory_buffer_class, "read_byte", rb_thrift_memory_buffer_read_byte, 0);
+ rb_define_method(thrift_memory_buffer_class, "read_into_buffer", rb_thrift_memory_buffer_read_into_buffer, 2);
buf_ivar_id = rb_intern("@buf");
index_ivar_id = rb_intern("@index");
diff --git a/lib/rb/ext/thrift_native.c b/lib/rb/ext/thrift_native.c
index 09b9fe4..2f6bb1a 100644
--- a/lib/rb/ext/thrift_native.c
+++ b/lib/rb/ext/thrift_native.c
@@ -88,6 +88,7 @@
ID skip_method_id;
ID write_method_id;
ID read_all_method_id;
+ID read_into_buffer_method_id;
ID native_qmark_method_id;
// constant ids
@@ -170,6 +171,7 @@
skip_method_id = rb_intern("skip");
write_method_id = rb_intern("write");
read_all_method_id = rb_intern("read_all");
+ read_into_buffer_method_id = rb_intern("read_into_buffer");
native_qmark_method_id = rb_intern("native?");
// constant ids