THRIFT-5928: fix skip() for binary fields with non-UTF-8 bytes
Client: rs
skip_till_depth() calls read_string() for TType::String fields, which
performs String::from_utf8() and fails on binary fields containing raw
bytes. Since string and binary share the same wire type and skip()
discards the value anyway, use read_bytes() instead.
This is safe because:
- The result is discarded immediately via .map(|_| ())
- read_string() is literally read_bytes() + String::from_utf8()
- Both consume identical wire bytes (4-byte length + N bytes)
- No behavioral change for valid UTF-8 string data
The bug triggers any time a newer schema adds a binary field to a
struct that an older binary is still deserializing — the older binary
hits the skip() path for the unknown field and fails on the raw bytes.
diff --git a/lib/rs/src/protocol/mod.rs b/lib/rs/src/protocol/mod.rs
index 0fbf509..a467ed7 100644
--- a/lib/rs/src/protocol/mod.rs
+++ b/lib/rs/src/protocol/mod.rs
@@ -208,7 +208,7 @@
TType::I32 => self.read_i32().map(|_| ()),
TType::I64 => self.read_i64().map(|_| ()),
TType::Double => self.read_double().map(|_| ()),
- TType::String => self.read_string().map(|_| ()),
+ TType::String => self.read_bytes().map(|_| ()),
TType::Uuid => self.read_uuid().map(|_| ()),
TType::Struct => {
self.read_struct_begin()?;
@@ -1060,4 +1060,73 @@
{
t.flush().unwrap();
}
+
+ // Test unknown binary fields are skipped without error (THRIFT-5928)
+
+ fn build_struct_with_unknown_binary_field(payload: &[u8]) -> Vec<u8> {
+ let mut buf = Vec::new();
+ buf.push(0x0A); // field 1: TType::I64
+ buf.extend_from_slice(&1_i16.to_be_bytes());
+ buf.extend_from_slice(&42_i64.to_be_bytes());
+ buf.push(0x0B); // field 99: TType::String (same wire type for binary)
+ buf.extend_from_slice(&99_i16.to_be_bytes());
+ buf.extend_from_slice(&(payload.len() as i32).to_be_bytes());
+ buf.extend_from_slice(payload);
+ buf.push(0x00); // stop
+ buf
+ }
+
+ fn read_struct_skipping_unknown(data: &[u8]) -> crate::Result<i64> {
+ let cursor = Cursor::new(data.to_vec());
+ let mut proto = TBinaryInputProtocol::new(cursor, true);
+ proto.read_struct_begin()?;
+ let mut known_value: Option<i64> = None;
+ loop {
+ let field = proto.read_field_begin()?;
+ if field.field_type == TType::Stop {
+ break;
+ }
+ match field.id {
+ Some(1) if field.field_type == TType::I64 => {
+ known_value = Some(proto.read_i64()?);
+ }
+ _ => {
+ proto.skip(field.field_type)?;
+ }
+ }
+ proto.read_field_end()?;
+ }
+ proto.read_struct_end()?;
+ known_value.ok_or_else(|| {
+ crate::Error::Protocol(crate::ProtocolError {
+ kind: crate::ProtocolErrorKind::InvalidData,
+ message: "missing known field".to_string(),
+ })
+ })
+ }
+
+ #[test]
+ fn must_skip_binary_field_with_non_utf8_bytes() {
+ let non_utf8: Vec<u8> = vec![
+ 0x04, 0xFF, 0xFE, 0x80, 0x90, 0xAB, 0xCD, 0xEF,
+ 0xDE, 0xAD, 0xBE, 0xEF, 0xCA, 0xFE, 0xBA, 0xBE,
+ ];
+ assert!(String::from_utf8(non_utf8.clone()).is_err());
+ let data = build_struct_with_unknown_binary_field(&non_utf8);
+ let result = read_struct_skipping_unknown(&data);
+ assert!(result.is_ok(), "skip() failed on non-UTF-8 binary: {:?}", result.err());
+ assert_eq!(result.unwrap(), 42);
+ }
+
+ #[test]
+ fn must_skip_valid_utf8_string_field() {
+ let data = build_struct_with_unknown_binary_field(b"hello world");
+ assert_eq!(read_struct_skipping_unknown(&data).unwrap(), 42);
+ }
+
+ #[test]
+ fn must_skip_empty_binary_field() {
+ let data = build_struct_with_unknown_binary_field(&[]);
+ assert_eq!(read_struct_skipping_unknown(&data).unwrap(), 42);
+ }
}