Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid use of flatbuffers::size_prefixed_root #7109

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 36 additions & 25 deletions arrow-ipc/src/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use std::fmt::{Debug, Formatter};
use std::sync::Arc;

use crate::writer::DictionaryTracker;
use crate::{size_prefixed_root_as_message, KeyValue, Message, CONTINUATION_MARKER};
use crate::{KeyValue, Message, CONTINUATION_MARKER};
use DataType::*;

/// Low level Arrow [Schema] to IPC bytes converter
Expand Down Expand Up @@ -255,32 +255,43 @@ pub fn try_schema_from_ipc_buffer(buffer: &[u8]) -> Result<Schema, ArrowError> {
// 4 bytes - an optional IPC_CONTINUATION_TOKEN prefix
// 4 bytes - the byte length of the payload
// a flatbuffer Message whose header is the Schema
if buffer.len() >= 4 {
// check continuation marker
let continuation_marker = &buffer[0..4];
let begin_offset: usize = if continuation_marker.eq(&CONTINUATION_MARKER) {
// 4 bytes: CONTINUATION_MARKER
// 4 bytes: length
// buffer
4
} else {
// backward compatibility for buffer without the continuation marker
// 4 bytes: length
// buffer
0
};
let msg = size_prefixed_root_as_message(&buffer[begin_offset..]).map_err(|err| {
ArrowError::ParseError(format!("Unable to convert flight info to a message: {err}"))
})?;
let ipc_schema = msg.header_as_schema().ok_or_else(|| {
ArrowError::ParseError("Unable to convert flight info to a schema".to_string())
})?;
Ok(fb_to_schema(ipc_schema))
} else {
Err(ArrowError::ParseError(
if buffer.len() < 4 {
return Err(ArrowError::ParseError(
"The buffer length is less than 4 and missing the continuation marker or length of buffer".to_string()
))
));
}

let (len, buffer) = if buffer[..4] == CONTINUATION_MARKER {
if buffer.len() < 8 {
return Err(ArrowError::ParseError(
"The buffer length is less than 8 and missing the length of buffer".to_string(),
));
}
buffer[4..].split_at(4)
} else {
buffer.split_at(4)
};

let len = <i32>::from_le_bytes(len.try_into().unwrap());
if len < 0 {
return Err(ArrowError::ParseError(format!(
"The encapsulated message's reported length is negative ({len})"
)));
}

if buffer.len() < len as usize {
let actual_len = buffer.len();
return Err(ArrowError::ParseError(
format!("The buffer length ({actual_len}) is less than the encapsulated message's reported length ({len})")
));
}

let msg = crate::root_as_message(buffer)
.map_err(|err| ArrowError::ParseError(format!("Unable to get root as message: {err:?}")))?;
let ipc_schema = msg.header_as_schema().ok_or_else(|| {
ArrowError::ParseError("Unable to convert flight info to a schema".to_string())
})?;
Ok(fb_to_schema(ipc_schema))
}

/// Get the Arrow data type from the flatbuffer Field table
Expand Down
42 changes: 40 additions & 2 deletions arrow-ipc/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1480,11 +1480,14 @@ impl<R: Read> RecordBatchReader for StreamReader<R> {

#[cfg(test)]
mod tests {
use crate::writer::{unslice_run_array, DictionaryTracker, IpcDataGenerator, IpcWriteOptions};
use crate::writer::{
unslice_run_array, write_message, DictionaryTracker, IpcDataGenerator, IpcWriteOptions,
};
use crate::convert::fb_to_schema;

use super::*;

use crate::root_as_message;
use crate::{root_as_message, size_prefixed_root_as_message};
use arrow_array::builder::{PrimitiveRunBuilder, UnionBuilder};
use arrow_array::types::*;
use arrow_buffer::NullBuffer;
Expand Down Expand Up @@ -2472,4 +2475,39 @@ mod tests {
assert_eq!(decoded_batch.expect("Failed to read RecordBatch"), batch);
});
}

#[test]
fn test_roundtrip_schema() {
let schema = Schema::new(vec![
Field::new(
"a",
DataType::Dictionary(Box::new(DataType::UInt16), Box::new(DataType::Utf8)),
false,
),
Field::new(
"b",
DataType::Dictionary(Box::new(DataType::UInt16), Box::new(DataType::Utf8)),
false,
),
]);

let options = IpcWriteOptions::default();
let data_gen = IpcDataGenerator::default();
let mut dict_tracker = DictionaryTracker::new(false);
let encoded_data =
data_gen.schema_to_bytes_with_dictionary_tracker(&schema, &mut dict_tracker, &options);
let mut schema_bytes = vec![];
write_message(&mut schema_bytes, encoded_data, &options).expect("write_message");

let begin_offset: usize = if schema_bytes[0..4].eq(&CONTINUATION_MARKER) { 4 } else { 0 };

size_prefixed_root_as_message(&schema_bytes[begin_offset..])
.expect_err("size_prefixed_root_as_message");

let msg = parse_message(&schema_bytes).expect("parse_message");
let ipc_schema = msg.header_as_schema().expect("header_as_schema");
let new_schema = fb_to_schema(ipc_schema);

assert_eq!(schema, new_schema);
}
}
Loading