Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add proof of concept for serializers without bytecode #120

Merged
merged 14 commits into from
Jan 27, 2024
Merged
7 changes: 7 additions & 0 deletions Changes.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Change log

## 0.10.0

- Remove deprecated APIs
- Use the serde serialization APIs directly, instead of using the bytecode
serializer. Serialization will be about `2x` faster
- Fix bug in `SchemaLike::from_value` with incorrect strategy deserialization

## 0.9.1

- `Decimal128` support: serialize / deserialize
Expand Down
25 changes: 25 additions & 0 deletions serde_arrow/benches/groups/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ macro_rules! define_benchmark {
let bench_serde_arrow = true;
$($(let bench_serde_arrow = $bench_serde_arrow; )?)?

if bench_serde_arrow {
group.bench_function("serde_arrow_ng", |b| {
b.iter(|| criterion::black_box(crate::groups::impls::serde_arrow_ng::serialize(&arrow_fields, &items).unwrap()));
});
}

if bench_serde_arrow {
group.bench_function("serde_arrow", |b| {
b.iter(|| criterion::black_box(crate::groups::impls::serde_arrow::serialize(&arrow_fields, &items).unwrap()));
Expand Down Expand Up @@ -90,6 +96,25 @@ pub mod serde_arrow {
}
}

pub mod serde_arrow_ng {
    use serde::Serialize;
    use serde_arrow::{
        Result,
        _impl::{arrow::datatypes::Field, ArrayBuilder},
        schema::SerdeArrowSchema,
    };

    /// Benchmark driver for the `ArrayBuilder`-based serializer.
    ///
    /// Converts the arrow `fields` into a `SerdeArrowSchema`, creates an
    /// `ArrayBuilder` for that schema and feeds `items` into it. Only the
    /// serialization step is exercised: no arrays are built or returned.
    ///
    /// Any error from the schema conversion, the builder construction or the
    /// serialization itself is propagated to the caller.
    pub fn serialize<T>(fields: &[Field], items: &T) -> Result<()>
    where
        T: Serialize + ?Sized,
    {
        let schema = SerdeArrowSchema::from_arrow_fields(fields)?;
        let mut array_builder = ArrayBuilder::new(&schema)?;

        array_builder.extend(items)?;
        Ok(())
    }
}

pub mod arrow {

use std::sync::Arc;
Expand Down
237 changes: 11 additions & 226 deletions serde_arrow/src/arrow2_impl/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,9 @@ use crate::{
_impl::arrow2::{array::Array, datatypes::Field},
internal::{
error::Result,
generic,
schema::GenericField,
serialization::{compile_serialization, CompilationOptions, Interpreter},
sink::serialize_into_sink,
schema::{GenericField, SerdeArrowSchema},
serialization_ng::ArrayBuilder,
source::deserialize_from_source,
tracing::{Tracer, TracingOptions},
},
};

Expand Down Expand Up @@ -61,7 +58,7 @@ use crate::{
/// # Ok(())
/// # }
/// ```
pub struct Arrow2Builder(generic::GenericBuilder);
pub struct Arrow2Builder(ArrayBuilder);

impl std::fmt::Debug for Arrow2Builder {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Expand All @@ -76,17 +73,14 @@ impl Arrow2Builder {
/// given fields.
///
pub fn new(fields: &[Field]) -> Result<Self> {
let fields = fields
.iter()
.map(GenericField::try_from)
.collect::<Result<Vec<_>>>()?;
Ok(Self(generic::GenericBuilder::new_for_arrays(&fields)?))
let schema = SerdeArrowSchema::from_arrow2_fields(fields)?;
Ok(Self(ArrayBuilder::new(&schema)?))
}

/// Add a single record to the arrays
///
pub fn push<T: Serialize + ?Sized>(&mut self, item: &T) -> Result<()> {
self.0.push(item)
self.0.extend(&[item])
}

/// Add multiple records to the arrays
Expand All @@ -100,7 +94,7 @@ impl Arrow2Builder {
/// This operation will reset the underlying buffers and start a new batch.
///
pub fn build_arrays(&mut self) -> Result<Vec<Box<dyn Array>>> {
self.0 .0.build_arrow2_arrays()
self.0.build_arrow2_arrays()
}
}

Expand Down Expand Up @@ -143,16 +137,9 @@ pub fn to_arrow2<T>(fields: &[Field], items: &T) -> Result<Vec<Box<dyn Array>>>
where
T: Serialize + ?Sized,
{
let fields = fields
.iter()
.map(GenericField::try_from)
.collect::<Result<Vec<_>>>()?;

let program = compile_serialization(&fields, CompilationOptions::default())?;
let mut interpreter = Interpreter::new(program);
serialize_into_sink(&mut interpreter, items)?;

interpreter.build_arrow2_arrays()
let mut builder = Arrow2Builder::new(fields)?;
builder.extend(items)?;
builder.build_arrays()
}

/// Deserialize items from the given arrow2 arrays (*requires* one of the
Expand Down Expand Up @@ -211,208 +198,6 @@ where
mappings.push(array.as_ref().extract_buffers(field, &mut buffers)?);
}

let interpreter = deserialization::compile_deserialization(
num_items,
&mappings,
buffers,
deserialization::CompilationOptions::default(),
)?;
let interpreter = deserialization::compile_deserialization(num_items, &mappings, buffers)?;
deserialize_from_source(interpreter)
}

/// Replaced by
/// [`SchemaLike::from_samples`][crate::schema::SchemaLike::from_samples]
/// (*[example][serialize_into_fields]*)
///
/// ```rust
/// # fn main() -> serde_arrow::Result<()> {
/// # use serde_arrow::_impl::arrow2;
/// use arrow2::datatypes::Field;
/// use serde::Serialize;
/// use serde_arrow::schema::{SchemaLike, TracingOptions};
///
/// ##[derive(Serialize)]
/// struct Record {
/// a: u32,
/// b: f32,
/// }
///
/// let samples = [Record { a: 1, b: 2.0 }, /* ... */ ];
/// let fields = Vec::<Field>::from_samples(&samples, TracingOptions::default())?;
/// #
/// # drop(fields);
/// # Ok(())
/// # }
/// ```
#[deprecated = "serde_arrow::arrow2::serialize_into_fields is deprecated. Use serde_arrow::schema::SchemaLike::from_samples instead"]
pub fn serialize_into_fields<T>(items: &T, options: TracingOptions) -> Result<Vec<Field>>
where
    T: Serialize + ?Sized,
{
    // Feed all samples into a fresh tracer constructed with the string "$"
    // and the caller-supplied tracing options.
    let mut sample_tracer = Tracer::new(String::from("$"), options);
    sample_tracer.trace_samples(items)?;

    // Convert what was traced into a schema, then into arrow2 fields.
    sample_tracer.to_schema()?.to_arrow2_fields()
}

/// Renamed to [`serde_arrow::to_arrow2`][crate::to_arrow2]
#[deprecated = "serde_arrow::arrow2::serialize_into_arrays is deprecated. Use serde_arrow::to_arrow2 instead"]
pub fn serialize_into_arrays<T>(fields: &[Field], items: &T) -> Result<Vec<Box<dyn Array>>>
where
T: Serialize + ?Sized,
{
// Deprecated alias: forwards both arguments unchanged to `crate::to_arrow2`.
crate::to_arrow2(fields, items)
}

/// Renamed to [`serde_arrow::from_arrow2`][crate::from_arrow2]
#[deprecated = "serde_arrow::arrow2::deserialize_from_arrays is deprecated. Use serde_arrow::from_arrow2 instead"]
pub fn deserialize_from_arrays<'de, T, A>(fields: &'de [Field], arrays: &'de [A]) -> Result<T>
where
T: Deserialize<'de>,
A: AsRef<dyn Array>,
{
// Deprecated alias: forwards both arguments unchanged to `crate::from_arrow2`.
crate::from_arrow2(fields, arrays)
}

/// Replaced by
/// [`SchemaLike::from_samples`][crate::schema::SchemaLike::from_samples] and
/// [`Items`][crate::utils::Items] (*[example][serialize_into_field]*)
///
/// ```rust
/// # fn main() -> serde_arrow::Result<()> {
/// # use serde_arrow::_impl::arrow2;
/// use arrow2::datatypes::Field;
/// use serde_arrow::{
/// schema::{SchemaLike, TracingOptions},
/// utils::Items,
/// };
///
/// let samples: Vec<u32> = vec![1, 2, 3, /* ... */ ];
/// let fields = Vec::<Field>::from_samples(&Items(&samples), TracingOptions::default())?;
/// #
/// # drop(fields);
/// # Ok(())
/// # }
/// ```
#[deprecated = "serde_arrow::arrow2::serialize_into_field is deprecated. Use serde_arrow::schema::SchemaLike::from_samples with serde_arrow::utils::Items instead"]
pub fn serialize_into_field<T>(items: &T, name: &str, options: TracingOptions) -> Result<Field>
where
    T: Serialize + ?Sized,
{
    // Feed all samples into a fresh tracer constructed with the string "$"
    // and the caller-supplied tracing options.
    let mut sample_tracer = Tracer::new(String::from("$"), options);
    sample_tracer.trace_samples(items)?;

    // Ask the tracer for a field called `name`, then convert that
    // intermediate field into an arrow2 `Field`.
    Field::try_from(&sample_tracer.to_field(name)?)
}

/// Replaced by [`serde_arrow::to_arrow2`][crate::to_arrow2] and
/// [`Items`][crate::utils::Items] (*[example][serialize_into_array]*)
///
/// ```rust
/// # fn main() -> serde_arrow::Result<()> {
/// # use serde_arrow::_impl::arrow2::datatypes::Field;
/// # use serde_arrow::schema::{SchemaLike, TracingOptions};
/// use serde_arrow::utils::Items;
///
/// let samples: Vec<u32> = vec![1, 2, 3, /* ... */ ];
/// # let fields = Vec::<Field>::from_samples(&Items(&samples), TracingOptions::default())?;
/// let arrays = serde_arrow::to_arrow2(&fields, &Items(&samples))?;
/// # Ok(())
/// # }
/// ```
#[deprecated = "serde_arrow::arrow2::serialize_into_array is deprecated. Use serde_arrow::to_arrow2 with serde_arrow::utils::Items instead"]
pub fn serialize_into_array<T>(field: &Field, items: &T) -> Result<Box<dyn Array>>
where
T: Serialize + ?Sized,
{
let field: GenericField = field.try_into()?;

let program = compile_serialization(
std::slice::from_ref(&field),
CompilationOptions::default().wrap_with_struct(false),
)?;
let mut interpreter = Interpreter::new(program);
serialize_into_sink(&mut interpreter, items)?;
interpreter.build_arrow2_array()
}

/// Replaced by [`serde_arrow::from_arrow2`][crate::from_arrow2] and
/// [`Items`][crate::utils::Items] (*[example][deserialize_from_array]*)
///
/// ```rust
/// # fn main() -> serde_arrow::Result<()> {
/// # use serde_arrow::schema::{SerdeArrowSchema, SchemaLike, TracingOptions};
/// # let samples: Vec<u32> = vec![1, 2, 3, /* ... */ ];
/// # let fields = SerdeArrowSchema::from_samples(&Items(&samples), TracingOptions::default())?
/// # .to_arrow2_fields()?;
/// # let arrays = serde_arrow::to_arrow2(&fields, &Items(&samples))?;
/// #
/// use serde_arrow::utils::Items;
///
/// let Items(items): Items<Vec<u32>> = serde_arrow::from_arrow2(&fields, &arrays)?;
/// #
/// # drop(items);
/// # Ok(())
/// # }
/// ```
#[deprecated = "serde_arrow::arrow2::deserialize_from_array is deprecated. Use serde_arrow::from_arrow2 instead"]
pub fn deserialize_from_array<'de, T, A>(field: &'de Field, array: &'de A) -> Result<T>
where
T: Deserialize<'de>,
A: AsRef<dyn Array> + 'de + ?Sized,
{
// Borrow the array as `&dyn Array` and delegate to `generic::deserialize_from_array`.
generic::deserialize_from_array(field, array.as_ref())
}

/// Replaced by [`Arrow2Builder`][crate::Arrow2Builder] and
/// [`Items`][crate::utils::Items] / [`Item`][crate::utils::Item] (*[example][ArrayBuilder]*)
///
/// ```rust
/// # fn main() -> serde_arrow::Result<()> {
/// # use serde_arrow::_impl::arrow2;
/// use arrow2::datatypes::{DataType, Field};
/// use serde_arrow::{Arrow2Builder, utils::{Items, Item}};
///
/// let mut builder = Arrow2Builder::new(&[
/// Field::new("item", DataType::UInt8, false),
/// ])?;
///
/// builder.push(&Item(0))?;
/// builder.push(&Item(1))?;
/// builder.push(&Item(2))?;
///
/// builder.extend(&Items(&[3, 4, 5]))?;
///
/// let arrays = builder.build_arrays()?;
/// # drop(arrays);
/// # Ok(())
/// # }
/// ```
#[deprecated = "serde_arrow::arrow2::ArrayBuilder is deprecated. Use serde_arrow::Arrow2Builder with serde_arrow::utils::Items instead"]
// Tuple struct wrapping `generic::GenericBuilder`; every method below delegates to it.
pub struct ArrayBuilder(generic::GenericBuilder);

#[allow(deprecated)]
impl ArrayBuilder {
/// Construct a new builder for the given field
pub fn new(field: &Field) -> Result<Self> {
// The arrow2 field is first converted into a `GenericField`; a failure of
// either the conversion or the builder construction is returned as the error.
Ok(Self(generic::GenericBuilder::new_for_array(
GenericField::try_from(field)?,
)?))
}

/// Add a single item to the arrays
pub fn push<T: Serialize + ?Sized>(&mut self, item: &T) -> Result<()> {
self.0.push(item)
}

/// Add multiple items to the arrays
pub fn extend<T: Serialize + ?Sized>(&mut self, items: &T) -> Result<()> {
self.0.extend(items)
}

/// Build the array from the rows pushed so far.
pub fn build_array(&mut self) -> Result<Box<dyn Array>> {
// `self.0 .0` reaches through the wrapped `GenericBuilder` to its first field,
// which provides `build_arrow2_array`.
self.0 .0.build_arrow2_array()
}
}
3 changes: 0 additions & 3 deletions serde_arrow/src/arrow2_impl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,3 @@ pub(crate) mod deserialization;
pub(crate) mod schema;
pub(crate) mod serialization;
mod type_support;

#[cfg(test)]
mod test_deprecated_api;
Loading
Loading