Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add json transcoding benchmarks #130

Merged
merged 8 commits into from
Feb 18, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion serde_arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ license = "MIT"
bench = false

[[bench]]
name = "arrow2"
name = "serde_arrow_bench"
# arrow-version:replace: required-features = ["arrow2-0-17", "arrow-{version}"]
required-features = ["arrow2-0-17", "arrow-50"]
harness = false
Expand Down Expand Up @@ -128,6 +128,7 @@ bigdecimal = {version = "0.4", features = ["serde"] }
arrow-json-50 = { package = "arrow-json", version = "50" }
criterion = "0.4"
arrow2_convert = "0.5.0"
serde-transcode = "1"

[dev-dependencies.rust_decimal]
version = "1.33"
Expand Down
4 changes: 2 additions & 2 deletions serde_arrow/benches/groups/complex_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize};
use serde_arrow::_impl::arrow2;

#[derive(Debug, Serialize, Deserialize, ArrowField, ArrowSerialize, ArrowDeserialize)]
struct Item {
pub(crate) struct Item {
string: String,
points: Vec<Point>,
child: SubItem,
Expand All @@ -30,7 +30,7 @@ struct SubItem {
}

impl Item {
fn random<R: Rng + ?Sized>(rng: &mut R) -> Self {
pub(crate) fn random<R: Rng + ?Sized>(rng: &mut R) -> Self {
let n_string = Uniform::new(1, 50).sample(rng);
let n_points = Uniform::new(1, 50).sample(rng);

Expand Down
101 changes: 101 additions & 0 deletions serde_arrow/benches/groups/json_to_arrow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
use crate::groups::complex_common::Item;

use {
serde_arrow::schema::{SchemaLike as _, SerdeArrowSchema},
std::sync::Arc,
};

// arrow-version:replace: use arrow_json_{version}::ReaderBuilder;
use arrow_json_50::ReaderBuilder;

// arrow-version:replace: use arrow_schema_{version}::Schema as ArrowSchema;
use arrow_schema_50::Schema as ArrowSchema;

// arrow-version:replace: use arrow_array_{version}::RecordBatch;
use arrow_array_50::RecordBatch;

/// Benchmarks three ways of turning JSON rows into an Arrow `RecordBatch`.
///
/// The input is a fixed number of randomly generated `Item`s, serialized once
/// up front (outside the timed closures) to newline-delimited JSON. The three
/// measured variants are:
///
/// * `arrow-json`: feed the whole newline-delimited buffer to the arrow-json
///   decoder.
/// * `serde_json_transcode_arrow-json`: parse each row with `serde_json` and
///   transcode it into the arrow-json decoder via its serde interface.
/// * `serde_json_transcode_serde_arrow`: parse each row with `serde_json` and
///   transcode it into a `serde_arrow::ArrowBuilder`.
///
/// Every variant must process all rows, otherwise the timings are not
/// comparable.
fn benchmark_json_to_arrow(c: &mut criterion::Criterion) {
    let rng = &mut rand::thread_rng();
    let items = (0..10_000)
        .map(|_| Item::random(rng))
        .collect::<Vec<Item>>();

    // One compact JSON document per line in a single contiguous buffer.
    let jsons_to_deserialize_concatenated = items
        .iter()
        .map(|item| serde_json::to_string(item).expect("Failed to serialize JSON"))
        .collect::<Vec<_>>()
        .join("\n");

    // Per-row views into the same buffer, so the row-wise variants read exactly
    // the bytes the bulk variant reads. Splitting on '\n' is safe because
    // compact serde_json output escapes control characters and therefore never
    // contains a raw newline.
    let jsons_to_deserialize: Vec<&str> = jsons_to_deserialize_concatenated.split('\n').collect();

    let schema = SerdeArrowSchema::from_samples(&items, Default::default()).unwrap();
    let arrow_fields = schema.to_arrow_fields().unwrap();
    let mut group = c.benchmark_group(format!("json_to_arrow({})", items.len()));

    // arrow-json direct
    group.bench_function("arrow-json", |b| {
        b.iter(|| {
            let schema = Arc::new(ArrowSchema::new(arrow_fields.to_owned()));
            // `decode` returns once `batch_size` rows have been parsed (the
            // default is 1024), so the batch size has to cover the whole input
            // for this variant to do the same amount of work as the others.
            let mut decoder = ReaderBuilder::new(Arc::clone(&schema))
                .with_batch_size(items.len())
                .build_decoder()
                .unwrap();
            let input = jsons_to_deserialize_concatenated.as_bytes();
            let consumed = decoder.decode(criterion::black_box(input)).unwrap();
            // Cheap guard against silently benchmarking a truncated input.
            assert_eq!(
                consumed,
                input.len(),
                "arrow-json decoder stopped before the end of the input"
            );
            let arrays = decoder.flush().unwrap().unwrap().columns().to_vec();
            RecordBatch::try_new(schema, arrays).unwrap()
        })
    });

    // arrow-json via serde
    group.bench_function("serde_json_transcode_arrow-json", |b| {
        b.iter(|| {
            let schema = Arc::new(ArrowSchema::new(arrow_fields.to_owned()));
            let mut decoder = ReaderBuilder::new(Arc::clone(&schema))
                .build_decoder()
                .unwrap();
            // The deserializers must outlive the transcoders borrowing them,
            // hence the two separate collections.
            let mut deserializers = jsons_to_deserialize
                .iter()
                .map(|json_to_deserialize| {
                    serde_json::Deserializer::from_slice(criterion::black_box(
                        json_to_deserialize.as_bytes(),
                    ))
                })
                .collect::<Vec<_>>();
            let transcoders = deserializers
                .iter_mut()
                .map(|deserializer| serde_transcode::Transcoder::new(deserializer))
                .collect::<Vec<_>>();
            decoder.serialize(&transcoders).unwrap();
            let arrays = decoder.flush().unwrap().unwrap().columns().to_vec();
            RecordBatch::try_new(schema, arrays).unwrap()
        })
    });

    // serde_arrow via serde
    group.bench_function("serde_json_transcode_serde_arrow", |b| {
        b.iter(|| {
            let mut arrow_builder = serde_arrow::ArrowBuilder::new(&arrow_fields).unwrap();
            for json_to_deserialize in &jsons_to_deserialize {
                let mut deserializer = serde_json::Deserializer::from_slice(criterion::black_box(
                    json_to_deserialize.as_bytes(),
                ));
                let transcoder = serde_transcode::Transcoder::new(&mut deserializer);
                arrow_builder.push(&transcoder).unwrap();
            }
            let arrays = arrow_builder.build_arrays().unwrap();
            let schema = ArrowSchema::new(arrow_fields.to_owned());
            RecordBatch::try_new(Arc::new(schema), arrays).unwrap()
        })
    });
}

criterion::criterion_group!(benchmark, benchmark_json_to_arrow);
1 change: 1 addition & 0 deletions serde_arrow/benches/groups/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod complex_common;
pub mod impls;
pub mod json_to_arrow;
pub mod primitives;
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ mod groups;
criterion::criterion_main!(
groups::complex_common::benchmark,
groups::primitives::benchmark,
groups::json_to_arrow::benchmark,
);
2 changes: 1 addition & 1 deletion serde_arrow/src/internal/serialization_ng/bool_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ impl SimpleSerializer for BoolBuilder {
Ok(())
}

fn serialize_none(&mut self) -> Result<()> {
fn serialize_unit(&mut self) -> Result<()> {
push_validity(&mut self.validity, false)?;
self.buffer.push(false);
Ok(())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ impl SimpleSerializer for Date64Builder {
Ok(())
}

fn serialize_none(&mut self) -> Result<()> {
fn serialize_unit(&mut self) -> Result<()> {
push_validity(&mut self.validity, false)?;
self.buffer.push(0);
Ok(())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ impl SimpleSerializer for DecimalBuilder {
Ok(())
}

fn serialize_none(&mut self) -> Result<()> {
fn serialize_unit(&mut self) -> Result<()> {
push_validity(&mut self.validity, false)?;
self.buffer.push(0);
Ok(())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ impl SimpleSerializer for DictionaryUtf8Builder {
}

fn serialize_default(&mut self) -> Result<()> {
self.indices.serialize_none()
self.indices.serialize_unit()
}

fn serialize_none(&mut self) -> Result<()> {
self.indices.serialize_none()
fn serialize_unit(&mut self) -> Result<()> {
self.indices.serialize_unit()
}

fn serialize_str(&mut self, v: &str) -> Result<()> {
Expand Down
6 changes: 3 additions & 3 deletions serde_arrow/src/internal/serialization_ng/float_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ impl SimpleSerializer for FloatBuilder<f32> {
Ok(())
}

fn serialize_none(&mut self) -> Result<()> {
fn serialize_unit(&mut self) -> Result<()> {
push_validity(&mut self.validity, false)?;
self.buffer.push(0.0);
Ok(())
Expand Down Expand Up @@ -108,7 +108,7 @@ impl SimpleSerializer for FloatBuilder<f64> {
Ok(())
}

fn serialize_none(&mut self) -> Result<()> {
fn serialize_unit(&mut self) -> Result<()> {
push_validity(&mut self.validity, false)?;
self.buffer.push(0.0);
Ok(())
Expand Down Expand Up @@ -166,7 +166,7 @@ impl SimpleSerializer for FloatBuilder<f16> {
Ok(())
}

fn serialize_none(&mut self) -> Result<()> {
fn serialize_unit(&mut self) -> Result<()> {
push_validity(&mut self.validity, false)?;
self.buffer.push(f16::ZERO);
Ok(())
Expand Down
2 changes: 1 addition & 1 deletion serde_arrow/src/internal/serialization_ng/int_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ where
Ok(())
}

fn serialize_none(&mut self) -> Result<()> {
fn serialize_unit(&mut self) -> Result<()> {
push_validity(&mut self.validity, false)?;
self.buffer.push(I::default());
Ok(())
Expand Down
2 changes: 1 addition & 1 deletion serde_arrow/src/internal/serialization_ng/map_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ impl SimpleSerializer for MapBuilder {
Ok(())
}

fn serialize_none(&mut self) -> Result<()> {
fn serialize_unit(&mut self) -> Result<()> {
self.offsets.push_current_items();
push_validity(&mut self.validity, false)
}
Expand Down
5 changes: 0 additions & 5 deletions serde_arrow/src/internal/serialization_ng/null_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,6 @@ impl SimpleSerializer for NullBuilder {
Ok(())
}

fn serialize_none(&mut self) -> Result<()> {
self.count += 1;
Ok(())
}

fn serialize_unit(&mut self) -> Result<()> {
self.count += 1;
Ok(())
Expand Down
4 changes: 2 additions & 2 deletions serde_arrow/src/internal/serialization_ng/struct_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ impl StructBuilder {
);
}

self.named_fields[idx].1.serialize_none()?;
self.named_fields[idx].1.serialize_unit()?;
}
}
Ok(())
Expand Down Expand Up @@ -142,7 +142,7 @@ impl SimpleSerializer for StructBuilder {
Ok(())
}

fn serialize_none(&mut self) -> Result<()> {
fn serialize_unit(&mut self) -> Result<()> {
push_validity(&mut self.validity, false)?;

for (_, field) in &mut self.named_fields {
Expand Down
2 changes: 1 addition & 1 deletion serde_arrow/src/internal/serialization_ng/utf8_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ impl<O: Offset> SimpleSerializer for Utf8Builder<O> {
Ok(())
}

fn serialize_none(&mut self) -> Result<()> {
fn serialize_unit(&mut self) -> Result<()> {
push_validity(&mut self.validity, false)?;
self.offsets.push_current_items();
Ok(())
Expand Down
7 changes: 5 additions & 2 deletions serde_arrow/src/internal/serialization_ng/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,14 @@ pub trait SimpleSerializer: Sized {
}

fn serialize_unit(&mut self) -> Result<()> {
fail!("serialize_unit is not supported for {}", self.name());
fail!(
"serialize_unit/serialize_none is not supported for {}",
self.name()
);
}

fn serialize_none(&mut self) -> Result<()> {
fail!("serialize_none is not supported for {}", self.name());
self.serialize_unit()
}

fn serialize_some<V: serde::Serialize + ?Sized>(&mut self, value: &V) -> Result<()> {
Expand Down
Loading