Skip to content

Commit

Permalink
fix: Fix bug with decoding timestamp as decimal (#36)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jefffrey authored Jan 3, 2025
1 parent 7618add commit 03429d6
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/array_decoder/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ fn decimal128_decoder(
let data = get_rle_reader(column, data)?;

let secondary = stripe.stream_map().get(column, Kind::Secondary);
let secondary = get_rle_reader(column, secondary)?;
let secondary = get_unsigned_rle_reader(column, secondary);

let present = PresentDecoder::from_stripe(stripe, column);

Expand Down
2 changes: 2 additions & 0 deletions src/encoding/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ fn decode(base: i64, seconds_since_orc_base: i64, nanoseconds: i64) -> (i128, i6
// while we encode them as a single i64 of nanoseconds in Arrow.
let nanoseconds_since_epoch =
(seconds as i128 * NANOSECONDS_IN_SECOND as i128) + (nanoseconds as i128);
// Returning seconds & nanoseconds only for error message
// TODO: does the error message really need those details? Can simplify by removing.
(nanoseconds_since_epoch, seconds, nanoseconds)
}

Expand Down
40 changes: 40 additions & 0 deletions tests/basic/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,46 @@ pub fn decimal128_timestamps_test() {
assert_batches_eq(&batch, &expected);
}

fn integration_path(path: &str) -> String {
let dir = env!("CARGO_MANIFEST_DIR");
format!("{}/tests/integration/data/{}", dir, path)
}

// TODO: Move this to integration test file. Placed here because it has access to assert_batches_eq.
// Should make that function available to both basics & integration.
#[test]
pub fn decimal128_timestamps_1900_test() {
let path = integration_path("TestOrcFile.testDate1900.orc");
let f = File::open(path).expect("no file found");
let mut reader = ArrowReaderBuilder::try_new(f)
.unwrap()
.with_batch_size(11) // it's a big file, we don't want to test more than that
.with_schema(Arc::new(Schema::new(vec![
Field::new("time", DataType::Decimal128(38, 9), true),
Field::new("date", DataType::Date32, true),
])))
.build();
let batch = reader.next().unwrap().unwrap();
let expected = [
"+-----------------------+------------+",
"| time | date |",
"+-----------------------+------------+",
"| -2198229903.900000000 | 1900-12-25 |",
"| -2198229903.899900000 | 1900-12-25 |",
"| -2198229903.899800000 | 1900-12-25 |",
"| -2198229903.899700000 | 1900-12-25 |",
"| -2198229903.899600000 | 1900-12-25 |",
"| -2198229903.899500000 | 1900-12-25 |",
"| -2198229903.899400000 | 1900-12-25 |",
"| -2198229903.899300000 | 1900-12-25 |",
"| -2198229903.899200000 | 1900-12-25 |",
"| -2198229903.899100000 | 1900-12-25 |",
"| -2198229903.899000000 | 1900-12-25 |",
"+-----------------------+------------+",
];
assert_batches_eq(&[batch], &expected);
}

// From https://github.com/apache/arrow-rs/blob/7705acad845e8b2a366a08640f7acb4033ed7049/arrow-flight/src/sql/metadata/mod.rs#L67-L75
pub fn assert_batches_eq(batches: &[RecordBatch], expected_lines: &[&str]) {
let formatted = pretty::pretty_format_batches(batches).unwrap().to_string();
Expand Down

0 comments on commit 03429d6

Please sign in to comment.