Skip to content

Commit

Permalink
Adding check for null element in a DF (#60)
Browse files Browse the repository at this point in the history
Co-authored-by: Darek <dchrostowski@medallia.com>
  • Loading branch information
Bidek56 and Darek authored Apr 6, 2023
1 parent acd62be commit c3df322
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 28 deletions.
14 changes: 14 additions & 0 deletions __tests__/dataframe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,20 @@ describe("dataframe", () => {
expect(actual).toFrameEqual(expected);
});
});
test("DF with nulls", () => {
  // Column-oriented reference frame: `foo` holds a null in its second slot.
  const columnWise = pl.DataFrame({
    foo: [1, null, 3],
    bar: [6.0, 0.5, 7.0],
    ham: ["a", "b", "c"],
  });
  // The same data supplied row by row; the second row carries the null cell.
  const rowWise = pl.DataFrame([
    { foo: 1, bar: 6.0, ham: "a" },
    { foo: null, bar: 0.5, ham: "b" },
    { foo: 3, bar: 7.0, ham: "c" },
  ]);
  // Building from rows must yield the same frame as building from columns.
  expect(rowWise).toFrameEqual(columnWise);
});
test("dropNulls", () => {
const actual = pl
.DataFrame({
Expand Down
63 changes: 35 additions & 28 deletions src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1524,37 +1524,44 @@ fn obj_to_pairs(rows: &Array, len: usize) -> impl '_ + Iterator<Item = Vec<(Stri
let keys = Object::keys(&obj).unwrap();
keys.iter()
.map(|key| {
let value = obj.get::<_, napi::JsUnknown>(&key).unwrap().unwrap();
let ty = value.get_type().unwrap();
let dtype = match ty {
ValueType::Boolean => DataType::Boolean,
ValueType::Number => DataType::Float64,
ValueType::String => DataType::Utf8,
ValueType::Object => {
if value.is_array().unwrap() {
let arr: napi::JsObject = unsafe { value.cast() };
let len = arr.get_array_length().unwrap();
// don't compare too many items, as it could be expensive
let max_take = std::cmp::min(len as usize, 10);
let mut dtypes: Vec<DataType> = Vec::with_capacity(len as usize);

for idx in 0..max_take {
let item: napi::JsUnknown = arr.get_element(idx as u32).unwrap();
let ty = item.get_type().unwrap();
let dt: Wrap<DataType> = ty.into();
dtypes.push(dt.0)
let value = obj.get::<_, napi::JsUnknown>(&key).unwrap_or(None);
let dtype = match value {
Some(val) => {
let ty = val.get_type().unwrap();
match ty {
ValueType::Boolean => DataType::Boolean,
ValueType::Number => DataType::Float64,
ValueType::String => DataType::Utf8,
ValueType::Object => {
if val.is_array().unwrap() {
let arr: napi::JsObject = unsafe { val.cast() };
let len = arr.get_array_length().unwrap();
// don't compare too many items, as it could be expensive
let max_take = std::cmp::min(len as usize, 10);
let mut dtypes: Vec<DataType> = Vec::with_capacity(len as usize);

for idx in 0..max_take {
let item: napi::JsUnknown = arr.get_element(idx as u32).unwrap();
let ty = item.get_type().unwrap();
let dt: Wrap<DataType> = ty.into();
dtypes.push(dt.0)
}
let dtype = coerce_data_type(&dtypes);

DataType::List(dtype.into())
} else if val.is_date().unwrap() {
DataType::Datetime(TimeUnit::Milliseconds, None)
} else {
DataType::Struct(vec![])
}
}
let dtype = coerce_data_type(&dtypes);

DataType::List(dtype.into())
} else if value.is_date().unwrap() {
DataType::Datetime(TimeUnit::Milliseconds, None)
} else {
DataType::Struct(vec![])
ValueType::BigInt => DataType::UInt64,
_ => DataType::Null,
}
}
ValueType::BigInt => DataType::UInt64,
_ => DataType::Null,
None => {
DataType::Null
}
};
(key.to_owned(), dtype)
})
Expand Down

0 comments on commit c3df322

Please sign in to comment.