-
Notifications
You must be signed in to change notification settings - Fork 180
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: implement bool_and and bool_or (#3754)
# Description

This PR adds two sets of boolean functions:

1. List Boolean Functions:
   - `list_bool_and`: Returns true if all non-null elements in a list are true, false if any non-null element is false, and null if all elements are null or the list is empty.
   - `list_bool_or`: Returns true if any non-null element in a list is true, false if all non-null elements are false, and null if all elements are null or the list is empty.
2. Boolean Aggregation Functions:
   - `bool_and()`: Returns true if all non-null values in a column are true, false if any non-null value is false, and null if all values are null.
   - `bool_or()`: Returns true if any non-null value in a column is true, false if all non-null values are false, and null if all values are null.

Both sets of functions handle nulls consistently: null values are ignored, so the result is determined by the non-null values alone and is null only when there are no non-null values.
- Loading branch information
Showing
22 changed files
with
909 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
use arrow2::array::{Array, BooleanArray}; | ||
use common_error::DaftResult; | ||
|
||
use crate::{ | ||
array::{ | ||
ops::{DaftBoolAggable, GroupIndices}, | ||
DataArray, | ||
}, | ||
datatypes::BooleanType, | ||
}; | ||
|
||
impl DaftBoolAggable for DataArray<BooleanType> { | ||
type Output = DaftResult<Self>; | ||
|
||
fn bool_and(&self) -> Self::Output { | ||
let array = self.data(); | ||
let array = array.as_any().downcast_ref::<BooleanArray>().unwrap(); | ||
|
||
// If array is empty or all null, return null | ||
if array.null_count() == array.len() { | ||
return Ok(Self::from(( | ||
self.field.name.as_ref(), | ||
Box::new(BooleanArray::from_iter(std::iter::once(None))), | ||
))); | ||
} | ||
|
||
// Look for first non-null false value | ||
let mut result = true; | ||
for i in 0..array.len() { | ||
if !array.is_null(i) && !array.value(i) { | ||
result = false; | ||
break; | ||
} | ||
} | ||
|
||
Ok(Self::from(( | ||
self.field.name.as_ref(), | ||
Box::new(BooleanArray::from_iter(std::iter::once(Some(result)))), | ||
))) | ||
} | ||
|
||
fn bool_or(&self) -> Self::Output { | ||
let array = self.data(); | ||
let array = array.as_any().downcast_ref::<BooleanArray>().unwrap(); | ||
|
||
// If array is empty or all null, return null | ||
if array.null_count() == array.len() { | ||
return Ok(Self::from(( | ||
self.field.name.as_ref(), | ||
Box::new(BooleanArray::from_iter(std::iter::once(None))), | ||
))); | ||
} | ||
|
||
// Look for first non-null true value | ||
let mut result = false; | ||
for i in 0..array.len() { | ||
if !array.is_null(i) && array.value(i) { | ||
result = true; | ||
break; | ||
} | ||
} | ||
|
||
Ok(Self::from(( | ||
self.field.name.as_ref(), | ||
Box::new(BooleanArray::from_iter(std::iter::once(Some(result)))), | ||
))) | ||
} | ||
|
||
fn grouped_bool_and(&self, groups: &GroupIndices) -> Self::Output { | ||
let array = self.data(); | ||
let array = array.as_any().downcast_ref::<BooleanArray>().unwrap(); | ||
let mut results = Vec::with_capacity(groups.len()); | ||
|
||
for group in groups { | ||
if group.is_empty() { | ||
results.push(None); | ||
continue; | ||
} | ||
|
||
let mut all_null = true; | ||
let mut result = true; | ||
|
||
for &idx in group { | ||
if !array.is_null(idx as usize) { | ||
all_null = false; | ||
if !array.value(idx as usize) { | ||
result = false; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
results.push(if all_null { None } else { Some(result) }); | ||
} | ||
|
||
Ok(Self::from(( | ||
self.field.name.as_ref(), | ||
Box::new(BooleanArray::from_iter(results)), | ||
))) | ||
} | ||
|
||
fn grouped_bool_or(&self, groups: &GroupIndices) -> Self::Output { | ||
let array = self.data(); | ||
let array = array.as_any().downcast_ref::<BooleanArray>().unwrap(); | ||
let mut results = Vec::with_capacity(groups.len()); | ||
|
||
for group in groups { | ||
if group.is_empty() { | ||
results.push(None); | ||
continue; | ||
} | ||
|
||
let mut all_null = true; | ||
let mut result = false; | ||
|
||
for &idx in group { | ||
if !array.is_null(idx as usize) { | ||
all_null = false; | ||
if array.value(idx as usize) { | ||
result = true; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
results.push(if all_null { None } else { Some(result) }); | ||
} | ||
|
||
Ok(Self::from(( | ||
self.field.name.as_ref(), | ||
Box::new(BooleanArray::from_iter(results)), | ||
))) | ||
} | ||
} |
Oops, something went wrong.