Skip to content

Commit

Permalink
Add support for void columns for all the row-functions (#3290)
Browse files Browse the repository at this point in the history
WIP for #3284
  • Loading branch information
oleksiyskononenko authored and sammychoco committed Jun 3, 2022
1 parent 444630a commit 1bf5832
Show file tree
Hide file tree
Showing 13 changed files with 192 additions and 84 deletions.
5 changes: 4 additions & 1 deletion docs/_ext/xfunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -1173,6 +1173,7 @@ def locate_cxx_function(name, kind, lines):
r"\s*\(.*\)\s*" +
r"(?:const\s*|noexcept\s*|override\s*)*" +
r"\{\s*")
n_signature_lines = 5 # number of lines allowed for the function signature
expect_closing = None
istart = None
ifinish = None
Expand All @@ -1187,9 +1188,11 @@ def locate_cxx_function(name, kind, lines):
if mm:
expect_closing = mm.group(1) + "}"
else:
mm = re.match(rx_start, line + lines[i+1])
src = "".join(lines[i:i+n_signature_lines])
mm = re.match(rx_start, src)
if mm:
expect_closing = mm.group(1) + "}"

if not istart:
raise ValueError("Could not find %s `%s` in <FILE>" % (kind, name))
if not expect_closing:
Expand Down
3 changes: 3 additions & 0 deletions docs/releases/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@
-[fix] Reducer functions :func:`dt.prod()` and :func:`dt.sum()` can now be
applied to :attr:`void <dt.Type.void>` columns. [#3281] [#3282]

-[fix] All the row-wise functions now support :attr:`void <dt.Type.void>`
columns. [#3284]


fread
-----
Expand Down
22 changes: 15 additions & 7 deletions src/core/expr/fnary/fnary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ namespace dt {
namespace expr {


FExpr_RowFn::FExpr_RowFn(ptrExpr&& args)
: args_(std::move(args))
FExpr_RowFn::FExpr_RowFn(ptrExpr&& args, bool process_void_cols /* =false */)
: args_(std::move(args)), process_void_cols_(process_void_cols)
{}


Expand All @@ -47,15 +47,22 @@ std::string FExpr_RowFn::repr() const {
Workframe FExpr_RowFn::evaluate_n(EvalContext& ctx) const {
Workframe inputs = args_->evaluate_n(ctx);
Grouping gmode = inputs.get_grouping_mode();
std::vector<Column> columns;
columns.reserve(inputs.ncols());
for (size_t i = 0; i < inputs.ncols(); ++i) {
columns.emplace_back(inputs.retrieve_column(i));
colvec columns;
size_t ncols = inputs.ncols();
size_t nrows = 1;
columns.reserve(ncols);
for (size_t i = 0; i < ncols; ++i) {
Column col = inputs.retrieve_column(i);
xassert(i == 0 || nrows == col.nrows());
nrows = col.nrows();
if (process_void_cols_ || !col.type().is_void()) {
columns.emplace_back(col);
}
}

Workframe out(ctx);
out.add_column(
apply_function(std::move(columns)),
apply_function(std::move(columns), nrows, ncols),
"", gmode
);
return out;
Expand All @@ -66,6 +73,7 @@ SType FExpr_RowFn::common_numeric_stype(const colvec& columns) const {
SType common_stype = SType::INT32;
for (size_t i = 0; i < columns.size(); ++i) {
switch (columns[i].stype()) {
case SType::VOID:
case SType::BOOL:
case SType::INT8:
case SType::INT16:
Expand Down
46 changes: 36 additions & 10 deletions src/core/expr/fnary/fnary.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,23 @@ py::oobj py_rowfn(const py::XArgs& args);
class FExpr_RowFn : public FExpr_Func {
private:
ptrExpr args_;
bool process_void_cols_;
size_t : 56;

public:
FExpr_RowFn(ptrExpr&& args);
FExpr_RowFn(ptrExpr&& args, bool process_void_cols = false);
std::string repr() const override;
Workframe evaluate_n(EvalContext& ctx) const override;

virtual std::string name() const = 0;
virtual Column apply_function(std::vector<Column>&& columns) const = 0;
virtual Column apply_function(
colvec&& columns, // columns to process; if `process_void_cols_` is `false`,
// void columns are filtered out
const size_t nrows, // number of rows in the original input frame; needed in the case
// when all the columns are void and filtered out
const size_t ncols // number of columns in the original input frame,
// including the void columns
) const = 0;

SType common_numeric_stype(const colvec&) const;
void promote_columns(colvec& columns, SType target_stype) const;
Expand All @@ -71,7 +80,9 @@ class FExpr_RowAll : public FExpr_RowFn {
using FExpr_RowFn::FExpr_RowFn;

std::string name() const override;
Column apply_function(std::vector<Column>&& columns) const override;
Column apply_function(colvec&& columns,
const size_t nrows,
const size_t ncols) const override;
};


Expand All @@ -81,7 +92,9 @@ class FExpr_RowAny : public FExpr_RowFn {
using FExpr_RowFn::FExpr_RowFn;

std::string name() const override;
Column apply_function(std::vector<Column>&& columns) const override;
Column apply_function(colvec&& columns,
const size_t nrows,
const size_t ncols) const override;
};


Expand All @@ -91,7 +104,9 @@ class FExpr_RowCount : public FExpr_RowFn {
using FExpr_RowFn::FExpr_RowFn;

std::string name() const override;
Column apply_function(std::vector<Column>&& columns) const override;
Column apply_function(colvec&& columns,
const size_t nrows,
const size_t ncols) const override;
};


Expand All @@ -102,7 +117,9 @@ class FExpr_RowFirstLast : public FExpr_RowFn {
using FExpr_RowFn::FExpr_RowFn;

std::string name() const override;
Column apply_function(std::vector<Column>&& columns) const override;
Column apply_function(colvec&& columns,
const size_t nrows,
const size_t ncols) const override;
};

extern template class FExpr_RowFirstLast<true>;
Expand All @@ -115,8 +132,11 @@ class FExpr_RowMinMax : public FExpr_RowFn {
public:
using FExpr_RowFn::FExpr_RowFn;

FExpr_RowMinMax(ptrExpr&& args);
std::string name() const override;
Column apply_function(std::vector<Column>&& columns) const override;
Column apply_function(colvec&& columns,
const size_t nrows,
const size_t ncols) const override;
};

extern template class FExpr_RowMinMax<true,true>;
Expand All @@ -131,7 +151,9 @@ class FExpr_RowMean : public FExpr_RowFn {
using FExpr_RowFn::FExpr_RowFn;

std::string name() const override;
Column apply_function(std::vector<Column>&& columns) const override;
Column apply_function(colvec&& columns,
const size_t nrows,
const size_t ncols) const override;
};


Expand All @@ -141,7 +163,9 @@ class FExpr_RowSd : public FExpr_RowFn {
using FExpr_RowFn::FExpr_RowFn;

std::string name() const override;
Column apply_function(std::vector<Column>&& columns) const override;
Column apply_function(colvec&& columns,
const size_t nrows,
const size_t ncols) const override;
};


Expand All @@ -151,7 +175,9 @@ class FExpr_RowSum : public FExpr_RowFn {
using FExpr_RowFn::FExpr_RowFn;

std::string name() const override;
Column apply_function(std::vector<Column>&& columns) const override;
Column apply_function(colvec&& columns,
const size_t nrows,
const size_t ncols) const override;
};


Expand Down
19 changes: 12 additions & 7 deletions src/core/expr/fnary/rowall.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,21 @@ static bool op_rowall(size_t i, int8_t* out, const colvec& columns) {
}


Column FExpr_RowAll::apply_function(colvec&& columns) const {
if (columns.empty()) {
Column FExpr_RowAll::apply_function(colvec&& columns,
const size_t nrows,
const size_t ncols) const
{
// No columns
if (ncols == 0) {
return Const_ColumnImpl::make_bool_column(1, true);
}
size_t nrows = columns[0].nrows();
// At least one void column was filtered out: the result is `false` for every row
if (columns.size() != ncols) {
return Const_ColumnImpl::make_bool_column(nrows, false);
}
// No void columns
for (size_t i = 0; i < columns.size(); ++i) {
xassert(columns[i].nrows() == nrows);
// If there is even one void column, the result of `rowall()` is `false`
if (columns[i].type().is_void()) {
return Const_ColumnImpl::make_bool_column(nrows, false);
}
if (!columns[i].type().is_boolean()) {
throw TypeError() << "Function `rowall` requires a sequence of boolean "
"columns, however column " << i << " has type `"
Expand All @@ -70,6 +74,7 @@ Column FExpr_RowAll::apply_function(colvec&& columns) const {
std::move(columns), op_rowall, nrows, SType::BOOL));
}


DECLARE_PYFN(&py_rowfn)
->name("rowall")
->docs(dt::doc_dt_rowall)
Expand Down
28 changes: 12 additions & 16 deletions src/core/expr/fnary/rowany.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,29 +50,25 @@ static bool op_rowany(size_t i, int8_t* out, const colvec& columns) {



Column FExpr_RowAny::apply_function(colvec&& columns) const {
size_t ncols = columns.size();
size_t nrows = ncols? columns[0].nrows() : 1;
colvec columns_;
columns_.reserve(ncols);

for (size_t i = 0; i < ncols; ++i) {
Column FExpr_RowAny::apply_function(colvec&& columns,
const size_t nrows,
const size_t ncols) const
{
// No columns or all the columns are void
if (columns.empty()) {
// `ncols == 0` tests that the original input frame had no columns
return Const_ColumnImpl::make_bool_column(nrows, ncols == 0);
}
for (size_t i = 0; i < columns.size(); ++i) {
xassert(columns[i].nrows() == nrows);
if (!columns[i].type().is_boolean_or_void()) {
if (!columns[i].type().is_boolean()) {
throw TypeError() << "Function `rowany` requires a sequence of boolean "
"columns, however column " << i << " has type `"
<< columns[i].stype() << "`";
}
// Filter out void columns, since they don't affect result of `rowany()`
if (columns[i].type().is_boolean()) {
columns_.push_back(std::move(columns[i]));
}
}
if (columns_.empty()) {
return Const_ColumnImpl::make_bool_column(nrows, columns.empty());
}
return Column(new FuncNary_ColumnImpl<int8_t>(
std::move(columns_), op_rowany, nrows, SType::BOOL));
std::move(columns), op_rowany, nrows, SType::BOOL));
}


Expand Down
8 changes: 5 additions & 3 deletions src/core/expr/fnary/rowcount.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,13 @@ static bool op_rowcount(size_t i, int32_t* out, const colvec& columns) {
}


Column FExpr_RowCount::apply_function(colvec&& columns) const {
Column FExpr_RowCount::apply_function(colvec&& columns,
const size_t nrows,
const size_t) const
{
if (columns.empty()) {
return Const_ColumnImpl::make_int_column(1, 0, SType::INT32);
return Const_ColumnImpl::make_int_column(nrows, 0, SType::INT32);
}
size_t nrows = columns[0].nrows();
for (size_t i = 0; i < columns.size(); ++i) {
xassert(columns[i].nrows() == nrows);
columns[i] = unaryop(Op::ISNA, std::move(columns[i]));
Expand Down
39 changes: 15 additions & 24 deletions src/core/expr/fnary/rowfirstlast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,43 +57,34 @@ static inline Column _rowfirstlast(colvec&& columns, SType outtype) {


template <bool FIRST>
Column FExpr_RowFirstLast<FIRST>::apply_function(colvec&& columns) const {
size_t ncols = columns.size();
size_t nrows = ncols? columns[0].nrows() : 1;
colvec columns_;
columns_.reserve(ncols);

for (size_t i = 0; i < ncols; ++i) {
// Filter out void columns, since they don't affect the result
if (!columns[i].type().is_void()) {
columns_.push_back(std::move(columns[i]));
}
}

if (columns_.empty()) {
Column FExpr_RowFirstLast<FIRST>::apply_function(colvec&& columns,
const size_t nrows,
const size_t) const
{
if (columns.empty()) {
return Const_ColumnImpl::make_na_column(nrows);
}

// Detect common stype
SType stype0 = SType::VOID;
for (const auto& col : columns_) {
for (const auto& col : columns) {
stype0 = common_stype(stype0, col.stype());
}
if (stype0 == SType::INVALID) {
throw TypeError() << "Incompatible column types in function `" << name() << "`";
}
promote_columns(columns_, stype0);
promote_columns(columns, stype0);

switch (stype0) {
case SType::BOOL: return _rowfirstlast<int8_t, FIRST>(std::move(columns_), stype0);
case SType::INT8: return _rowfirstlast<int8_t, FIRST>(std::move(columns_), stype0);
case SType::INT16: return _rowfirstlast<int16_t, FIRST>(std::move(columns_), stype0);
case SType::INT32: return _rowfirstlast<int32_t, FIRST>(std::move(columns_), stype0);
case SType::INT64: return _rowfirstlast<int64_t, FIRST>(std::move(columns_), stype0);
case SType::FLOAT32: return _rowfirstlast<float, FIRST>(std::move(columns_), stype0);
case SType::FLOAT64: return _rowfirstlast<double, FIRST>(std::move(columns_), stype0);
case SType::BOOL: return _rowfirstlast<int8_t, FIRST>(std::move(columns), stype0);
case SType::INT8: return _rowfirstlast<int8_t, FIRST>(std::move(columns), stype0);
case SType::INT16: return _rowfirstlast<int16_t, FIRST>(std::move(columns), stype0);
case SType::INT32: return _rowfirstlast<int32_t, FIRST>(std::move(columns), stype0);
case SType::INT64: return _rowfirstlast<int64_t, FIRST>(std::move(columns), stype0);
case SType::FLOAT32: return _rowfirstlast<float, FIRST>(std::move(columns), stype0);
case SType::FLOAT64: return _rowfirstlast<double, FIRST>(std::move(columns), stype0);
case SType::STR32:
case SType::STR64: return _rowfirstlast<CString, FIRST>(std::move(columns_), stype0);
case SType::STR64: return _rowfirstlast<CString, FIRST>(std::move(columns), stype0);
default: {
throw TypeError() << "Function `" << name() << "` doesn't support type `" << stype0 << "`";
}
Expand Down
9 changes: 6 additions & 3 deletions src/core/expr/fnary/rowmean.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,12 @@ static inline Column _rowmean(colvec&& columns) {
}


Column FExpr_RowMean::apply_function(colvec&& columns) const {
Column FExpr_RowMean::apply_function(colvec&& columns,
const size_t nrows,
const size_t) const
{
if (columns.empty()) {
return Const_ColumnImpl::make_na_column(1);
return Column(new ConstNa_ColumnImpl(nrows, SType::FLOAT64));
}
SType res_stype = common_numeric_stype(columns);
if (res_stype == SType::INT32 || res_stype == SType::INT64) {
Expand All @@ -81,6 +84,7 @@ Column FExpr_RowMean::apply_function(colvec&& columns) const {
}
}


DECLARE_PYFN(&py_rowfn)
->name("rowmean")
->docs(doc_dt_rowmean)
Expand All @@ -89,5 +93,4 @@ DECLARE_PYFN(&py_rowfn)




}} // namespace dt::expr
Loading

0 comments on commit 1bf5832

Please sign in to comment.